import numpy as np
import mxnet as mx
import logging
logging.getLogger().setLevel(logging.DEBUG) # logging to stdout
Basic data structures in mxnet
ndarray
ndarray is the most fundamental data structure in mxnet; ndarray is to mxnet roughly what tensor is to pytorch. It can be seen as a variant of numpy: essentially everything numpy can do, ndarray can do as well. The relevant namespace is mxnet.nd; see the official API documentation for the full ndarray API.
ndarray operations
a = mx.nd.random.normal(shape=(4,3))
b = mx.nd.ones((4,3))
print(a)
print(b)
print(a + b)
[[ 0.23107234 0.30030754 -0.32433936]
[ 1.04932904 0.7368623 -0.0097888 ]
[ 0.46656415 1.72023427 0.87809837]
[-1.07333779 -0.86925656 -0.26717702]]
<NDArray 4x3 @cpu(0)>
[[ 1. 1. 1.]
[ 1. 1. 1.]
[ 1. 1. 1.]
[ 1. 1. 1.]]
<NDArray 4x3 @cpu(0)>
[[ 1.23107231 1.30030751 0.67566061]
[ 2.04932904 1.7368623 0.99021119]
[ 1.46656418 2.72023439 1.87809837]
[-0.07333779 0.13074344 0.73282301]]
<NDArray 4x3 @cpu(0)>
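Beyond elementwise arithmetic, ndarray mirrors most of numpy's everyday operations. A minimal sketch reusing a and b from above (reshape, matrix product, and slicing are all standard NDArray operations):
c = a.reshape((3,4))        # reshape without copying data
d = mx.nd.dot(a, b.T)       # matrix product of (4,3) and (3,4), giving shape (4,4)
e = a[1:3]                  # slice out rows 1 and 2
print(c.shape, d.shape, e.shape)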
Converting between ndarray and numpy
- Pass a numpy array to mxnet.nd.array() to convert it into an ndarray
- Use the ndarray.asnumpy() method to convert an ndarray into a numpy array
a = np.random.randn(2,3)
print(a,type(a))
b = mx.nd.array(a)
print(b,type(b))
b = b.asnumpy()
print(b,type(b))
[[ 0.85512384 -0.58311797 -1.41627038]
[-0.56862628 1.15431958 0.13168715]] <class 'numpy.ndarray'>
[[ 0.85512382 -0.58311796 -1.41627038]
[-0.56862628 1.15431952 0.13168715]]
<NDArray 2x3 @cpu(0)> <class 'mxnet.ndarray.ndarray.NDArray'>
[[ 0.85512382 -0.58311796 -1.41627038]
[-0.56862628 1.15431952 0.13168715]] <class 'numpy.ndarray'>
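Note that the values change slightly in the round trip: mx.nd.array() stores float32 by default, while numpy's randn() produces float64. If full precision matters, the dtype can be passed explicitly; a minimal sketch:
b64 = mx.nd.array(a, dtype=np.float64)  # keep float64 instead of the float32 default
print(b64.dtype)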
symbol
symbol is another important concept. It can be understood as a symbolic variable, just like the algebraic symbols x, y, z we use on paper. As a simple analogy: for a function $f(x) = x^{2}$, the symbol x is a symbol, while a concrete value of x is an ndarray. The relevant namespace is mxnet.sym; see the official API documentation for details.
Basic operations
- Use mxnet.sym.Variable() with a name to create a symbol
- Use mxnet.viz.plot_network(symbol=...) with a symbol to draw the computation graph
a = mx.sym.Variable('a')
b = mx.sym.Variable('b')
c = mx.sym.add_n(a,b,name="c")
mx.viz.plot_network(symbol=c)
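A symbol records only the structure of the computation, not any values, so its inputs and output shapes can be inspected before binding data. A minimal sketch using c from above:
print(c.list_arguments())                                 # input names: ['a', 'b']
arg_shapes, out_shapes, aux_shapes = c.infer_shape(a=(5,), b=(5,))
print(out_shapes)                                         # [(5,)]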
Binding ndarrays
Use the Symbol.bind() method to obtain an executor with the operands bound in, then call forward() on the executor to compute the value
x = c.bind(ctx=mx.cpu(),args={"a": mx.nd.ones(5),"b":mx.nd.ones(5)})
result = x.forward()
print(result)
[
[ 2. 2. 2. 2. 2.]
<NDArray 5 @cpu(0)>]
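For a quick one-off evaluation, Symbol.eval() wraps the bind() and forward() pair in a single call; a minimal sketch:
result = c.eval(ctx=mx.cpu(), a=mx.nd.ones(5), b=mx.nd.ones(5))
print(result)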
Data loading in mxnet
How data is loaded matters a great deal in deep learning. mxnet provides a family of DataIters under mxnet.io for this; see the official API documentation for details. The dynamic-graph interface gluon likewise provides DataIters under mxnet.gluon.data; see the official API documentation.
Loading data with mxnet.io
The core of mxnet.io data loading is the mxnet.io.DataIter class and its subclasses, such as the ndarray iterator NDArrayIter:
- data=: a dict mapping names to data
- label=: a dict mapping names to labels
- batch_size=: the batch size
dataset = mx.io.NDArrayIter(data={'data':mx.nd.ones((10,5))},label={'label':mx.nd.arange(10)},batch_size=5)
for i in dataset:
    print(i)
    print(i.data, type(i.data[0]))
    print(i.label, type(i.label[0]))
DataBatch: data shapes: [(5, 5)] label shapes: [(5,)]
[
[[ 1. 1. 1. 1. 1.]
[ 1. 1. 1. 1. 1.]
[ 1. 1. 1. 1. 1.]
[ 1. 1. 1. 1. 1.]
[ 1. 1. 1. 1. 1.]]
<NDArray 5x5 @cpu(0)>] <class 'mxnet.ndarray.ndarray.NDArray'>
[
[ 0. 1. 2. 3. 4.]
<NDArray 5 @cpu(0)>] <class 'mxnet.ndarray.ndarray.NDArray'>
DataBatch: data shapes: [(5, 5)] label shapes: [(5,)]
[
[[ 1. 1. 1. 1. 1.]
[ 1. 1. 1. 1. 1.]
[ 1. 1. 1. 1. 1.]
[ 1. 1. 1. 1. 1.]
[ 1. 1. 1. 1. 1.]]
<NDArray 5x5 @cpu(0)>] <class 'mxnet.ndarray.ndarray.NDArray'>
[
[ 5. 6. 7. 8. 9.]
<NDArray 5 @cpu(0)>] <class 'mxnet.ndarray.ndarray.NDArray'>
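A DataIter is exhausted after one pass over the data; call reset() before iterating again, as needed when training for several epochs. A minimal sketch:
for epoch in range(2):
    dataset.reset()              # rewind the iterator for a fresh pass
    for batch in dataset:
        pass                     # consume batch here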
Loading data with gluon.data
gluon's data API is almost identical to pytorch's: Dataset + DataLoader:
- Dataset: stores the data; to use it, subclass the base class and override the __len__(self) and __getitem__(self, idx) methods
- DataLoader: wraps a Dataset into an iterable that yields batches
dataset = mx.gluon.data.ArrayDataset(mx.nd.ones((10,5)),mx.nd.arange(10))
loader = mx.gluon.data.DataLoader(dataset,batch_size=5)
for i, data in enumerate(loader):
    print(i)
    print(data)
0
[
[[ 1. 1. 1. 1. 1.]
[ 1. 1. 1. 1. 1.]
[ 1. 1. 1. 1. 1.]
[ 1. 1. 1. 1. 1.]
[ 1. 1. 1. 1. 1.]]
<NDArray 5x5 @cpu(0)>,
[ 0. 1. 2. 3. 4.]
<NDArray 5 @cpu(0)>]
1
[
[[ 1. 1. 1. 1. 1.]
[ 1. 1. 1. 1. 1.]
[ 1. 1. 1. 1. 1.]
[ 1. 1. 1. 1. 1.]
[ 1. 1. 1. 1. 1.]]
<NDArray 5x5 @cpu(0)>,
[ 5. 6. 7. 8. 9.]
<NDArray 5 @cpu(0)>]
class TestSet(mx.gluon.data.Dataset):
    def __init__(self):
        self.x = mx.nd.zeros((10,5))
        self.y = mx.nd.arange(10)
    def __getitem__(self, i):
        return self.x[i], self.y[i]
    def __len__(self):
        return 10

for i, data in enumerate(mx.gluon.data.DataLoader(TestSet(), batch_size=5)):
    print(data)
[
[[ 0. 0. 0. 0. 0.]
[ 0. 0. 0. 0. 0.]
[ 0. 0. 0. 0. 0.]
[ 0. 0. 0. 0. 0.]
[ 0. 0. 0. 0. 0.]]
<NDArray 5x5 @cpu(0)>,
[[ 0.]
[ 1.]
[ 2.]
[ 3.]
[ 4.]]
<NDArray 5x1 @cpu(0)>]
[
[[ 0. 0. 0. 0. 0.]
[ 0. 0. 0. 0. 0.]
[ 0. 0. 0. 0. 0.]
[ 0. 0. 0. 0. 0.]
[ 0. 0. 0. 0. 0.]]
<NDArray 5x5 @cpu(0)>,
[[ 5.]
[ 6.]
[ 7.]
[ 8.]
[ 9.]]
<NDArray 5x1 @cpu(0)>]
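Note that the labels now come out with shape 5x1 rather than 5: indexing the 1-D ndarray self.y with an integer returns a shape-(1,) ndarray, and the DataLoader stacks five of those into a (5, 1) batch. If flat labels are wanted, one option (a sketch, not the only fix) is to unwrap the value in __getitem__:
def __getitem__(self, i):
    return self.x[i], self.y[i].asscalar()   # asscalar() returns a python float; batches then have shape (5,)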
Building networks
Building a network with mxnet
Building a network in mxnet is similar to TensorFlow: assemble the network from symbols, then wrap it in a Module
data = mx.sym.Variable('data')
# layer1
conv1 = mx.sym.Convolution(data=data, kernel=(5,5), num_filter=32,name="conv1")
relu1 = mx.sym.Activation(data=conv1,act_type="relu",name="relu1")
pool1 = mx.sym.Pooling(data=relu1,pool_type="max",kernel=(2,2),stride=(2,2),name="pool1")
# layer2
conv2 = mx.sym.Convolution(data=pool1, kernel=(3,3), num_filter=64,name="conv2")
relu2 = mx.sym.Activation(data=conv2,act_type="relu",name="relu2")
pool2 = mx.sym.Pooling(data=relu2,pool_type="max",kernel=(2,2),stride=(2,2),name="pool2")
# layer3
fc1 = mx.symbol.FullyConnected(data=mx.sym.flatten(pool2), num_hidden=256,name="fc1")
relu3 = mx.sym.Activation(data=fc1, act_type="relu",name="relu3")
# layer4
fc2 = mx.symbol.FullyConnected(data=relu3, num_hidden=10,name="fc2")
out = mx.sym.SoftmaxOutput(data=fc2, label=mx.sym.Variable("label"),name='softmax')
mxnet_model = mx.mod.Module(symbol=out,label_names=["label"],context=mx.gpu())
mx.viz.plot_network(symbol=out)
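Before training, it is worth checking how shapes flow through the network; Symbol.infer_shape() can do this from the input shapes alone. A minimal sketch, assuming MNIST-sized input (a batch of 100 1x28x28 images):
arg_shapes, out_shapes, aux_shapes = out.infer_shape(data=(100,1,28,28), label=(100,))
print(out_shapes)    # [(100, 10)]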
Building a model with Gluon
Building a model in Gluon is similar to pytorch: either subclass mx.gluon.Block or use mx.gluon.nn.Sequential()
The general approach
class MLP(mx.gluon.Block):
    def __init__(self, **kwargs):
        super(MLP, self).__init__(**kwargs)
        with self.name_scope():
            self.dense0 = mx.gluon.nn.Dense(256)
            self.dense1 = mx.gluon.nn.Dense(64)
            self.dense2 = mx.gluon.nn.Dense(10)
    def forward(self, x):
        x = mx.nd.relu(self.dense0(x))
        x = mx.nd.relu(self.dense1(x))
        x = self.dense2(x)
        return x
gluon_model = MLP()
print(gluon_model)
# mx.viz.plot_network(symbol=gluon_model)
MLP(
(dense0): Dense(None -> 256, linear)
(dense2): Dense(None -> 10, linear)
(dense1): Dense(None -> 64, linear)
)
The quick approach
gluon_model2 = mx.gluon.nn.Sequential()
with gluon_model2.name_scope():
    gluon_model2.add(mx.gluon.nn.Dense(256, activation="relu"))
    gluon_model2.add(mx.gluon.nn.Dense(64, activation="relu"))
    gluon_model2.add(mx.gluon.nn.Dense(10, activation="relu"))
print(gluon_model2)
Sequential(
(0): Dense(None -> 256, Activation(relu))
(1): Dense(None -> 64, Activation(relu))
(2): Dense(None -> 10, Activation(relu))
)
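Gluon also provides hybrid blocks, which run imperatively but can be compiled into a static graph with hybridize() for speed. A minimal sketch using mx.gluon.nn.HybridSequential; note the final Dense(10) is left without an activation so it emits raw logits for a softmax loss:
gluon_model3 = mx.gluon.nn.HybridSequential()
with gluon_model3.name_scope():
    gluon_model3.add(mx.gluon.nn.Dense(256, activation="relu"))
    gluon_model3.add(mx.gluon.nn.Dense(64, activation="relu"))
    gluon_model3.add(mx.gluon.nn.Dense(10))     # no activation: raw logits
gluon_model3.hybridize()                        # compile into a symbolic graph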
Model training
Training an mxnet model
mxnet provides two levels of training wrappers; in most cases the most convenient top-level wrapper, fit(), is all you need
mnist = mx.test_utils.get_mnist()
train_iter = mx.io.NDArrayIter(mnist['train_data'], mnist['train_label'], batch_size=100, data_name='data',label_name='label',shuffle=True)
val_iter = mx.io.NDArrayIter(mnist['test_data'], mnist['test_label'], batch_size=100,data_name='data',label_name='label')
INFO:root:train-labels-idx1-ubyte.gz exists, skipping download
INFO:root:train-images-idx3-ubyte.gz exists, skipping download
INFO:root:t10k-labels-idx1-ubyte.gz exists, skipping download
INFO:root:t10k-images-idx3-ubyte.gz exists, skipping download
mxnet_model.fit(train_iter,                                            # training data
                eval_data=val_iter,                                    # validation data
                optimizer='adam',                                      # use Adam to train
                optimizer_params={'learning_rate':0.01},               # use a fixed learning rate
                eval_metric='acc',                                     # report accuracy during training
                batch_end_callback=mx.callback.Speedometer(100, 200),  # log progress every 200 batches of size 100
                num_epoch=3)                                           # train for at most 3 passes over the data
INFO:root:Epoch[0] Batch [200] Speed: 5239.83 samples/sec accuracy=0.890348
INFO:root:Epoch[0] Batch [400] Speed: 5135.49 samples/sec accuracy=0.971450
INFO:root:Epoch[0] Train-accuracy=0.977236
INFO:root:Epoch[0] Time cost=11.520
INFO:root:Epoch[0] Validation-accuracy=0.980300
INFO:root:Epoch[1] Batch [200] Speed: 5336.36 samples/sec accuracy=0.979453
INFO:root:Epoch[1] Batch [400] Speed: 5312.22 samples/sec accuracy=0.982550
INFO:root:Epoch[1] Train-accuracy=0.984724
INFO:root:Epoch[1] Time cost=11.704
INFO:root:Epoch[1] Validation-accuracy=0.980500
INFO:root:Epoch[2] Batch [200] Speed: 5522.89 samples/sec accuracy=0.982388
INFO:root:Epoch[2] Batch [400] Speed: 5562.08 samples/sec accuracy=0.984550
INFO:root:Epoch[2] Train-accuracy=0.985075
INFO:root:Epoch[2] Time cost=10.860
INFO:root:Epoch[2] Validation-accuracy=0.978000
Training a gluon model
Training a gluon model involves:
- initializing the model parameters
- defining the loss function and the optimizer
- computing the forward pass
- backpropagating to compute the gradients
- calling the optimizer to update the model
def transform(data, label):
    return data.astype(np.float32)/255, label.astype(np.float32)

gluon_train_data = mx.gluon.data.DataLoader(mx.gluon.data.vision.MNIST(train=True, transform=transform),
                                            100, shuffle=True)
gluon_test_data = mx.gluon.data.DataLoader(mx.gluon.data.vision.MNIST(train=False, transform=transform),
                                           100, shuffle=False)
gluon_model.collect_params().initialize(mx.init.Normal(sigma=.1), ctx=mx.gpu())
softmax_cross_entropy = mx.gluon.loss.SoftmaxCrossEntropyLoss()
trainer = mx.gluon.Trainer(gluon_model.collect_params(), 'sgd', {'learning_rate': .1})
for _ in range(2):
    for i, (data, label) in enumerate(gluon_train_data):
        data = data.as_in_context(mx.gpu()).reshape((-1, 784))
        label = label.as_in_context(mx.gpu())
        with mx.autograd.record():
            outputs = gluon_model(data)
            loss = softmax_cross_entropy(outputs, label)
        loss.backward()
        trainer.step(data.shape[0])
        if i % 100 == 1:
            print(loss.mean().asnumpy()[0])
2.3196
0.280345
0.268811
0.419094
0.260873
0.252575
0.162117
0.247361
0.169366
0.184899
0.0986493
0.251358
Computing accuracy
Computing accuracy for an mxnet model
mxnet models provide a score() method for computing metrics, used much like sklearn's. Besides this API, you can also build an evaluation function with ndarray operations, as sketched after the example below
acc = mx.metric.Accuracy()
mxnet_model.score(val_iter,acc)
print(acc)
EvalMetric: {'accuracy': 0.97799999999999998}
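As mentioned above, the same number can also be computed by hand with ndarray operations; a minimal sketch using Module.predict(), which returns the concatenated class probabilities:
probs = mxnet_model.predict(val_iter)              # shape (10000, 10)
preds = mx.nd.argmax(probs, axis=1).asnumpy()
print((preds == mnist['test_label']).mean())       # fraction of correct predictions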
Computing accuracy for a gluon model
The official gluon tutorial does not use a ready-made accuracy routine; instead one is built with mxnet's metric.Accuracy()
def evaluate_accuracy():
    acc = mx.metric.Accuracy()
    for i, (data, label) in enumerate(gluon_test_data):
        data = data.as_in_context(mx.gpu()).reshape((-1, 784))
        label = label.as_in_context(mx.gpu())
        output = gluon_model(data)
        predictions = mx.nd.argmax(output, axis=1)
        acc.update(preds=predictions, labels=label)
    return acc.get()[1]

evaluate_accuracy()
0.95079999999999998
Saving and loading models
mxnet
Saving an mxnet model
- During training, pass mx.callback.module_checkpoint() as fit()'s epoch_end_callback parameter to save checkpoints each epoch (see the sketch after the example below)
- After training, use module.save_checkpoint() to save the model
mxnet_model.save_checkpoint("mxnet_",3)
INFO:root:Saved checkpoint to "mxnet_-0003.params"
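For checkpointing during training, the callback mentioned above is passed to fit(); a minimal sketch reusing mxnet_model and the iterators from before:
mxnet_model.fit(train_iter,
                eval_data=val_iter,
                num_epoch=3,
                epoch_end_callback=mx.callback.module_checkpoint(mxnet_model, "mxnet_", period=1))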
Loading an mxnet model
Use mx.model.load_checkpoint() together with Module.set_params() to load a model
# mxnet_model2 = mx.mod.Module(symbol=out,label_names=["label"],context=mx.gpu())
sym, arg_params, aux_params = mx.model.load_checkpoint("mxnet_", 3)
mxnet_model2 = mx.mod.Module(symbol=sym,label_names=["label"],context=mx.gpu())
mxnet_model2.bind(data_shapes=train_iter.provide_data, label_shapes=train_iter.provide_label)
mxnet_model2.set_params(arg_params,aux_params)
mxnet_model2.score(val_iter,acc)
print(acc)
EvalMetric: {'accuracy': 0.97799999999999998}
gluon
Saving a gluon model
Use gluon.Block.save_params() to save the model parameters
gluon_model.save_params("gluon_model")
Loading a gluon model
Use gluon.Block.load_params() to load the model parameters
gluon_model2.load_params("gluon_model",ctx=mx.gpu())
def evaluate_accuracy():
    acc = mx.metric.Accuracy()
    for i, (data, label) in enumerate(gluon_test_data):
        data = data.as_in_context(mx.gpu()).reshape((-1, 784))
        label = label.as_in_context(mx.gpu())
        output = gluon_model2(data)
        predictions = mx.nd.argmax(output, axis=1)
        acc.update(preds=predictions, labels=label)
    return acc.get()[1]

evaluate_accuracy()
0.95079999999999998