import mxnet as mx
官方github教程部分代碼
網絡生成
# Build a stacked LSTM: SequentialRNNCell is the container, and each
# LSTMCell added to it is one layer with its own parameter-name prefix.
num_layers = 2
num_hidden = 256
stack = mx.rnn.SequentialRNNCell()
for i in range(num_layers):
    stack.add(mx.rnn.LSTMCell(num_hidden=num_hidden, prefix='lstm_l%d_' % i))
-
mx.rnn.SequentialRNNCell()
:RNN容器，用于組合多個RNN層 -
mx.rnn.LSTMCell(num_hidden=num_hidden, prefix='lstm_l%d_'%i)
:LSTM單元
num_embed = 256


def sym_gen(seq_len):
    """Build the unrolled stacked-LSTM symbol for one sequence length.

    Args:
        seq_len: Number of time steps the stacked cell is unrolled over.

    Returns:
        A tuple ``(symbol, data_names, label_names)``: the softmax output
        symbol, ``('data',)`` and ``('softmax_label',)``.
    """
    # Input placeholders; the data is embedded into num_embed-sized vectors.
    data = mx.sym.Variable('data')
    label = mx.sym.Variable('softmax_label')
    embed = mx.sym.Embedding(data=data, input_dim=1000,
                             output_dim=num_embed, name='embed')

    # Unroll the stacked cell over time; only the merged outputs are used.
    stack.reset()
    outputs, _ = stack.unroll(seq_len, inputs=embed, merge_outputs=True)

    # Reshape the outputs to (-1, num_hidden) and apply a 1000-unit
    # fully connected layer.
    pred = mx.sym.Reshape(outputs, shape=(-1, num_hidden))
    pred = mx.sym.FullyConnected(data=pred, num_hidden=1000, name='pred')

    # Flatten the label and attach the softmax loss.
    label = mx.sym.Reshape(label, shape=(-1,))
    pred = mx.sym.SoftmaxOutput(data=pred, label=label, name='softmax')

    return pred, ('data',), ('softmax_label',)
sym_gen(1)
(<Symbol softmax>, ('data',), ('softmax_label',))
-
unroll()
函數按時間展開RNN單元,輸出最終的運算結果 - 輸出接全連接層,再轉換為詞向量
官方API文檔代碼
數據轉換
# Placeholder symbol for the raw input of a single step.
step_input = mx.symbol.Variable('step_data')
# Embed the raw input so it can be used as the LSTM's input.
embedded_step = mx.symbol.Embedding(
    data=step_input,
    input_dim=50,
    output_dim=50,
)
# Draw the computation graph of the embedding symbol.
mx.viz.plot_network(symbol=embedded_step)
# Then we create an LSTM cell.
output_7_0.png
Embedding
是一種詞向量化技術,這種技術可以保持語義(例如相近語義的詞的向量距離會較近),將尺寸為(d0,d1,...,dn)的輸入向量進行詞向量化后轉換為尺寸為(d0,d1,...,dn,out_dim)的向量，多出的一維為詞向量,即使用一個向量代替原來一個詞的位置。
- 參數input_dim為輸入向量的范圍，即輸入data的范圍在[0,input_dim)之間
- 參數output_dim為詞向量大小
- 可選參數weight,可傳入指定的詞向量字典
- 可選參數name，可傳入名稱
# Infer the Embedding output shape for a (seq_len, batch_size) input.
vocabulary_size = 26
embed_dim = 16
seq_len = 10
batch_size = 64
# NOTE: the name `input` shadows the Python builtin of the same name.
input = mx.sym.Variable('letters')
op = mx.sym.Embedding(
    data=input,
    input_dim=vocabulary_size,
    output_dim=embed_dim,
    name='embed',
)
op.infer_shape(letters=(seq_len, batch_size))
([(10, 64), (26, 16)], [(10, 64, 16)], [])
上文的例子可以看出輸入向量尺寸為(10,64),輸出向量尺寸變為了(10,64,16)
網絡構建
使用了隱層為50的LSTM單元，并帶入轉換好的數據,該圖繪制出的lstm圖較經典LSTM有一些出入
# Create an LSTM cell with 50 hidden units.
lstm_cell = mx.rnn.LSTMCell(num_hidden=50)
# Symbols for the cell's initial states.
begin_state = lstm_cell.begin_state()
# Run a single step: feed the embedded input and the initial states to the cell.
output, states = lstm_cell(embedded_step, begin_state)
# Draw the computation graph of the step output.
mx.viz.plot_network(symbol=output)
output_11_0.png
LSTM的源碼的構造函數如下:
def __init__(self, num_hidden, prefix='lstm_', params=None, forget_bias=1.0):
    """Set up the cell and look up its four parameters.

    Args:
        num_hidden: Stored as ``self._num_hidden``.
        prefix: Forwarded to the base-class constructor.
        params: Forwarded to the base-class constructor.
        forget_bias: Passed to ``init.LSTMBias``, which is used as the
            initializer of ``i2h_bias``.
    """
    super(LSTMCell, self).__init__(prefix=prefix, params=params)
    self._num_hidden = num_hidden
    # Two weight/bias pairs: input-to-hidden (i2h) and hidden-to-hidden (h2h).
    self._iW = self.params.get('i2h_weight')
    self._hW = self.params.get('h2h_weight')
    # we add the forget_bias to i2h_bias, this adds the bias to the forget gate activation
    self._iB = self.params.get('i2h_bias', init=init.LSTMBias(forget_bias=forget_bias))
    self._hB = self.params.get('h2h_bias')
其中:self.params.get()
方法為嘗試找到傳入名稱對應的Variable，若找不到則新建，因此該LSTM單元一共僅有兩對參數:iW和iB,hW和hB
前向傳播函數如下:
def __call__(self, inputs, states):
    """Build the symbols for one LSTM step.

    Args:
        inputs: Input symbol for this step.
        states: Sequence of state symbols; element 0 feeds the
            hidden-to-hidden projection and element 1 is multiplied by
            the forget gate.

    Returns:
        ``(next_h, [next_h, next_c])``: the step output and the new states.
    """
    # The per-step counter keeps the symbol names of each step distinct.
    self._counter += 1
    name = '%st%d_'%(self._prefix, self._counter)
    # Both projections produce 4 * num_hidden channels: one slice per gate.
    i2h = symbol.FullyConnected(data=inputs, weight=self._iW, bias=self._iB,
                                num_hidden=self._num_hidden*4,
                                name='%si2h'%name)
    h2h = symbol.FullyConnected(data=states[0], weight=self._hW, bias=self._hB,
                                num_hidden=self._num_hidden*4,
                                name='%sh2h'%name)
    gates = i2h + h2h
    slice_gates = symbol.SliceChannel(gates, num_outputs=4,name="%sslice"%name)
    # Slice order: input gate, forget gate, input transform, output gate.
    in_gate = symbol.Activation(slice_gates[0], act_type="sigmoid",name='%si'%name)
    forget_gate = symbol.Activation(slice_gates[1], act_type="sigmoid",name='%sf'%name)
    in_transform = symbol.Activation(slice_gates[2], act_type="tanh",name='%sc'%name)
    out_gate = symbol.Activation(slice_gates[3], act_type="sigmoid",name='%so'%name)
    # next_c = forget_gate * states[1] + in_gate * in_transform
    next_c = symbol._internal._plus(forget_gate * states[1], in_gate * in_transform,name='%sstate'%name)
    # next_h = out_gate * tanh(next_c)
    next_h = symbol._internal._mul(out_gate, symbol.Activation(next_c, act_type="tanh"),name='%sout'%name)
    return next_h, [next_h, next_c]
可以看出，LSTM的實現過程如下所示
- 計算隱層輸入與狀態，隱層的channel數量是配置的num_hidden的四倍
- 將隱層輸入結果和隱層狀態相加,并按channel數量切分為4份
  - 第一份作為輸入門層，經過sigmoid函數
  - 第二份作為忘記門層，經過sigmoid函數
  - 第三份作為輸入轉換層，經過tanh函數
  - 第四份作為輸出門層,經過sigmoid函數
- 產生輸出
  - 輸出狀態為忘記門層乘狀態的一部分加輸入門層乘輸入轉換層
  - 輸出結果為輸出狀態經過tanh乘輸出門層
結果生成
# Unroll the LSTM cell over a whole embedded sequence.
sequence_length = 10
input_dim = 10
seq_input = mx.symbol.Variable('seq_data')
embedded_seq = mx.symbol.Embedding(
    data=seq_input,
    input_dim=input_dim,
    output_dim=embed_dim,
)
outputs, states = lstm_cell.unroll(
    length=sequence_length,
    inputs=embedded_seq,
    layout='NTC',
    merge_outputs=True,
)
使用unroll
方法按時間展平運算，輸入數據為(batch_size,length,...)(layout="NTC")或(length,batch_size,...)(layout="TNC")
該函數的源碼為:
def unroll(self, length, inputs, begin_state=None, layout='NTC', merge_outputs=None):
    """Apply the cell ``length`` times, threading the states through.

    Args:
        length: Number of steps to run.
        inputs: Input sequence; passed through ``_normalize_sequence`` and
            then indexed once per step.
        begin_state: Initial states; ``self.begin_state()`` is used when
            this is ``None``.
        layout: Forwarded to ``_normalize_sequence`` for both the inputs
            and the outputs.
        merge_outputs: Forwarded to ``_normalize_sequence`` when the
            per-step outputs are post-processed.

    Returns:
        ``(outputs, states)``: the processed outputs and the states
        returned by the last step.
    """
    self.reset()
    inputs, _ = _normalize_sequence(length, inputs, layout, False)
    if begin_state is None:
        begin_state = self.begin_state()
    states = begin_state
    outputs = []
    # Each step consumes the states produced by the previous one.
    for i in range(length):
        output, states = self(inputs[i], states)
        outputs.append(output)
    outputs, _ = _normalize_sequence(length, outputs, layout, merge_outputs)
    return outputs, states
方法_normalize_sequence
是對輸入做一些處理，由unroll中的for循環可以看出，unroll方法按時間步循環執行了網絡運算