Paper Walkthrough
RCNN comes from the paper Recurrent Convolutional Neural Networks for Text Classification.
For a detailed walkthrough, you can refer to the TextRCNN reading notes.
Network Structure
- Word Representation Learning. RCNN uses a recurrent structure, a bi-directional recurrent neural network, to capture the left and right contexts of each word. The word embedding and its two contexts are concatenated to represent the word, and a linear transformation followed by the tanh activation function is applied to that representation.
- Text Representation Learning. Once the representations of all words are computed, an element-wise max-pooling layer captures the most important information across the entire text. Finally, a linear transformation and the softmax function produce the class probabilities.
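For reference, these are the core equations as given in the paper, where $e(w_i)$ is the embedding of word $w_i$, $c_l(w_i)$ and $c_r(w_i)$ are its left and right contexts, and $f$ is a non-linear activation:

$$c_l(w_i) = f\big(W^{(l)} c_l(w_{i-1}) + W^{(sl)} e(w_{i-1})\big), \qquad c_r(w_i) = f\big(W^{(r)} c_r(w_{i+1}) + W^{(sr)} e(w_{i+1})\big)$$

$$x_i = [c_l(w_i);\, e(w_i);\, c_r(w_i)], \qquad y_i^{(2)} = \tanh\big(W^{(2)} x_i + b^{(2)}\big)$$

$$y^{(3)} = \max_{i=1}^{n} y_i^{(2)} \;\;\text{(element-wise)}, \qquad p = \mathrm{softmax}\big(W^{(4)} y^{(3)} + b^{(4)}\big)$$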
Implementation in This Post
Defining the Network Structure
from tensorflow.keras import Input, Model
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Embedding, Dense, SimpleRNN, Lambda, Concatenate, Conv1D, GlobalMaxPooling1D

class RCNN(object):
    def __init__(self, maxlen, max_features, embedding_dims, class_num=5, last_activation='softmax'):
        self.maxlen = maxlen
        self.max_features = max_features
        self.embedding_dims = embedding_dims
        self.class_num = class_num
        self.last_activation = last_activation

    def get_model(self):
        # Three inputs: the sentence itself plus its left- and right-shifted copies
        input_current = Input((self.maxlen,))
        input_left = Input((self.maxlen,))
        input_right = Input((self.maxlen,))
        # A single embedding layer shared by all three inputs
        embedder = Embedding(self.max_features, self.embedding_dims, input_length=self.maxlen)
        embedding_current = embedder(input_current)
        embedding_left = embedder(input_left)
        embedding_right = embedder(input_right)
        # Forward RNN over the left contexts, backward RNN over the right contexts
        x_left = SimpleRNN(128, return_sequences=True)(embedding_left)
        x_right = SimpleRNN(128, return_sequences=True, go_backwards=True)(embedding_right)
        # go_backwards outputs the sequence reversed, so flip it back to align timesteps
        x_right = Lambda(lambda x: K.reverse(x, axes=1))(x_right)
        # Concatenate [left context; word embedding; right context] at every position
        x = Concatenate(axis=2)([x_left, embedding_current, x_right])
        # A kernel_size=1 convolution is a position-wise linear transform; tanh gives y^(2)
        x = Conv1D(64, kernel_size=1, activation='tanh')(x)
        # Element-wise max over timesteps yields the text representation y^(3)
        x = GlobalMaxPooling1D()(x)
        output = Dense(self.class_num, activation=self.last_activation)(x)
        model = Model(inputs=[input_current, input_left, input_right], outputs=output)
        return model
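As a quick smoke test, the model can be built with illustrative hyperparameters (placeholders here; the actual values used below come from the configuration section) and inspected:

# Smoke test with illustrative hyperparameters, not the final configuration
demo_model = RCNN(maxlen=400, max_features=40001, embedding_dims=50).get_model()
demo_model.summary()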
import os
import random
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import to_categorical
from utils import *
# Paths and other configuration
data_dir = "./processed_data"
vocab_file = "./vocab/vocab.txt"
vocab_size = 40000
# Network configuration
max_features = 40001
maxlen = 400
batch_size = 32
embedding_dims = 50
epochs = 10
print('Preprocessing and loading data...')
# Rebuild the vocabulary if it does not exist yet
if not os.path.exists(vocab_file):
    build_vocab(data_dir, vocab_file, vocab_size)
# Mapping dictionaries: word <-> id and category <-> id
categories, cat_to_id = read_category()
words, word_to_id = read_vocab(vocab_file)
# All of the data
x, y = read_files(data_dir)
data = list(zip(x, y))
del x, y
# Shuffle
random.shuffle(data)
# Split into train and test sets
train_data, test_data = train_test_split(data)
# Encode texts as word-id sequences and labels as one-hot category ids
x_train = encode_sentences([content[0] for content in train_data], word_to_id)
y_train = to_categorical(encode_cate([content[1] for content in train_data], cat_to_id))
x_test = encode_sentences([content[0] for content in test_data], word_to_id)
y_test = to_categorical(encode_cate([content[1] for content in test_data], cat_to_id))
print('Padding sequences to a uniform samples x timesteps shape')
# Note: pad_sequences pads/truncates at the start ('pre') by default
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)
print('Preparing model inputs...')
x_train_current = x_train
# Left context: prepend the first token and drop the last (shift right by one)
x_train_left = np.hstack([np.expand_dims(x_train[:, 0], axis=1), x_train[:, 0:-1]])
# Right context: drop the first token and append the last (shift left by one)
x_train_right = np.hstack([x_train[:, 1:], np.expand_dims(x_train[:, -1], axis=1)])
x_test_current = x_test
x_test_left = np.hstack([np.expand_dims(x_test[:, 0], axis=1), x_test[:, 0:-1]])
x_test_right = np.hstack([x_test[:, 1:], np.expand_dims(x_test[:, -1], axis=1)])
print('x_train_current shape:', x_train_current.shape)
print('x_train_left shape:', x_train_left.shape)
print('x_train_right shape:', x_train_right.shape)
print('x_test_current shape:', x_test_current.shape)
print('x_test_left shape:', x_test_left.shape)
print('x_test_right shape:', x_test_right.shape)
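To make the shift construction concrete, here is a toy example (illustrative only, not part of the pipeline):

# Toy illustration of the context shifts.
# For [1, 2, 3, 4]: the left-context input repeats the first token and drops
# the last; the right-context input drops the first token and repeats the last.
demo = np.array([[1, 2, 3, 4]])
demo_left = np.hstack([np.expand_dims(demo[:, 0], axis=1), demo[:, 0:-1]])
demo_right = np.hstack([demo[:, 1:], np.expand_dims(demo[:, -1], axis=1)])
print(demo_left)   # [[1 1 2 3]]
print(demo_right)  # [[2 3 4 4]]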
print('Building model...')
model = RCNN(maxlen, max_features, embedding_dims).get_model()
model.compile('adam', 'categorical_crossentropy', metrics=['accuracy'])
print('Train...')
early_stopping = EarlyStopping(monitor='val_accuracy', patience=2, mode='max')
history = model.fit([x_train_current, x_train_left, x_train_right], y_train,
batch_size=batch_size,
epochs=epochs,
callbacks=[early_stopping],
validation_data=([x_test_current, x_test_left, x_test_right], y_test))
print('Test...')
result = model.predict([x_test_current, x_test_left, x_test_right])
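model.predict returns class probabilities; here is a minimal sketch (assuming y_test is one-hot encoded, as above) of converting them to label ids and computing a test accuracy:

# Probabilities -> predicted label ids, then compare with the one-hot targets
pred_ids = np.argmax(result, axis=1)
true_ids = np.argmax(y_test, axis=1)
print('Test accuracy:', (pred_ids == true_ids).mean())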
import matplotlib.pyplot as plt
%matplotlib inline
fig1 = plt.figure()
plt.plot(history.history['loss'],'r',linewidth=3.0)
plt.plot(history.history['val_loss'],'b',linewidth=3.0)
plt.legend(['Training Loss', 'Validation Loss'], fontsize=18)
plt.xlabel('Epochs', fontsize=16)
plt.ylabel('Loss', fontsize=16)
plt.title('Loss Curves: RCNN', fontsize=16)
fig1.savefig('loss_rcnn.png')
plt.show()
fig2=plt.figure()
plt.plot(history.history['accuracy'],'r',linewidth=3.0)
plt.plot(history.history['val_accuracy'],'b',linewidth=3.0)
plt.legend(['Training Accuracy', 'Validation Accuracy'], fontsize=18)
plt.xlabel('Epochs', fontsize=16)
plt.ylabel('Accuracy', fontsize=16)
plt.title('Accuracy Curves: RCNN', fontsize=16)
fig2.savefig('accuracy_rcnn.png')
plt.show()
from tensorflow.keras.utils import plot_model
# Rendering the model graph requires pydot and graphviz to be installed
plot_model(model, show_shapes=True, show_layer_names=True)