用的是kaggle上的比賽“Quora Question Pairs: Can you identify question pairs that have the same intent?”
評估指標: log loss
訓練集:
- 大小:40.4萬
- 屬性:6列,分別是id, qid1, qid2, question1, question2, is_duplicate
測試集:
- 大小:235萬
- 屬性:3列,分別是test_id, question1, question2
這里用到的第一個方法是孿生網絡 Siamese Network,白話點就是我要看看這兩個句子是否一樣,就將兩個輸入feed進兩個共享權重的神經網絡,word embedding后,通過Loss的計算,評價兩個輸入的相似度。
image.png
在之前word embedding中有提到cosine值是計算兩個向量的夾角來判斷兩個詞的相似性,那么句子呢?段落呢?可以用exp保留兩個向量的長度信息(見下圖)
image.png
兩邊都用了LSTM,仔細看下LSTMa一開始是不知道LSTMb的存在,直到進行到h3(a)時,才會和LSTMb中的h4(b)進行匹配。h3(a)和h4(b)之間用曼哈頓距離來度量兩個句子的空間相似度,即相似度 = exp(-‖h3(a) - h4(b)‖₁)。
當兩邊都是LSTM時
# Siamese LSTM graph.
#
# Both sentences are embedded with the same token-embedding table and encoded
# by the same LSTM cell (shared weights). The two final hidden states are then
# combined into a feature vector and classified with one linear layer.

with tf.name_scope('embeddings'):
    # One embedding table of shape [vocab_size, embedding_dim], shared by
    # both sentences.
    self._m_token_embeddings = tf.Variable(
        tf.truncated_normal(
            [self._m_config["vocab_size"], self._m_config["embedding_dim"]],
            stddev=0.1
        ),
        name="token_embeddings"
    )
    embedded_sent1 = tf.nn.embedding_lookup(self._m_token_embeddings, self._m_ph_sent1)
    embedded_sent2 = tf.nn.embedding_lookup(self._m_token_embeddings, self._m_ph_sent2)
    # Kept on the instance so the embedded first sentence can be inspected.
    self._m_embedded_sent1 = embedded_sent1

with tf.name_scope('lstm_layer'):
    # A single cell encodes BOTH sentences: sharing the weights is what makes
    # the network "Siamese". (A second, never-used cell object was removed;
    # it suggested two independent towers, which is not what the graph does.)
    cell1 = tf.nn.rnn_cell.LSTMCell(
        self._m_config["lstm_dim"],
        state_is_tuple=True,
        reuse=tf.AUTO_REUSE
    )
    # dynamic_rnn returns (outputs, final_state); with state_is_tuple=True the
    # final state is a (c, h) pair, so the second element kept here is the
    # final hidden state h. sequence_length stops the recurrence at each
    # sentence's true length.
    _, (_, output_cell1) = tf.nn.dynamic_rnn(
        cell1, embedded_sent1, dtype=tf.float32, sequence_length=self._m_ph_sent1_size)
    _, (_, output_cell2) = tf.nn.dynamic_rnn(
        cell1, embedded_sent2, dtype=tf.float32, sequence_length=self._m_ph_sent2_size)

with tf.name_scope("feature_mapping"):
    # Matching features: element-wise difference and product of the two
    # sentence vectors, plus the raw vectors -> 4 * lstm_dim columns.
    sent_diff = output_cell1 - output_cell2
    sent_mul = tf.multiply(output_cell1, output_cell2)
    features = tf.concat([sent_diff, sent_mul, output_cell1, output_cell2], axis=1)
    W = tf.Variable(tf.truncated_normal(
        shape=[self._m_config["lstm_dim"] * 4, self._m_config["label_num"]],
        stddev=0.1, mean=0.0))
    b = tf.Variable(tf.truncated_normal(
        shape=[self._m_config["label_num"]], stddev=0.1, mean=0.0))
    self._m_logits = tf.nn.xw_plus_b(features, W, b)

with tf.name_scope("loss"):
    # Labels are compared through argmax(axis=1) below, so they are expected
    # to be per-class rows (presumably one-hot -- confirm against the feeder).
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
        labels=self._m_ph_label, logits=self._m_logits)
    self._m_loss = tf.reduce_mean(cross_entropy)

with tf.name_scope("accuracy"):
    self._m_prediction = tf.argmax(self._m_logits, axis=1)
    correct = tf.equal(self._m_prediction, tf.argmax(self._m_ph_label, axis=1))
    self._m_accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

with tf.name_scope("optimizer"):
    # global_step is incremented by minimize() on every training step.
    self._m_global_step = tf.Variable(0, name="global_step", trainable=False)
    self._m_optimizer = tf.train.AdamOptimizer(self._m_config["learning_rate"])
    self._m_train_op = self._m_optimizer.minimize(
        self._m_loss, global_step=self._m_global_step)
當兩邊都用CNN時
# Siamese CNN graph.
#
# Both sentences are embedded with the same token-embedding table and encoded
# by the same convolutional feature extractor (self._build_conv_features).
# The two sentence vectors are combined into a feature vector, regularised
# with dropout and classified with one linear layer.

with tf.name_scope('embeddings'):
    # One embedding table of shape [vocab_size, embedding_dim], shared by
    # both sentences.
    self._m_token_embeddings = tf.Variable(
        tf.truncated_normal(
            [self._m_config["vocab_size"], self._m_config["embedding_dim"]],
            stddev=0.1
        ),
        name="token_embeddings"
    )
    embedded_sent1 = tf.nn.embedding_lookup(self._m_token_embeddings, self._m_ph_sent1)
    embedded_sent2 = tf.nn.embedding_lookup(self._m_token_embeddings, self._m_ph_sent2)
    # Dropout on the embedded tokens; the keep probability is fed through a
    # placeholder so it can differ between training and evaluation.
    dropout_embedded_sent1 = tf.nn.dropout(embedded_sent1, keep_prob=self._m_ph_keep_prob)
    dropout_embedded_sent2 = tf.nn.dropout(embedded_sent2, keep_prob=self._m_ph_keep_prob)

with tf.name_scope('sentence_features'):
    sent1_features = self._build_conv_features(dropout_embedded_sent1)
    sent2_features = self._build_conv_features(dropout_embedded_sent2)
    # NOTE: dropout on the per-sentence features is deliberately disabled;
    # the identity ops keep the downstream variable names unchanged.
    dropout_sent1_features = tf.identity(sent1_features)
    dropout_sent2_features = tf.identity(sent2_features)

with tf.name_scope("feature_mapping"):
    # Matching features: element-wise difference and product of the two
    # sentence vectors, plus the raw vectors -> 4 * cnn_feature_num columns.
    sent_diff = dropout_sent1_features - dropout_sent2_features
    sent_mul = tf.multiply(dropout_sent1_features, dropout_sent2_features)
    features = tf.concat([sent_diff, sent_mul, dropout_sent1_features, dropout_sent2_features], axis=1)
    dropout_features = tf.nn.dropout(features, keep_prob=self._m_ph_keep_prob)
    # Width of one sentence vector as implied by the weight shape below:
    # num_filters values per configured filter size.
    cnn_feature_num = self._m_config["num_filters"] * len(self._m_config["filter_sizes"])
    W = tf.Variable(tf.truncated_normal(
        shape=[cnn_feature_num * 4, self._m_config["label_num"]],
        stddev=0.1, mean=0.0))
    b = tf.Variable(tf.truncated_normal(
        shape=[self._m_config["label_num"]], stddev=0.1, mean=0.0))
    # Fix: classify the dropped-out features. Previously the un-dropped
    # `features` tensor was used here, so `dropout_features` was dead and
    # this dropout layer never took effect.
    self._m_logits = tf.nn.xw_plus_b(dropout_features, W, b)

with tf.name_scope("loss"):
    # Labels are compared through argmax(axis=1) below, so they are expected
    # to be per-class rows (presumably one-hot -- confirm against the feeder).
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
        labels=self._m_ph_label, logits=self._m_logits)
    self._m_loss = tf.reduce_mean(cross_entropy)

with tf.name_scope("accuracy"):
    self._m_prediction = tf.argmax(self._m_logits, axis=1)
    correct = tf.equal(self._m_prediction, tf.argmax(self._m_ph_label, axis=1))
    self._m_accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

with tf.name_scope("optimizer"):
    # global_step is incremented by minimize() on every training step.
    self._m_global_step = tf.Variable(0, name="global_step", trainable=False)
    self._m_optimizer = tf.train.AdamOptimizer(self._m_config["learning_rate"])
    self._m_train_op = self._m_optimizer.minimize(
        self._m_loss, global_step=self._m_global_step)
孿生網絡是先建模再匹配,LSTMa一直到h3(a)才知道有h4(b)。有沒有可能句子一開始就知道另外一條句子,并進行匹配呢? 下一節 Match Pyramid是先匹配再建模。