利用TensorFlow進行分類

使用tensorflow進行單詞劃分類別，依賴于sklearn自帶的已經標注好的數據集（有監督學習）

參考https://www.oschina.net/translate/big-picture-machine-learning?lang=chs&page=1#
完整代碼https://github.com/pursedream/tensorflow_1

導入相關包

from collections import Counter
import numpy as np
from sklearn.datasets import fetch_20newsgroups
import tensorflow as tf
import pandas as pd

獲取訓練數據和測試數據

# Load the labelled 20-newsgroups dataset that ships with scikit-learn
# (supervised learning: every document already carries its category).
# See http://scikit-learn.org/stable/datasets/twenty_newsgroups.html
categories = ["comp.graphics", "sci.space", "rec.sport.baseball"]
train_set = fetch_20newsgroups(subset='train', categories=categories)
test_set = fetch_20newsgroups(subset='test', categories=categories)
# print('total texts in train:',len(train_set.data))
# print('total texts in test:',len(test_set.data))
# Build the word-frequency vocabulary over both splits; the final form is
# vocab['the'] = number of occurrences.
# Words from BOTH splits are lower-cased. Previously only the training words
# were, so mixed-case test words inflated len(vocab) with entries that the
# lower-casing lookups could never address.
vocab = Counter()
for split in (train_set, test_set):
    for text in split.data:
        vocab.update(word.lower() for word in text.split(' '))
print(len(vocab))
total_words = len(vocab)
def get_index(vocab):
    """Map every lower-cased entry of *vocab* to its enumeration position.

    Args:
        vocab: iterable of strings (iterating a Counter yields its keys).

    Returns:
        A dict ``{entry.lower(): position}``. When several entries share the
        same lower-cased form, the position of the last one wins.
    """
    return {element.lower(): position for position, element in enumerate(vocab)}
# Word -> column-index lookup table; get_batch uses it to place word counts.
text_index = get_index(vocab)
# Sanity check: show the index assigned to the word 'the'.
print("the is %s" % text_index['the'])

神經網絡的參數設定

# Number of units per layer: input units, hidden units, and output units
# (one output per class: "comp.graphics", "sci.space", "rec.sport.baseball").
n_hidden1 = 100
# NOTE: the spelling `n_hiddent2` is referenced where the weights and biases
# are built, so it must be kept as-is unless every use is renamed together.
n_hiddent2 = 100
n_input_number = total_words
n_class = 3
# In neural-network terminology, one epoch = one forward pass (computing the
# outputs) plus one backward pass (updating the weights) over all training
# examples.
training_epochs = 10
learning_rate = 0.01
# Number of documents per mini-batch.
batch_size = 150
# The training loop prints the loss every `display_step` epochs.
display_step = 1
# A None entry in a shape denotes a dimension of variable size.
# The model is evaluated with a larger batch than it is trained with, which is
# why the batch dimension of both placeholders is left variable.
input_tensor = tf.placeholder(tf.float32, [None, n_input_number], name='input')
output_tensor = tf.placeholder(tf.float32, [None, n_class], name='output')

建立模型

# Forward computation of the network
def out_prediction(input_tensor, weights, biases):
    """Build the forward pass: two ReLU hidden layers and a linear output layer.

    Args:
        input_tensor: tensor multiplied by ``weights['h1']``.
        weights: mapping with the keys ``'h1'``, ``'h2'`` and ``'out'``.
        biases: mapping with the keys ``'b1'``, ``'b2'`` and ``'out'``.

    Returns:
        The output-layer tensor ``hidden_2 @ weights['out'] + biases['out']``;
        no activation is applied to it here.
    """
    # Each hidden layer is matmul -> bias add -> ReLU activation.
    hidden_1 = tf.nn.relu(
        tf.add(tf.matmul(input_tensor, weights['h1']), biases['b1'])
    )
    hidden_2 = tf.nn.relu(
        tf.add(tf.matmul(hidden_1, weights['h2']), biases['b2'])
    )

    # Output layer: affine transform only.
    return tf.matmul(hidden_2, weights['out']) + biases['out']
# Meaning of a shape argument: [] is a scalar, [3] is a vector of length 3,
# [2, 3] is a matrix, i.e. a tensor (the same linear transformation expressed
# in different bases).
# https://www.zhihu.com/question/20695804
# Weights and biases start from normally distributed random values.
weights = {
    key: tf.Variable(tf.random_normal(shape))
    for key, shape in (
        ('h1', [n_input_number, n_hidden1]),
        ('h2', [n_hidden1, n_hiddent2]),
        ('out', [n_hiddent2, n_class]),
    )
}
biases = {
    key: tf.Variable(tf.random_normal(shape))
    for key, shape in (
        ('b1', [n_hidden1]),
        ('b2', [n_hiddent2]),
        ('out', [n_class]),
    )
}
prediction = out_prediction(input_tensor, weights, biases)

由于是分類問題，使用交叉熵誤差進行權值更新

# This is a classification problem, so the training objective is the softmax
# cross-entropy between the network output and the labels fed through
# `output_tensor`.
# `prediction` is the tensor already built by out_prediction(...) right after
# the weights and biases are defined. Calling out_prediction a second time
# here would only add a duplicate copy of every layer op to the graph.
cross_loss = tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=output_tensor)
# Mean of the per-example losses.
loss = tf.reduce_mean(cross_loss)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)
# Op that initialises all variables; run once at the start of the session.
init = tf.global_variables_initializer()

批處理函數

def get_batch(df, i, batch_size):
    """Vectorise the i-th batch of *df* into word-count rows and one-hot labels.

    Args:
        df: object exposing ``data`` (texts) and ``target`` (category ids),
            both sliceable.
        i: zero-based batch number.
        batch_size: number of items per batch; the slice may come back shorter
            at the end of the data.

    Returns:
        A tuple ``(np.array(rows), np.array(labels))``. Each row has
        ``total_words`` entries holding per-word counts. Each label is a
        length-3 one-hot vector.

    Raises:
        KeyError: if a lower-cased word is missing from ``text_index``.
    """
    start = i * batch_size
    stop = start + batch_size
    documents = df.data[start:stop]
    labels = df.target[start:stop]

    # One word-count row per document; the column comes from text_index.
    batches = []
    for document in documents:
        counts = np.zeros(total_words, dtype=float)
        for token in document.split(' '):
            counts[text_index[token.lower()]] += 1
        batches.append(counts)

    # One-hot labels: ids 0 and 1 keep their own slot, anything else uses slot 2.
    results = []
    for label in labels:
        one_hot = np.zeros((3), dtype=float)
        one_hot[label if label in (0, 1) else 2] = 1
        results.append(one_hot)

    return np.array(batches), np.array(results)

在Session環境中訓練模型

with tf.Session() as sess:
    sess.run(init)

    # Batches per epoch, computed once because it does not change between
    # epochs. Ceiling division keeps the trailing partial batch, which a
    # truncating division would silently leave out of training. get_batch
    # slices, so that last batch simply comes back shorter, and the
    # placeholders accept any batch size.
    total_batch = (len(train_set.data) + batch_size - 1) // batch_size

    # Training cycle
    for epoch in range(training_epochs):
        avg_cost = 0.
        # Loop over all batches
        for i in range(total_batch):
            batch_x, batch_y = get_batch(train_set, i, batch_size)
            # Run the optimisation op (backprop) together with the loss op.
            # feed_dict supplies this step's data for the two placeholders.
            c, _ = sess.run(
                [loss, optimizer],
                feed_dict={input_tensor: batch_x, output_tensor: batch_y},
            )
            # Accumulate the average of the per-batch losses.
            avg_cost += c / total_batch
        # Display logs per epoch step
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1), "loss=",
                  "{:.9f}".format(avg_cost))
    print("Optimization Finished!")

利用測試數據進行模型評價

 # Test model
    # A prediction is correct when the arg-max of the network output matches
    # the arg-max of the one-hot label.
    correct_prediction = tf.equal(tf.argmax(prediction, 1), tf.argmax(output_tensor, 1))
    # Calculate accuracy: mean of the 0/1 correctness flags.
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    # Size the single evaluation batch from the TEST split. It was previously
    # sized from the training split, which only worked because slicing past
    # the end of the test data is tolerated.
    total_test_data = len(test_set.target)
    batch_x_test, batch_y_test = get_batch(test_set, 0, total_test_data)
    print("Accuracy:", accuracy.eval({input_tensor: batch_x_test, output_tensor: batch_y_test}))

最后編輯于：2017.12.09 21:56:07

©著作權歸作者所有，轉載或內容合作請聯系作者

人面猴
序言：七十年代末邻梆，一起剝皮案震驚了整個濱河市，隨后出現(xiàn)的幾起案子绎秒，更是在濱河造成了極大的恐慌浦妄，老刑警劉巖，帶你破解...
沈念sama閱讀 206,214評論 6贊 481
死咒
序言：濱河連續(xù)發(fā)生了三起死亡事件见芹，死亡現(xiàn)場離奇詭異剂娄，居然都是意外死亡，警方通過查閱死者的電腦和手機(jī)玄呛，發(fā)現(xiàn)死者居然都...
沈念sama閱讀 88,307評論 2贊 382
救了他兩次的神仙讓他今天三更去死
文/潘曉璐我一進(jìn)店門阅懦，熙熙樓的掌柜王于貴愁眉苦臉地迎上來，“玉大人把鉴，你說我怎么就攤上這事故黑。” “怎么了庭砍？”我有些...
開封第一講書人閱讀 152,543評論 0贊 341
道士緝兇錄：失蹤的賣姜人
文/不壞的土叔我叫張陵场晶，是天一觀的道長。經(jīng)常有香客問我怠缸，道長诗轻，這世上最難降的妖魔是什么？我笑而不...
開封第一講書人閱讀 55,221評論 1贊 279
?港島之戀（遺憾婚禮）
正文為了忘掉前任揭北，我火速辦了婚禮扳炬，結(jié)果婚禮上，老公的妹妹穿的比我還像新娘搔体。我一直安慰自己恨樟，他們只是感情好，可當(dāng)我...
茶點故事閱讀 64,224評論 5贊 371
惡毒庶女頂嫁案：這布局不是一般人想出來的
文/花漫我一把揭開白布疚俱。她就那樣靜靜地躺著劝术，像睡著了一般。火紅的嫁衣襯著肌膚如雪呆奕。梳的紋絲不亂的頭發(fā)上养晋，一...
開封第一講書人閱讀 49,007評論 1贊 284
城市分裂傳說
那天，我揣著相機(jī)與錄音梁钾，去河邊找鬼绳泉。笑死，一個胖子當(dāng)著我的面吹牛姆泻，可吹牛的內(nèi)容都是我干的零酪。我是一名探鬼主播，決...
沈念sama閱讀 38,313評論 3贊 399
雙鴛鴦連環(huán)套：你想象不到人心有多黑
文/蒼蘭香墨我猛地睜開眼拇勃，長吁一口氣：“原來是場噩夢啊……” “哼蛾娶！你這毒婦竟也來了？” 一聲冷哼從身側(cè)響起潜秋，我...
開封第一講書人閱讀 36,956評論 0贊 259
萬榮殺人案實錄
序言：老撾萬榮一對情侶失蹤蛔琅，失蹤者是張志新（化名）和其女友劉穎，沒想到半個月后峻呛，有當(dāng)?shù)厝嗽跇淞掷锇l(fā)現(xiàn)了一具尸體罗售，經(jīng)...
沈念sama閱讀 43,441評論 1贊 300
?護(hù)林員之死
正文獨居荒郊野嶺守林人離奇死亡，尸身上長有42處帶血的膿包…… 初始之章·張勛以下內(nèi)容為張勛視角年9月15日...
茶點故事閱讀 35,925評論 2贊 323
?白月光啟示錄
正文我和宋清朗相戀三年钩述，在試婚紗的時候發(fā)現(xiàn)自己被綠了寨躁。大學(xué)時的朋友給我發(fā)了我未婚夫和他白月光在一起吃飯的照片。...
茶點故事閱讀 38,018評論 1贊 333
活死人
序言：一個原本活蹦亂跳的男人離奇死亡牙勘，死狀恐怖职恳，靈堂內(nèi)的尸體忽然破棺而出所禀，到底是詐尸還是另有隱情，我是刑警寧澤放钦，帶...
沈念sama閱讀 33,685評論 4贊 322
?日本核電站爆炸內(nèi)幕
正文年R本政府宣布色徘，位于F島的核電站，受9級特大地震影響操禀，放射性物質(zhì)發(fā)生泄漏褂策。R本人自食惡果不足惜，卻給世界環(huán)境...
茶點故事閱讀 39,234評論 3贊 307
男人毒藥：我在死后第九天來索命
文/蒙蒙一颓屑、第九天我趴在偏房一處隱蔽的房頂上張望斤寂。院中可真熱鬧，春花似錦揪惦、人聲如沸遍搞。這莊子的主人今日做“春日...
開封第一講書人閱讀 30,240評論 0贊 19
一樁弒父案器腋，背后竟有這般陰謀
文/蒼蘭香墨我抬頭看了看天上的太陽尾抑。三九已至，卻和暖如春蒂培，著一層夾襖步出監(jiān)牢的瞬間再愈，已是汗流浹背。一陣腳步聲響...
開封第一講書人閱讀 31,464評論 1贊 261
情欲美人皮
我被黑心中介騙來泰國打工护戳，沒想到剛下飛機(jī)就差點兒被人妖公主榨干…… 1. 我叫王不留翎冲，地道東北人。一個月前我還...
沈念sama閱讀 45,467評論 2贊 352
代替公主和親
正文我出身青樓媳荒，卻偏偏與公主長得像抗悍，于是被迫代替她去往敵國和親。傳聞我的和親對象是個殘疾皇子钳枕，可洞房花燭夜當(dāng)晚...
茶點故事閱讀 42,762評論 2贊 345

利用TensorFlow進行分類

使用tensorflow進行單詞劃分類別，依賴于sklearn自帶的已經標注好的數據集（有監督學習）

推薦閱讀更多精彩內容