本文之編寫程序涉及到API介紹、程序的完整實現,具體算法原理請查看之前所寫的K-Means算法介紹。
一、基礎準備
1、python 基礎
2、numpy 基礎
3、tensorflow 基礎
initialized_value
作用:
返回已經初始化變量的值。當其他變量的初始值依賴於這個變量時,應該使用這個函數,而不是直接使用變量本身來初始化。
# Source variable whose initial value the other variables depend on.
weights = tf.Variable(
    tf.random_normal([784, 200], stddev=0.35),
    name="weights",
)
# A second variable initialised with the same contents as `weights`.
w2 = tf.Variable(weights.initialized_value(), name="w2")
# The initial value can also be scaled before it is used.
scaled_init = weights.initialized_value() * 0.2
w_twice = tf.Variable(scaled_init, name="w_twice")
tf.slice:
切割數組
# Build a 3x3x4 array holding 0..35, so that y[i, j, k] == 12*i + 4*j + k.
y = np.arange(36).reshape([3, 3, 4])
begin_y = [1, 1, 0]  # start index of the slice in each dimension
# Number of elements taken along each dimension: 2 along the first,
# 2 along the second and 3 along the third (three values per row, as in
# the result shown below).
size_y = [2, 2, 3]
# The slice therefore consists of:
#   y[1, 1, 0:3] = [16 17 18]
#   y[1, 2, 0:3] = [20 21 22]
#   y[2, 1, 0:3] = [28 29 30]
#   y[2, 2, 0:3] = [32 33 34]
print("y")
print(y)
out = tf.slice(y, begin_y, size_y)
# The context manager closes the session once the result has been printed.
with tf.Session() as sess:
    print(sess.run(out))  # result: [[[16 17 18][20 21 22]][[28 29 30][32 33 34]]]
print("---------------")
tf.tile
複製數組
tf.tile(input, multiples, name = None)
# Tile a 2-D tensor twice along each of its two dimensions.
data = tf.constant([[1, 2, 3, 4], [9, 8, 7, 6]])
d = tf.tile(data, [2, 2])
with tf.Session() as sess:
    print(sess.run(d))
# ----------
# >> [[1 2 3 4 1 2 3 4]
# >>  [9 8 7 6 9 8 7 6]
# >>  [1 2 3 4 1 2 3 4]
# >>  [9 8 7 6 9 8 7 6]]

# Tile a 1-D tensor twice; `multiples` needs one entry per dimension.
data = tf.constant([1, 2, 3, 4])
d = tf.tile(data, [2])
with tf.Session() as sess:
    print(sess.run(d))
# ----------
# >> [1 2 3 4 1 2 3 4]
tf.reduce_sum
沿指定維度計算tensor中各元素的和:reduction_indices(新版本中稱為 axis)等於0時按列求和,等於1時按行求和,因此得到的結果不一樣。
inputs = [[1, 0, 2], [3, 2, 4]]
# `axis` is the current name of the deprecated `reduction_indices` argument.
# axis=0 sums down each column.
B = tf.reduce_sum(inputs, axis=0)
with tf.Session() as sess:
    print(sess.run(B))
# >> [4 2 6]
# axis=1 sums across each row.
B = tf.reduce_sum(inputs, axis=1)
with tf.Session() as sess:
    print(sess.run(B))
# >> [3 9]
tf.arg_min
求數組最小值的下標:axis=0 代表沿第一維度求,axis=1 代表沿第二維度求。
# Example matrix with 2 rows and 3 columns.
data = tf.constant([[8, 1, 2], [2, 3, 4]])
sess = tf.Session()
# Index of the smallest entry in each column (reduce along dimension 0).
col_argmin = tf.arg_min(data, 0)
print(sess.run(col_argmin))
# >> [1 0 0]
# Index of the smallest entry in each row (reduce along dimension 1).
row_argmin = tf.arg_min(data, 1)
print(sess.run(row_argmin))
# >>[1 0]
tf.reduce_any
計算tensor中各個元素的邏輯或(or運算)
# Boolean matrix whose first column is all True and second all False.
inputs = np.array([[True, False], [True, False]])
with tf.Session() as sess:
    # Logical OR down each column (dimension 0).
    A = tf.reduce_any(inputs, 0)
    print(sess.run(A))
    # >>[ True False]
    # Logical OR across each row (dimension 1).
    A = tf.reduce_any(inputs, 1)
    print(sess.run(A))
    # >>[ True True]
tf.unsorted_segment_sum
根據segment_ids的分段計算各個片段的和。
tf.unsorted_segment_sum(data, segment_ids, num_segments, name=None) 與tf.segment_sum函數類似,
不同在於segment_ids中id順序可以是無序的。
# Three rows; rows 0 and 2 are labelled segment 0, row 1 is segment 1.
rows = [[1, 2, 3, 4], [-1, -2, -3, -4], [-1, -2, -8, -4]]
t1 = tf.constant(rows)
segment_ids = tf.constant([0, 1, 0])
# Sum the rows that share a segment id; 2 is the number of segments.
t2 = tf.unsorted_segment_sum(t1, segment_ids, 2)
with tf.Session() as sess1:
    print(sess1.run(t2))
實際上就是把對應下標相同的數組相加。
如[0, 1, 0],則是 0: [1,2,3,4] + [-1,-2,-8,-4] = [0,0,-5,0],1: [-1,-2,-3,-4]
assign
tf.assign是用來更新模型中變量的值的。ref是待賦值的變量,value是要更新的值,即效果等同於 ref = value。
a = tf.Variable(0.0)
# Scalar placeholder that supplies the value to be written into `a`.
b = tf.placeholder(dtype=tf.float32, shape=[])
op = tf.assign(a, b)
# The context manager closes the session when the example is done.
with tf.Session() as sess:
    # tf.global_variables_initializer() replaces the deprecated
    # tf.initialize_all_variables().
    sess.run(tf.global_variables_initializer())
    print(sess.run(a))
    # 0.0
    # Running the assign op with b fed as 5.0 overwrites the value of `a`.
    sess.run(op, feed_dict={b: 5.})
    print(sess.run(a))
    # 5.0
二、完整程序
# -*- coding: utf-8 -*-
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
K = 4  # number of clusters
MAX_ITERS = 1000  # maximum number of iterations
# MAX_ITERS = 2  # maximum number of iterations
N = 200  # number of sample points; replaced by the real count once the data is loaded
# Reference cluster centres. NOTE: this name is rebound to a tf.Variable
# when the graph is built further down the file.
centers = [[-2, -2], [-2, 1.5], [1.5, -2], [2, 1.5]]

print("1、加載數據")
dataSet = []
# A forward slash is accepted as a path separator on Windows as well as on
# POSIX systems, so the relative path works on both. The `with` block closes
# the file even if parsing fails.
with open('data/testData.txt') as fileIn:
    for line in fileIn:
        # split() without arguments tolerates repeated spaces and tabs.
        lineArr = line.split()
        if not lineArr:
            continue  # skip blank lines instead of failing on them
        dataSet.append([float(lineArr[0]), float(lineArr[1])])
N = len(dataSet)
# print("2、數(shù)據(jù)歸一化")
# print(dataSet[0])
# # dataSet = np.mat(dataSet)
# # print(dataSet[0])
def showCluster(dataSet, k, clusterAssment):
    """Scatter-plot 2-D samples, using one marker style per cluster.

    Args:
        dataSet: sequence of samples convertible to a 2-D array with
            exactly two columns (x and y).
        k: number of clusters; must not exceed the number of available
            marker styles (10).
        clusterAssment: per-sample cluster index; entry ``i`` selects the
            marker used for sample ``i``.

    Returns:
        1 if the data cannot be drawn (it is not an (n, 2) array, or ``k``
        is larger than the number of marker styles); otherwise None after
        the figure has been shown.
    """
    dataSet = np.array(dataSet)
    # Reject anything that is not an (n, 2) array up front. This also covers
    # empty or one-dimensional input, which has no second axis to inspect.
    if dataSet.ndim != 2 or dataSet.shape[1] != 2:
        print("Sorry! I can not draw because the dimension of your data is not 2!")
        return 1

    mark = ['or', 'ob', 'og', 'ok', '^r', '+r', 'sr', 'dr', '<r', 'pr']
    if k > len(mark):
        print("Sorry! I can not draw because k is larger than the number of "
              "available markers (%d)!" % len(mark))
        return 1

    # Draw every sample with the marker of the cluster it is assigned to.
    for i, (x, y) in enumerate(dataSet):
        plt.plot(x, y, mark[int(clusterAssment[i])])
    plt.show()
def clusterMean(data, id, num):
    """Return the per-cluster mean of ``data``.

    Args:
        data: tensor holding the samples.
        id: cluster label of each sample in ``data``.
        num: number of clusters.

    Returns:
        The per-cluster sum of ``data`` divided element-wise by the
        per-cluster sum of a ones tensor of the same shape.
    """
    segment_sums = tf.unsorted_segment_sum(data, id, num)
    # Summing a tensor of ones under the same labels gives, for every
    # cluster, the number of samples assigned to it.
    segment_sizes = tf.unsorted_segment_sum(tf.ones_like(data), id, num)
    return segment_sums / segment_sizes
# ---- Build the graph ----
# All samples, and the cluster index currently assigned to each of them.
points = tf.Variable(dataSet)
cluster = tf.Variable(tf.zeros([N], dtype=tf.int64))
# Use the first K samples as the initial cluster centres.
centers = tf.Variable(tf.slice(points.initialized_value(), [0, 0], [K, 2]))
# Replicate centres and points to shape [N, K, 2] so that every
# point/centre pair can be compared in a single batched operation:
# repCenters[n, k] is centre k and repPoints[n, k] is point n.
repCenters = tf.reshape(tf.tile(centers, [N, 1]), [N, K, 2])
repPoints = tf.reshape(tf.tile(points, [1, K]), [N, K, 2])
# Squared Euclidean distance from every point to every centre, shape [N, K].
# `axis` is the current name of the deprecated `reduction_indices` argument.
sumSqure = tf.reduce_sum(tf.square(repCenters - repPoints), axis=2)
# Index of the nearest centre for every point.
bestCenter = tf.argmin(sumSqure, axis=1)
# True while at least one point is assigned to a different cluster than in
# the stored assignment.
change = tf.reduce_any(tf.not_equal(bestCenter, cluster))
# Mean of the points assigned to each cluster.
means = clusterMean(points, bestCenter, K)
# The control dependency makes sure `change` is evaluated against the old
# assignment before the centres and the assignment are overwritten.
with tf.control_dependencies([change]):
    update = tf.group(centers.assign(means), cluster.assign(bestCenter))

# ---- Run the graph ----
with tf.Session() as sess:
    # tf.global_variables_initializer() replaces the deprecated
    # tf.initialize_all_variables().
    sess.run(tf.global_variables_initializer())
    changed = True
    iterNum = 0
    # Iterate until no assignment changes or the iteration budget is used up.
    while changed and iterNum < MAX_ITERS:
        iterNum += 1
        [changed, _] = sess.run([change, update])
    # Fetch the final centres and assignment, print them and plot the result.
    [centersArr, clusterArr] = sess.run([centers, cluster])
    print(clusterArr)
    print(centersArr)
    showCluster(dataSet, K, clusterArr)
# # 顯示圖像
# fig, ax = plt.subplots()
# ax.scatter(dataSet.transpose()[0], dataSet.transpose()[1], marker='o', s=100, c=clusterArr)
# plt.plot()
# plt.show()