- Apriori algorithm
- Linear regression
- KNN classification on a UCI dataset
- Decision tree
- Naive_Bayes
- K-Means image segmentation
Apriori algorithm
Workflow
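The code below follows the standard Apriori iteration: build candidate k-itemsets from the frequent (k-1)-itemsets, prune any candidate that has an infrequent (k-1)-subset (the Apriori property checked by is_apriori), and keep the candidates whose support clears the threshold:

$$\mathrm{supp}(X) = \frac{|\{t \in D : X \subseteq t\}|}{|D|}, \qquad L_k = \{X \in C_k : \mathrm{supp}(X) \ge \mathrm{min\_support}\}$$

Association rules A => B are then read off the frequent itemsets and kept when their confidence supp(A ∪ B) / supp(A) reaches min_conf.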
Source code
# coding=utf-8
def load_data_set():
    """
    Return a list of transactions; each transaction lists the items bought.
    """
    data_set = [['i1', 'i2', 'i5'], ['i2', 'i4'], ['i2', 'i3'],
                ['i1', 'i2', 'i4'], ['i1', 'i3'], ['i2', 'i3'],
                ['i1', 'i3'], ['i1', 'i2', 'i3', 'i5'], ['i1', 'i2', 'i3'],
                ['i1', 'i4'], ['i2', 'i3']]
    return data_set

def create_C1(data_set):
    """
    Create C1, the set of frozen candidate 1-itemsets.
    """
    C1 = set()
    for t in data_set:
        for item in t:
            item_set = frozenset([item])
            C1.add(item_set)
    return C1

def is_apriori(Ck_item, Lksub1):
    """
    Check whether a candidate k-itemset satisfies the Apriori property,
    i.e. all of its (k-1)-subsets are frequent.
    """
    for item in Ck_item:
        sub_Ck = Ck_item - frozenset([item])
        if sub_Ck not in Lksub1:
            return False
    return True

def create_Ck(Lksub1, k):
    """
    Generate the candidate k-itemsets Ck from the frequent (k-1)-itemsets.
    """
    Ck = set()
    len_Lksub1 = len(Lksub1)
    list_Lksub1 = list(Lksub1)
    for i in range(len_Lksub1):
        for j in range(1, len_Lksub1):
            l1 = list(list_Lksub1[i])
            l2 = list(list_Lksub1[j])
            l1.sort()
            l2.sort()
            if l1[0:k-2] == l2[0:k-2]:
                Ck_item = list_Lksub1[i] | list_Lksub1[j]
                # pruning: drop candidates with an infrequent (k-1)-subset
                if is_apriori(Ck_item, Lksub1):
                    Ck.add(Ck_item)
    return Ck

def generate_Lk_by_Ck(data_set, Ck, min_support, support_data):
    """
    Select the frequent k-itemsets Lk from the candidates Ck by deleting
    candidates whose support is below min_support.
    """
    Lk = set()
    item_count = {}
    # count how many transactions contain each candidate itemset
    for t in data_set:
        for item in Ck:
            if item.issubset(t):
                if item not in item_count:
                    item_count[item] = 1
                else:
                    item_count[item] += 1
    t_num = float(len(data_set))
    for item in item_count:
        if (item_count[item] / t_num) >= min_support:
            Lk.add(item)
            support_data[item] = item_count[item] / t_num
    return Lk

def generate_L(data_set, k, min_support):
    '''
    Generate all frequent itemsets.
    :param data_set: a list of transactions; each transaction contains several items.
    :param k: maximum size of the frequent itemsets.
    :param min_support: minimum support threshold.
    :return: L, the list of frequent-itemset levels, and support_data, a dict of <frequent itemset: support>.
    '''
    support_data = {}
    C1 = create_C1(data_set)
    L1 = generate_Lk_by_Ck(data_set, C1, min_support, support_data)
    Lksub1 = L1.copy()
    L = []
    L.append(Lksub1)
    for i in range(2, k+1):
        Ci = create_Ck(Lksub1, i)
        Li = generate_Lk_by_Ck(data_set, Ci, min_support, support_data)
        Lksub1 = Li.copy()
        L.append(Lksub1)
    return L, support_data

def generate_big_rules(L, support_data, min_conf):
    """
    Generate association rules from the frequent itemsets.
    Args:
        L: list of frequent-itemset levels
        support_data: dict of <frequent itemset: support>
        min_conf: minimum confidence threshold
    Returns:
        big_rule_list: a list of 3-tuples (antecedent, consequent, confidence).
    """
    big_rule_list = []
    sub_set_list = []
    for i in range(0, len(L)):
        for freq_set in L[i]:
            for sub_set in sub_set_list:
                if sub_set.issubset(freq_set):
                    # confidence = support(freq_set) / support(antecedent)
                    conf = support_data[freq_set] / support_data[freq_set - sub_set]
                    big_rule = (freq_set - sub_set, sub_set, conf)
                    if conf >= min_conf and big_rule not in big_rule_list:
                        # print(freq_set - sub_set, " => ", sub_set, "conf: ", conf)
                        big_rule_list.append(big_rule)
            sub_set_list.append(freq_set)
    return big_rule_list

if __name__ == "__main__":
    data_set = load_data_set()
    L, support_data = generate_L(data_set, k=3, min_support=0.2)  # minimum support
    big_rules_list = generate_big_rules(L, support_data, min_conf=0.6)  # minimum confidence
    for Lk in L:
        if not Lk:
            break
        print("=" * 50)
        print("frequent " + str(len(list(Lk)[0])) + "-itemsets\t\tsupport")
        print("=" * 50)
        for freq_set in Lk:
            print(freq_set, support_data[freq_set])
    print()
    print("Rules")
    for item in big_rules_list:
        print(item[0], "=>", item[1], "conf: ", item[2])
Results
==================================================
frequent 1-itemsets support
==================================================
frozenset({'i2'}) 0.7272727272727273
frozenset({'i1'}) 0.6363636363636364
frozenset({'i3'}) 0.6363636363636364
frozenset({'i4'}) 0.2727272727272727
==================================================
frequent 2-itemsets support
==================================================
frozenset({'i1', 'i2'}) 0.36363636363636365
frozenset({'i3', 'i2'}) 0.45454545454545453
frozenset({'i1', 'i3'}) 0.36363636363636365
Rules
frozenset({'i3'}) => frozenset({'i2'}) conf: 0.7142857142857143
frozenset({'i2'}) => frozenset({'i3'}) conf: 0.625
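As a quick check, the confidence values follow directly from the supports printed above, e.g.

$$\mathrm{conf}(\{i3\} \Rightarrow \{i2\}) = \frac{\mathrm{supp}(\{i2, i3\})}{\mathrm{supp}(\{i3\})} = \frac{0.4545}{0.6364} \approx 0.714, \qquad \mathrm{conf}(\{i2\} \Rightarrow \{i3\}) = \frac{0.4545}{0.7273} = 0.625$$

which matches the output of generate_big_rules.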
Linear regression
Workflow
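The train function below fits a line y = θᵀx (with a bias column of ones appended to x) by batch gradient descent under 5-fold cross-validation. With h = Xθ, its cost and grad helpers correspond to

$$J(\theta) = \frac{1}{2n}\sum_{i}(h_i - y_i)^2, \qquad \nabla J(\theta) = \frac{1}{n} X^{\top}(h - y), \qquad \theta \leftarrow \theta - \gamma\,\nabla J(\theta)$$

with learning rate γ = 1e-3 and early stopping once the change in validation cost drops below ε = 1e-8.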
Source code
from matplotlib import pyplot as plt
import numpy as np
from sklearn.preprocessing import minmax_scale
from sklearn.model_selection import KFold
import os

os.chdir('./2_LR/')

def generate_data(N):
    import random
    student = []
    delta = (10 - 0.4) / N
    y = lambda x: int(((-5 / 8) * x ** 2) + (25 / 2) * x + 305 / 8)
    lt = []
    for i in range(N):
        learn_time = 0. + delta * i
        lt.append(learn_time)
        # print(learn_time)
        score = y(learn_time)
        # score = random.random(0, 5)
        # print(score)
        student.append(score)
    lt, student = np.array(lt)[:, np.newaxis], np.array(student)[:, np.newaxis]
    return lt, student

def predict(theta, x):
    '''
    theta: (d, 1)
    x: (n, d)
    '''
    # x = x.reshape((len(x), -1))
    # print(x.shape)
    return x.dot(theta).reshape((-1, 1))

def train(X, Y):
    def predict(theta, x):
        '''
        theta: (d, 1)
        x: (n, d)
        '''
        # x = x.reshape((len(x), -1))
        # print(x.shape)
        return x.dot(theta).reshape((-1, 1))

    def const_error(h, y):
        return h - y

    # mean squared error (halved); h is the prediction
    def cost(h, y, con):
        return (np.mean(con ** 2)) / 2

    def grad(x, con):
        return np.mean(con * x, axis=0, keepdims=True).transpose()

    gamma = 1e-3
    epoch = 10000
    epsilon = 1e-8
    XY = np.concatenate((X, np.ones((len(X), 1)), Y), axis=1)
    np.random.seed(2)
    np.random.shuffle(XY)
    X, Y = XY[:, :2], XY[:, 2:]
    kfold = KFold(5)
    thetas = []  # parameters learned in each fold
    for j, (train_index, test_index) in enumerate(kfold.split(X)):
        # print(train_index)
        train_x, test_x, train_y, test_y = X[train_index], X[test_index], Y[train_index], Y[test_index]
        sc = []  # training cost per epoch
        vc = []  # validation cost per epoch
        theta = np.random.randn(2, 1)  # random initialisation
        # print(theta)
        this_time_con = 10000  # previous validation cost
        # if j != 3:
        #     continue
        for i in range(epoch):
            # print(i, train_x.shape)
            h = predict(theta, train_x)
            con = const_error(h, train_y)
            g = grad(train_x, con)
            pre_y = predict(theta, test_x)
            if gamma >= 500:  # note: never triggers, since gamma starts at 1e-3
                gamma *= 0.95
            theta = theta - gamma * g  # gradient-descent update
            # record the losses
            valdation_cost = cost(pre_y, test_y, const_error(pre_y, test_y))
            sc.append(cost(h, train_y, con))
            vc.append(valdation_cost)
            if abs(this_time_con - valdation_cost) <= epsilon:
                break
            else:
                this_time_con = valdation_cost
        plt.plot(np.arange(len(sc)), np.array(sc), label="training_cost")
        plt.plot(np.arange(len(vc)), np.array(vc), label="validating_cost")
        plt.legend()
        plt.xlabel("epoch")
        plt.ylabel("cost")
        plt.title(str(j) + 'time_cost.png')
        # print(i, valdation_cost)
        plt.text(i, valdation_cost, 'val_cost:\n(%d,%.3f)' % (i, valdation_cost), fontsize=8)
        # plt.show()
        plt.savefig(str(j) + 'time_cost.png')
        plt.clf()
        thetas.append(theta)
        plt.scatter(X[:, 0], Y, marker='x', c='red')
        plt.grid()
        x = np.arange(-0.2, 8, 0.1)[:, np.newaxis]
        x = np.concatenate((x, np.ones((len(x), 1))), axis=1)
        y = predict(theta, x)
        plt.plot(x[:, 0], y, c='blue')
        plt.title("%dtime_predict" % (j))
        plt.savefig("%dtime_predict" % (j))
        plt.clf()
    return thetas[-1]

if __name__ == '__main__':
    learn_time, score = generate_data(100)
    theta = train(learn_time, score)
    plt.plot(learn_time, score)
    x = np.arange(0, 10, 0.1)[:, np.newaxis]
    x = np.concatenate((x, np.ones((len(x), 1))), axis=1)
    plt.plot(x[:, 0], predict(theta, x))
    # plt.show()
    plt.savefig('curve.tiff')
Results
KNN classification on a UCI dataset
Workflow
step.1---initialise the distance to the maximum value
step.2---compute the distance dist between the unknown sample and each training sample
step.3---find maxdist, the largest distance among the current K nearest samples
step.4---if dist is smaller than maxdist, take this training sample as one of the K nearest neighbours
step.5---repeat steps 2, 3 and 4 until the distances between the unknown sample and all training samples have been computed
step.6---count how many times each class label appears among the K nearest neighbours
step.7---pick the most frequent class label as the label of the unknown sample (a minimal sketch of this procedure follows the list)
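The steps above describe a from-scratch K-NN. As a reference, here is a minimal NumPy sketch of that procedure (knn_predict and the toy data are names made up for this illustration; the actual script below uses sklearn's KNeighborsClassifier instead):

# Minimal NumPy sketch of the steps above: brute-force K-NN with Euclidean distance.
import numpy as np
from collections import Counter

def knn_predict(X_train, y_train, x_unknown, k=3):
    # steps 2 and 5: distance from the unknown sample to every training sample
    dists = np.linalg.norm(X_train - x_unknown, axis=1)
    # steps 3 and 4: keep the k smallest distances
    nearest = np.argsort(dists)[:k]
    # steps 6 and 7: majority vote over the labels of the k nearest neighbours
    return Counter(y_train[nearest]).most_common(1)[0][0]

# tiny usage example
X_train = np.array([[0.0, 0.0], [0.1, 0.2], [3.0, 3.1], [2.9, 3.0]])
y_train = np.array([0, 0, 1, 1])
print(knn_predict(X_train, y_train, np.array([2.8, 2.9]), k=3))  # -> 1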
Source code
# coding: utf-8
# Author:

# ## Show plots inline
# In[7]: get_ipython().magic('matplotlib inline')

# ## Imports
# In[17]:
from sklearn.datasets import load_iris
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics

# ## Load the data
# In[2]:
iris = load_iris()
X = iris.data
y = iris.target

# ## Visualise two dimensions at a time
# In[5]:
X_sepal = X[:, :2]
plt.scatter(X_sepal[:, 0], X_sepal[:, 1], c=y, cmap=plt.cm.gnuplot)
plt.xlabel('Sepal length')
plt.ylabel('Sepal width')

# In[6]:
X_petal = X[:, 2:4]
plt.scatter(X_petal[:, 0], X_petal[:, 1], c=y, cmap=plt.cm.gnuplot)
plt.xlabel('Petal length')
plt.ylabel('Petal width')

# ## Initialise the classifier
# ### Nearest neighbour (K = 1)
# In[18]:
knn1 = KNeighborsClassifier(n_neighbors=1)
knn1.fit(X, y)
y_pred = knn1.predict(X)
print((metrics.accuracy_score(y, y_pred)))
# Why is the training accuracy 1 when K = 1? KNN looks up the closest observation
# in the training set, so a model evaluated on the same data it was trained on will
# always find that identical observation. In other words, KNN has memorised the
# training set, and we are testing it on the same data.

# ### Split X and y into training and test sets
# In[15]:
from sklearn.cross_validation import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=4)
# Can we find a better value of K?

# In[19]:
# Try K = 1 to K = 25 and record the test accuracy
k_range = list(range(1, 26))
test_accuracy = []
for k in k_range:
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)
    test_accuracy.append(metrics.accuracy_score(y_test, y_pred))

# In[20]:
plt.plot(k_range, test_accuracy)
plt.xlabel("Value of K for KNN")
plt.ylabel("Testing Accuracy")
# So K = 9 looks like a good choice.

# ## Use cross-validation instead
# In[21]:
from sklearn.cross_validation import KFold
import numpy as np

def cv_estimate(k, kfold=5):
    cv = KFold(n=X.shape[0], n_folds=kfold)
    clf = KNeighborsClassifier(n_neighbors=k)
    score = 0
    for train, test in cv:
        clf.fit(X[train], y[train])
        score += clf.score(X[test], y[test])
        # print(clf.score(X[test], y[test]))
    score /= kfold
    return score

# In[22]:
k_range = list(range(1, 26))
test_accuracy = []
for k in k_range:
    test_accuracy.append(cv_estimate(k, 5))

# In[23]:
plt.plot(k_range, test_accuracy)
plt.xlabel("Value of K for KNN")
plt.ylabel("Average Accuracy of Kfold CV")
# So any choice of K up to about 10 gives good results.
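Note that the sklearn.cross_validation module used above was deprecated in scikit-learn 0.18 and removed in 0.20, so the script as written only runs on an old scikit-learn. On a current install the same 5-fold estimate can be written with model_selection; a minimal equivalent sketch (cv_estimate_modern is a name introduced here, not part of the original script):

# Equivalent of cv_estimate() on modern scikit-learn (>= 0.20), shown as a sketch.
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier

def cv_estimate_modern(X, y, k, folds=5):
    clf = KNeighborsClassifier(n_neighbors=k)
    # cross_val_score fits and scores the classifier on each of the `folds` splits
    return cross_val_score(clf, X, y, cv=folds).mean()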
Results
Decision tree
Workflow
- Create the data set
- Compute the information entropy of the data set
- Iterate over all features and choose the one that gives the lowest post-split entropy (i.e. the largest information gain) as the best splitting feature (the formulas are given right after this list)
- Split the data set on that feature and remove the feature from the label list
- Recurse: return to step 3 and keep splitting the data set until classification is complete
- Classify with the resulting decision tree and return the result
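For reference, the quantities computed by calcShannonEnt and chooseBestFeatureToSplit below are the Shannon entropy of the data set and the information gain of splitting on a feature A:

$$H(D) = -\sum_{k} p_k \log_2 p_k, \qquad \mathrm{Gain}(D, A) = H(D) - \sum_{v \in \mathrm{values}(A)} \frac{|D_v|}{|D|}\, H(D_v)$$

where p_k is the fraction of samples in class k and D_v is the subset of D with A = v; the feature with the largest gain is chosen as the split.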
Source code
from math import log
import operator

def calcShannonEnt(dataSet):  # compute the Shannon entropy of the data set
    numEntries = len(dataSet)  # number of samples
    labelCounts = {}
    for featVec in dataSet:
        currentLabel = featVec[-1]  # the last field of each row is the class label
        if currentLabel not in labelCounts.keys():
            labelCounts[currentLabel] = 0
        labelCounts[currentLabel] += 1  # count the classes and how many samples each has
    shannonEnt = 0
    for key in labelCounts:
        prob = float(labelCounts[key]) / numEntries  # probability of a single class
        shannonEnt -= prob * log(prob, 2)  # accumulate each class's entropy contribution
    return shannonEnt

def createDataSet1():  # build the example data
    dataSet = [['長(zhǎng)', '粗', '男'],
               ['短', '粗', '男'],
               ['短', '粗', '男'],
               ['長(zhǎng)', '細(xì)', '女'],
               ['短', '細(xì)', '女'],
               ['短', '粗', '女'],
               ['長(zhǎng)', '粗', '女'],
               ['長(zhǎng)', '粗', '女']]
    labels = ['頭發(fā)', '聲音']  # the two features: hair length, voice
    return dataSet, labels

def splitDataSet(dataSet, axis, value):  # subset of the data with the given value of one feature
    retDataSet = []
    for featVec in dataSet:
        if featVec[axis] == value:
            reducedFeatVec = featVec[:axis]
            reducedFeatVec.extend(featVec[axis+1:])
            retDataSet.append(reducedFeatVec)
    return retDataSet

def chooseBestFeatureToSplit(dataSet):  # choose the best splitting feature
    numFeatures = len(dataSet[0]) - 1
    baseEntropy = calcShannonEnt(dataSet)  # entropy before the split
    bestInfoGain = 0
    bestFeature = -1
    for i in range(numFeatures):
        featList = [example[i] for example in dataSet]
        uniqueVals = set(featList)
        newEntropy = 0
        for value in uniqueVals:
            subDataSet = splitDataSet(dataSet, i, value)
            prob = len(subDataSet) / float(len(dataSet))
            newEntropy += prob * calcShannonEnt(subDataSet)  # entropy after splitting on this feature
        infoGain = baseEntropy - newEntropy  # information gain of the split
        if (infoGain > bestInfoGain):  # the feature that reduces the entropy the most is the best split
            bestInfoGain = infoGain
            bestFeature = i
    return bestFeature

def majorityCnt(classList):  # majority vote, e.g. a leaf with 2 male and 1 female is labelled male
    classCount = {}
    for vote in classList:
        if vote not in classCount.keys():
            classCount[vote] = 0
        classCount[vote] += 1
    sortedClassCount = sorted(classCount.items(), key=operator.itemgetter(1), reverse=True)
    return sortedClassCount[0][0]

def createTree(dataSet, labels):
    classList = [example[-1] for example in dataSet]  # class labels: male or female
    if classList.count(classList[0]) == len(classList):
        return classList[0]
    if len(dataSet[0]) == 1:
        return majorityCnt(classList)
    bestFeat = chooseBestFeatureToSplit(dataSet)  # choose the best feature
    bestFeatLabel = labels[bestFeat]
    myTree = {bestFeatLabel: {}}  # the tree is stored as a nested dict
    del(labels[bestFeat])
    featValues = [example[bestFeat] for example in dataSet]
    uniqueVals = set(featValues)
    for value in uniqueVals:
        subLabels = labels[:]
        myTree[bestFeatLabel][value] = createTree(splitDataSet(dataSet, bestFeat, value), subLabels)
    return myTree

if __name__ == '__main__':
    dataSet, labels = createDataSet1()  # build the example data
    print(createTree(dataSet, labels))  # print the resulting decision tree
Results
{'聲音': {'粗': {'頭發(fā)': {'長(zhǎng)': '女', '短': '男'}}, '細(xì)': '女'}}
Naive_Bayes
Workflow
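The script below reads a small weather table (3_data.csv) and predicts whether the conditions are suitable for exercise. It applies Bayes' rule with the evidence term dropped, using add-one-smoothed frequencies for the discrete columns and a Gaussian assumption for the integer columns (temperature, humidity):

$$P(c \mid f_1, \dots, f_n) \propto P(c) \prod_i P(f_i \mid c), \qquad P(f_i \mid c) = \frac{1}{\sqrt{2\pi\sigma_c^2}} \exp\!\left(-\frac{(f_i - \mu_c)^2}{2\sigma_c^2}\right) \text{ for continuous } f_i$$

(in the code, __gauss sums this density over x ± 0.5 in steps of 0.1 rather than evaluating it at a single point).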
Source code
import pandas as pd
import numpy as np
import math
from functools import reduce
# from scipy import stats
'''
The weather factors include temperature, humidity, wind and so on. Given the data,
use the Bayes algorithm to learn a classifier and output the probabilistic relation
between the weather and whether a person exercises or not.
# compute the prior probability P(c); here c has two classes
# the evidence P(f1, f2, ...) is the same for every class, so it can be ignored
# likelihood P(f1, f2, ... | c) = P(f1 | c) * P(f2 | c) * ...
## continuous features are assumed to be Gaussian
'''

class Naive_Bayes():
    def __init__(self):
        self.data = pd.read_csv('3_data.csv')
        self.P_category()
        self.likelihood()

    def category_extract(self, values):
        # yield each distinct value and the indices where it occurs
        labels = set(values)
        self.__labels_Lenth = len(labels)
        for s in labels:
            indices = np.where(values == s)[0]
            yield s, indices

    def P_category(self):
        # prior probability of each class
        self.prior = dict()
        L = len(self.data['運(yùn)動(dòng)'].values)
        for s, indices in self.category_extract(self.data['運(yùn)動(dòng)'].values):
            self.prior[s] = len(indices) / L
        # {'不適合': 0.35714285714285715, '適合': 0.6428571428571429}

    def likelihood(self):
        # extract the per-class indices; columns are either object (discrete) or int64 (continuous)
        self.Pfeature = dict()  # likelihood parameters
        for info in self.data._info_axis.values[:-1]:
            self.Pfeature[info] = dict()
            for c, indices in self.category_extract(self.data['運(yùn)動(dòng)'].values):
                temp_values = self.data[info].values[indices]  # values of this column restricted to class c
                if (self.data[info].dtype == np.int64):
                    # integer column: assume a Gaussian
                    u = np.mean(temp_values)
                    theta = np.var(temp_values)  # note: this is the variance, used as the spread parameter in __gauss
                    self.Pfeature[info][c] = [u, theta]  # Gaussian parameters [u, theta], e.g. <temperature> given <not suitable>
                elif (self.data[info].dtype == np.object):
                    # text column: discrete likelihood with add-one (Laplace) smoothing
                    self.Pfeature[info][c] = dict()
                    L = len(temp_values) + self.__labels_Lenth
                    for s, inds in self.category_extract(temp_values):
                        self.Pfeature[info][c][s] = (len(inds) + 1) / L  # likelihood of e.g. <weather = cloudy> given <not suitable>
                else:
                    print(self.data[info].dtype, 'gg')
                    exit()

    def __gauss(self, x, u, theta):
        # sum the Gaussian density over [x - 0.5, x + 0.5) in steps of 0.1
        # single-point alternative: stats.norm.pdf(x, u, theta)
        x = np.arange(x - 0.5, x + 0.5, 0.1)
        return np.sum(1 / math.sqrt(2 * math.pi * theta ** 2) * np.exp(-(x - u) ** 2 / (2 * theta ** 2)))

    def predict(self, n=-1):
        x, y = list(self.data[info].values[n] for info in self.data._info_axis.values[:-1]), self.data[self.data._info_axis.values[-1]].values[-1]
        max_p = [0, 0, 0]  # [best probability, best class, running sum over classes]
        for c, d in self.prior.items():
            temp_possibility = []  # factors of the product (the numerator)
            temp_possibility.append(d)
            for i, info in enumerate(self.data._info_axis.values[:]):
                # print(info, c, x[i])
                if (self.data[info].dtype == np.int64):
                    temp_possibility.append(self.__gauss(x[i], self.Pfeature[info][c][0], self.Pfeature[info][c][1]))
                elif (self.data[info].dtype == np.object):
                    try:
                        temp_possibility.append(self.Pfeature[info][c][x[i]])
                    except KeyError:
                        temp_possibility.append(0.00001)
            # temp = abs(reduce(lambda a, b: a + b, map(math.log, temp_possibility)))
            temp = reduce(lambda a, b: a * b, temp_possibility)
            max_p[2] += temp
            if temp > max_p[0]:
                max_p[:2] = temp, c
        # print(self.Pfeature)
        '''
        {'天氣':
            {'不適合': {'有雨': 0.42857142857142855, '晴': 0.5714285714285714},
             '適合': {'有雨': 0.36363636363636365, '多云': 0.45454545454545453, '晴': 0.2727272727272727}},
         '溫度':
            {'不適合': [74.599999999999994, 49.839999999999996],
             '適合': [73.0, 33.777777777777779]},
         '濕度':
            {'不適合': [84.0, 74.0],
             '適合': [78.222222222222229, 86.839506172839506]},
         '風(fēng)況':
            {'不適合': {'有': 0.5714285714285714, '無': 0.42857142857142855},
             '適合': {'有': 0.36363636363636365, '無': 0.6363636363636364}}}
        '''
        print(x, max_p[1], max_p[0] / max_p[2])
        return max_p[1]

if __name__ == '__main__':
    myNB = Naive_Bayes()
    # myNB.predict(-4)
    y = list(map(myNB.predict, list(range(14))))
    # print(y)
    # print(np.where(y == myNB.data['運(yùn)動(dòng)'].values)[0])
    print(len(np.where(y == myNB.data['運(yùn)動(dòng)'].values)[0]) / len(y))
Results
['晴', 85, 85, '無'] 適合 0.605515228032
['晴', 80, 90, '有'] 不適合 0.597763996605
['多云', 83, 78, '無'] 適合 0.999993280941
['有雨', 70, 96, '無'] 適合 0.738902944045
['有雨', 68, 80, '無'] 適合 0.740164275592
['有雨', 65, 70, '有'] 適合 0.550906281645
['多云', 64, 65, '有'] 適合 0.99998485259
['晴', 72, 95, '無'] 適合 0.614379338515
['晴', 69, 70, '無'] 適合 0.618965026052
['有雨', 75, 80, '無'] 適合 0.74028592022
['晴', 75, 70, '有'] 不適合 0.589771190225
['多云', 72, 90, '有'] 適合 0.999984648989
['多云', 81, 75, '無'] 適合 0.999993369057
['有雨', 71, 80, '有'] 適合 0.550487083686
0.6428571428571429
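For reference, the final number is the accuracy on the 14 training rows themselves: 9 of the 14 predictions above match the label in the data, and 9/14 = 0.642857..., which is the printed value.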
K-Means image segmentation
Workflow
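The script below segments an image into C = 3 clusters in RGB space by alternating the two standard K-means steps: assign every pixel to its nearest centre, then move each centre to the mean of the pixels assigned to it:

$$k_i = \arg\min_{j} \lVert x_i - z_j \rVert_2, \qquad z_j \leftarrow \frac{1}{|\{i : k_i = j\}|} \sum_{i\,:\,k_i = j} x_i$$

After a few iterations every pixel is replaced by its cluster centre (and, in the main block, by a fixed grey level) to produce the segmented image.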
Source code
# Image segmentation based on the K-means algorithm
import cv2
import numpy as np

# read the image from the previous iteration and the saved cluster centres
start = 0
img0 = cv2.imread('./kmean/' + str(start) + '.tiff')
zz = np.load('./kmean/zz.npy')
C = 3  # number of clusters

def kmean(img0):
    img = np.copy(img0)
    img = np.array(img, dtype=np.float32)
    shape = img.shape[:2]
    # zz = np.array([[60, 80, 100], [140, 160, 180], [220, 240, 260]])  # cluster centres
    zz = np.array([[107.42873407, 165.75729793, 149.6303364],
                   [51.03919928, 54.9071066, 45.18418758],
                   [304.08849809, 230.56291292, 161.19507833]])  # cluster centres
    # zz = np.tile(z, [1, 3])
    # zz = np.random.randint(0, 255, (3, 3))
    L = shape[0] * shape[1]  # number of pixels
    kinds = np.empty((L,), dtype=np.uint8)  # cluster label of each pixel
    data = np.reshape(img, (L, 3))  # reshape to one BGR row per pixel
    for time in range(start, 5 + start):
        print(time)
        # assignment step: label every pixel with its nearest centre
        for i in range(L):
            mdzz = np.linalg.norm(data[i] - zz, axis=1)
            min_mdzz_index = np.argmin(mdzz)
            kinds[i] = min_mdzz_index
        # update step: move each centre to the mean of its pixels
        for k in range(C):
            tmp_where = np.where(kinds == k)[0]
            zz[k] = np.mean(data[tmp_where], axis=0)
        print('zz\n', zz)
    # replace every pixel by its cluster centre
    for k in range(C):
        tmp_where = np.where(kinds == k)[0]
        data[tmp_where] = zz[k]
    # reshape back to an image and save it
    temp = np.reshape(data, (shape[0], shape[1], 3))
    temp = np.uint8(temp)
    print(temp.shape)
    print('saving %d.tiff' % (time + 1))
    # temp = cv2.cvtColor(temp, cv2.COLOR_GRAY2BGR)
    cv2.imwrite('./kmean/' + str(time + 1) + '.tiff', temp)
    np.save('./kmean/zz.npy', zz)
    return kinds

if __name__ == '__main__':
    kinds = kmean(img0)
    gray = [0, 178, 255]
    img_gray = cv2.cvtColor(img0, cv2.COLOR_BGR2GRAY)
    shape = img_gray.shape
    img_gray = np.reshape(img_gray, (img_gray.size,))
    for k in range(C):
        tmp_where = np.where(kinds == k)[0]
        img_gray[tmp_where] = gray[k]
    img_gray = np.reshape(img_gray, shape)
    cv2.imwrite('./kmean/gray.tiff', img_gray)
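The script reads ./kmean/0.tiff and ./kmean/zz.npy at start-up, so those files must exist before the first run. A minimal set-up sketch, assuming a source image named source.jpg (the file name is hypothetical):

# One-off set-up sketch (hypothetical file names): create ./kmean/0.tiff and an
# initial ./kmean/zz.npy by picking C random pixels as the starting centres.
import cv2
import numpy as np

img = cv2.imread('source.jpg')                      # hypothetical input image
cv2.imwrite('./kmean/0.tiff', img)                  # iteration-0 image expected by the script
pixels = img.reshape(-1, 3).astype(np.float64)
idx = np.random.choice(len(pixels), size=3, replace=False)
np.save('./kmean/zz.npy', pixels[idx])              # initial cluster centres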