算法流程
算法的原理可參考周志華老師的《機(jī)器學(xué)習(xí)》钳枕,概念很簡(jiǎn)單,也總結(jié)了算法的流程藐俺。
下面就看一下算法的流程
完整代碼
代碼比較復(fù)雜炊甲,但是完全對(duì)照算法的思路,就可看懂我的代碼欲芹,一步一步按照思路理解算法卿啡。
import numpy as np
from numpy import *
import math
import matplotlib.pyplot as plt
def load_data_set(file_name):
D = []
fr = open(file_name)
for line in fr.readlines():
line_data = line.strip().split()
D.append([float(line_data[0]), float(line_data[1])])
return D
def get_dist(x1, x2):
n = len(x1)
dist = 0
for i in range(n):
dist += (abs(float(x1[i] - x2[i]))) ** 2
return sqrt(dist)
def get_partision(x1, X, sigma):
m = len(X)
N = []
for i in range(m):
dist = get_dist(x1, X[i])
print(X[i])
if(dist <= sigma):
N.append(X[i])
return N
def list_compare(x1_list, x2_list):
n1 = len(x1_list)
n2 = len(x2_list)
equal = True
if(n1 != n2):
return False
for i in range(n1):
if(x1_list[i] != x2_list[i]):
equal = False
return equal
def get_intersection(x1, x2):
n = len(x2)
m = len(x1)
W = []
for i in range(m):
for j in range(n):
if(list_compare(x1[i], x2[j]) == True):
W.append(x1[i])
return W
def delete_aggregate(X, x1):
n = len(x1)
m = len(X)
del_list = []
for i in range(n):
for j in range(m):
if(list_compare(x1[i], X[j]) == True):
del_list.append(X[j])
for j in range(len(del_list)):
print("del_list[", j, "] = ", del_list[j])
X.remove([del_list[j][0], del_list[j][1]])
return X
def dbscan(X, sigma, min_pts):
m = len(X)
W = []
C = []
for i in range(m):
N = get_partision(X[i], X, sigma)
if(len(N) >= min_pts):
W.append(X[i])
k = 0
gama = copy(X).tolist()
while(len(W) != 0):
old_gama = copy(gama).tolist()
index = random.randint(0, len(W))
Q = []
Q.append(W[index])
del W[index]
while(len(Q) != 0):
q = copy(Q[0]).tolist()
del Q[0]
N = get_partision(q, X, sigma)
if(len(N) >= min_pts):
deta = get_intersection(N, gama)
for i in range(len(deta)):
Q.append(deta[i])
gama = delete_aggregate(gama, deta)
k += 1
Ck = delete_aggregate(old_gama, gama)
C.append(Ck)
W = delete_aggregate(W, Ck)
return C
def show_experiment_plot(D):
X1 = mat(array(D[0]))
X2 = mat(array(D[1]))
X3 = mat(array(D[2]))
X4 = mat(array(D[3]))
plt.plot(X1[:, 0], X1[:, 1], "or")
plt.plot(X2[:, 0], X2[:, 1], "ob")
plt.plot(X3[:, 0], X3[:, 1], "og")
plt.plot(X4[:, 0], X4[:, 1], "oy")
plt.xlabel("X1")
plt.ylabel("X2")
plt.show()
if __name__ == "__main__":
D = load_data_set("data_set.txt")
C = dbscan(D, 0.11, 5)
print(D)
show_experiment_plot(C)
輸入數(shù)據(jù)
本文的輸入數(shù)據(jù)和書上的輸入數(shù)據(jù)是一樣的
0.697 0.460
0.774 0.376
0.634 0.264
0.608 0.318
0.556 0.215
0.403 0.237
0.481 0.149
0.437 0.211
0.666 0.091
0.243 0.267
0.245 0.057
0.343 0.099
0.639 0.161
0.657 0.198
0.360 0.370
0.593 0.042
0.719 0.103
0.359 0.188
0.339 0.241
0.282 0.257
0.748 0.232
0.714 0.346
0.483 0.312
0.478 0.437
0.525 0.369
0.751 0.489
0.532 0.472
0.473 0.376
0.725 0.445
0.446 0.459
書上的輸入數(shù)據(jù)
實(shí)驗(yàn)結(jié)果
本文的實(shí)驗(yàn)結(jié)果
結(jié)論
兩者的實(shí)驗(yàn)結(jié)果對(duì)比,完全一模一樣菱父。本文結(jié)果中沒(méi)有畫出的點(diǎn)則是噪聲颈娜,畫出的則是最終的聚類結(jié)果。