本文之編寫程序涉及到API介紹,程序的完整實現,具體算法原理請查看之前所寫的KNN算法介紹
一、基礎(chǔ)準(zhǔn)備
1、python 基礎
2、numpy 基礎(chǔ)
np.mean
求平均值
print(np.mean([1,2,3,4]))
# >> 2.5
3、scikit 基礎
fit
(X, y)
符合模型使用X作為訓(xùn)練數(shù)據(jù)和y值作為目標(biāo)
get_params
([deep])
得到的參數(shù)估計(jì)量踱讨。
.
kneighbors
([X, n_neighbors, return_distance])
發(fā)現(xiàn)的K-neighbors點(diǎn)。
kneighbors_graph
([X, n_neighbors, mode])
計(jì)算(加權(quán))圖k-Neighbors X點(diǎn)
predict
(X)
預(yù)測(cè)類標(biāo)簽所提供的數(shù)據(jù)
predict_proba
(X)
回歸測(cè)試數(shù)據(jù)的概率估計(jì)X砍的。
score
(X, y[, sample_weight])
返回意味著在給定的精度測(cè)試數(shù)據(jù)和標(biāo)簽痹筛。
set_params
(**params)
設(shè)置的參數(shù)估計(jì)量。
.
二、完整程序
# -*- coding: utf-8 -*-
import numpy as np
from sklearn import neighbors, preprocessing
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import classification_report
# sklearn.cross_validation was deprecated in 0.18 and removed in 0.20;
# train_test_split now lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split
def file2Mat(testFileName, parammterNumber):
    """Parse a tab-separated data file into a feature matrix and label list.

    Each line of the file is expected to hold ``parammterNumber``
    tab-separated feature values followed by a class label as the last
    field.

    Args:
        testFileName: path of the data file to read.
        parammterNumber: number of leading feature columns per line.

    Returns:
        (resultMat, classLabelVector): an (n_lines, parammterNumber)
        float ndarray of features and a list of label strings (one per
        line, taken from the last tab-separated field).
    """
    # 'with' guarantees the file is closed even if parsing raises,
    # unlike the bare open()/close() pair.
    with open(testFileName) as fr:
        lines = fr.readlines()
    resultMat = np.zeros((len(lines), parammterNumber))
    classLabelVector = []
    for i, line in enumerate(lines):
        items = line.strip().split('\t')
        # NumPy converts the string fields to float on assignment.
        resultMat[i, :] = items[:parammterNumber]
        classLabelVector.append(items[-1])
    return resultMat, classLabelVector
# 為了防止某個(gè)屬性對(duì)結(jié)果產(chǎn)生很大的影響帚稠,所以有了這個(gè)優(yōu)化,比如:10000,4.5,6.8 10000就對(duì)結(jié)果基本起了決定作用
def autoNorm(dataSet):
    """Min-max normalize each column of dataSet into the [0, 1] range.

    Prevents a feature with a large absolute scale (e.g. 10000 vs 4.5)
    from dominating the Euclidean distances used by KNN.

    Args:
        dataSet: 2-D ndarray of shape (n_samples, n_features).

    Returns:
        (normMat, minVals, ranges): the normalized matrix plus the
        per-column minimums and raw ranges (max - min), which callers
        need to apply the identical transform to new (test) samples.
    """
    minVals = dataSet.min(0)
    maxVals = dataSet.max(0)
    ranges = maxVals - minVals
    # Guard against constant columns: a zero range would otherwise
    # cause a division by zero and fill the column with inf/nan.
    safeRanges = np.where(ranges == 0, 1, ranges)
    # Broadcasting subtracts/divides row-wise without the np.tile copies.
    normMat = (dataSet - minVals) / safeRanges
    return normMat, minVals, ranges
if __name__ == '__main__':
    trainigSetFileName = 'data\\datingTrainingSet.txt'
    testFileName = 'data\\datingTestSet.txt'
    # Read the training features and labels.
    trianingMat, classLabel = file2Mat(trainigSetFileName, 3)
    # Min-max normalize so no single feature dominates the distances.
    autoNormTrianingMat, minVals, ranges = autoNorm(trianingMat)
    # Read the test data and apply the SAME transform learned on the
    # training set (never re-fit normalization on test data).
    testMat, testLabel = file2Mat(testFileName, 3)
    # Broadcasting normalizes every test row at once, replacing the
    # original per-row Python loop.
    autoNormTestMat = (testMat - minVals) / ranges
    # testMat = preprocessing.normalize(testMat)
    print(autoNormTestMat)
    # Train the KNN classifier.
    clf = neighbors.KNeighborsClassifier(n_neighbors=5, algorithm='kd_tree')
    clf.fit(autoNormTrianingMat, classLabel)
    answer = clf.predict(autoNormTestMat)
    # Number of misclassified test samples.
    print(np.sum(answer != testLabel))
    # Mean accuracy on the test set.
    print(clf.score(autoNormTestMat, testLabel))
    print(np.mean(answer == testLabel))
    # predict/predict_proba require a 2-D array: one row per sample.
    # Passing a bare 1-D list raises ValueError in modern scikit-learn.
    print(clf.predict([[0.44832535, 0.39805139, 0.56233353]]))
    print(clf.predict_proba([[0.44832535, 0.39805139, 0.56233353]]))
    # Precision/recall analysis (kept from the original, still disabled):
    # precision, recall, thresholds = precision_recall_curve(testLabel, clf.predict(testMat))