文章作者:Tyan
博客:noahsnail.com | CSDN | 簡書
本文主要介紹scikit-learn中的交叉驗證。通過交叉驗證來選取KNN算法中的K值。
- Demo 1
# Demo 1: evaluate a KNN classifier (k = 5) on iris with 5-fold cross-validation.
import numpy as np
from sklearn import datasets
# cross_val_score and train_test_split live in sklearn.model_selection;
# the old sklearn.cross_validation module was removed in scikit-learn 0.20.
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Load the iris dataset.
iris = datasets.load_iris()
# Feature matrix.
X = iris.data
# Class labels.
y = iris.target

# Define the classifier.
knn = KNeighborsClassifier(n_neighbors=5)

# Cross-validate: the data is split into 5 parts and each part is used
# once as the test set.
scores = cross_val_score(knn, X, y, cv=5, scoring='accuracy')

# Print the accuracy of each of the 5 folds.
# print() with a single argument works on both Python 2 and Python 3.
print(scores)
- 結果
[ 0.96666667 1. 0.93333333 0.96666667 1. ]
- Demo 2
# Demo 2: choose k for KNN by comparing 10-fold cross-validation accuracy.
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
# cross_val_score and train_test_split live in sklearn.model_selection;
# the old sklearn.cross_validation module was removed in scikit-learn 0.20.
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Load the iris dataset.
iris = datasets.load_iris()
# Feature matrix.
X = iris.data
# Class labels.
y = iris.target

# Candidate values of k: 1 through 29 (the upper bound of range is exclusive).
k_range = range(1, 30)
# Mean cross-validation accuracy for each k, in the same order as k_range.
k_scores = []

# Compute the accuracy for every candidate k.
for k in k_range:
    # Build a KNN classifier with the current k.
    knn = KNeighborsClassifier(n_neighbors=k)
    # Run 10-fold cross-validation and collect the per-fold accuracy.
    scores = cross_val_score(knn, X, y, cv=10, scoring='accuracy')
    # Keep the mean accuracy across the folds.
    k_scores.append(scores.mean())

# Plot how the mean accuracy changes with k.
plt.plot(k_range, k_scores)
plt.xlabel('K Value in KNN')
plt.ylabel('Cross-Validation Mean Accuracy')
plt.show()
- 結果