決策樹(shù)
1躬贡、畫決策樹(shù)
from sklearn.datasets import load_iris #使用iris數(shù)據(jù)集
from sklearn import tree
#iris.data #iris的分類依據(jù)
#iris.target #iris的分類結(jié)果
clf = tree.DecisionTreeClassifier(max_depth=2) #設(shè)置最大深度為2層
clf.fit(iris.data,iris.target)
clf.predict(iris.data)
# 將決策樹(shù)輸出到圖片
from sklearn.externals.six import StringIO
import pydotplus
dot_data = StringIO()
tree.export_graphviz(clf, out_file=dot_data)
graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
graph.write_jpg('tree.jpg') # 生成tree.jpg
2蚪腐、畫決策邊界
只能使用2個(gè)變量
第一步,建立模型
from itertools import product
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn import tree
iris = load_iris()
X = iris.data[:,[2,3]] #選取iris.data中第三、第四個(gè)變量
y = iris.target
clf = tree.DecisionTreeClassifier(max_depth = 2)
clf.fit(X,y)
x_min ,x_max = X[:,0].min()-1,X[:,0].max()+1 #邊界圖橫坐標(biāo)
y_min ,y_max = X[:,1].min()-1,X[:,1].max()+1 #邊界圖縱坐標(biāo)
xx,yy = np.meshgrid(np.arange(x_min,x_max,0.1),np.arange(y_min,y_max,0.1))
Z = clf.predict(np.c_[xx.ravel(),yy.ravel()])
Z = Z.reshape(xx.shape)
plt.plot()
plt.contourf(xx,yy,Z,alpha=0.4,cmap=plt.cm.rainbow) #邊界圖背景
plt.scatter(X[:,0],X[:,1],c=y,alpha=1,cmap=plt.cm.RdYlBu)
plt.title('Decision Tree')
plt.xlabel('Petal.Length')
plt.ylabel('Petal.Width')
plt.show()
邏輯回歸分析
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
iris = load_iris()
clf = LogisticRegression()
clf.fit(iris.data,iris.target)
clf.predict(iris.data)
邏輯回歸畫決策邊界圖
x_min ,x_max = X[:,0].min()-1,X[:,0].max()+1
y_min ,y_max = X[:,1].min()-1,X[:,1].max()+1
xx,yy = np.meshgrid(np.arange(x_min,x_max,0.1),np.arange(y_min,y_max,0.1))
Z = clf.predict(np.c_[xx.ravel(),yy.ravel()])
Z = Z.reshape(xx.shape)
plt.plot()
plt.contourf(xx,yy,Z,alpha=0.4,cmap=plt.cm.rainbow) #alpha 透明度俭嘁,cmap 配色
plt.scatter(X[:,0],X[:,1],c=y,alpha=1,cmap=plt.cm.RdYlBu)
plt.title('Logistic Regression')
plt.xlabel('Petal.Length')
plt.ylabel('Petal.Width')
plt.show()
SVM
from sklearn.datasets import load_iris
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
iris = load_iris()
clf = SVC(C=100,kernel='linear') #kernel 可選,參考函數(shù)說(shuō)明服猪;C 正則項(xiàng)供填,C數(shù)值小,margin大罢猪,允許數(shù)據(jù)跨界
clf.fit(iris.data,iris.target)
clf.predict(iris.data)
SVM 與邏輯回歸對(duì)比
from itertools import product
import numpy as np
import matplotlib.pyplot as plt
def plot_estimator(estimator,X,y):
x_min ,x_max = X[:,0].min()-1,X[:,0].max()+1
y_min ,y_max = X[:,1].min()-1,X[:,1].max()+1
xx,yy = np.meshgrid(np.arange(x_min,x_max,0.1),np.arange(y_min,y_max,0.1))
Z = estimator.predict(np.c_[xx.ravel(),yy.ravel()])
Z = Z.reshape(xx.shape)
plt.plot()
plt.contourf(xx,yy,Z,alpha=0.4,cmap=plt.cm.rainbow)
plt.scatter(X[:,0],X[:,1],c=y,alpha=1,cmap=plt.cm.RdYlBu)
plt.xlabel('Petal.Length')
plt.ylabel('Petal.Width')
plt.show()
X = iris.data[0:100,[2,3]]
y = iris.target[0:100]
clf1 = SVC(kernel='linear')
clf1.fit(X,y)
clf2 = LogisticRegression()
clf2.fit(X,y)
plot_estimator(clf1,X,y)
plot_estimator(clf2,X,y)
SVM不同kernel對(duì)比
from itertools import product
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.svm import SVC
iris = load_iris()
X =iris.data[:,[2,3]]
y = iris.target
clf1 = SVC(kernel = 'rbf')
clf1.fit(X,y)
clf2 = SVC(kernel = 'poly')
clf2.fit(X,y)
clf3 = SVC(kernel = 'linear')
clf3.fit(X,y)
#rbf 和poly 非線性kernel近她,耗時(shí)久
x_min ,x_max = X[:,0].min()-1,X[:,0].max()+1
y_min ,y_max = X[:,1].min()-1,X[:,1].max()+1
xx,yy = np.meshgrid(np.arange(x_min,x_max,0.1),np.arange(y_min,y_max,0.1))
f,axarr = plt.subplots(1,3,sharex='col',sharey='row',figsize=(20,5))
for idx,clf,title in zip([0,1,2],[clf1,clf2,clf3],['rbf','poly','linear']):
Z = clf.predict(np.c_[xx.ravel(),yy.ravel()])
Z = Z.reshape(xx.shape)
axarr[idx].contourf(xx,yy,Z,alpha=0.4,cmap=plt.cm.RdYlBu)
axarr[idx].scatter(X[:,0],X[:,1],c=y,cmap=plt.cm.brg)
axarr[idx].set_title(title)
類神經(jīng)網(wǎng)絡(luò)
import itertools
from sklearn.datasets import load_digits
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
import numpy as np
digits = load_digits() #使用自帶dataset,辨別手寫數(shù)字
fig = plt.figure(figsize=(6,6))
fig.subplots_adjust(left=0,right=1,bottom=0,top=1,hspace=0.05,wspace=0.05)
for i in range(36):
ax = fig.add_subplot(6,6,i+1,xticks=[],yticks=[])
ax.imshow(digits.images[i],cmap=plt.cm.binary,interpolation='nearest')
ax.text(0,7,str(digits.target[i]),color='red',fontsize=20)
scaler = StandardScaler()
scaler.fit(digits.data)
X_scaled = scaler.transform(digits.data)
# 對(duì)數(shù)據(jù)進(jìn)行標(biāo)準(zhǔn)話
mlp = MLPClassifier(hidden_layer_sizes =(30,30,30),activation='logistic',max_iter= 100)
# 查看函數(shù)幫助
mlp.fit(X_scaled,digits.target)
predicted = mlp.predict(X_scaled)
fig = plt.figure(figsize=(6,6))
fig.subplots_adjust(left=0,right=1,bottom=0,top=1,hspace=0.05,wspace=0.05)
for i in range(36):
ax = fig.add_subplot(6,6,i+1,xticks=[],yticks=[])
ax.imshow(digits.images[i],cmap=plt.cm.binary,interpolation='nearest')
ax.text(0,7,str('{}-{}'.format(digits.target[i],predicted[i])),color='red',fontsize=20)
# 查看準(zhǔn)確率
res = [i==j for i,j in zip(digits.target,predicted)]
print(sum(res)/len(digits.target)) # max_iter = 100時(shí)準(zhǔn)確率94.5%膳帕,max_iter=1000時(shí)準(zhǔn)確率達(dá)到100%
隨機(jī)森林
def plot_estimator(estimator,X,y,title):
x_min ,x_max = X[:,0].min()-1,X[:,0].max()+1
y_min ,y_max = X[:,1].min()-1,X[:,1].max()+1
xx,yy = np.meshgrid(np.arange(x_min,x_max,0.1),np.arange(y_min,y_max,0.1))
Z = estimator.predict(np.c_[xx.ravel(),yy.ravel()])
Z = Z.reshape(xx.shape)
plt.plot()
plt.contourf(xx,yy,Z,alpha=0.4,cmap=plt.cm.rainbow)
plt.scatter(X[:,0],X[:,1],c=y,alpha=1,cmap=plt.cm.RdYlBu)
plt.title(title)
plt.xlabel('Sepal.Length')
plt.ylabel('Sepal.Width')
plt.show()
from sklearn.ensemble import RandomForestClassifier
iris = load_iris()
X = iris.data[:,[0,1]]
y = iris.target
clf = RandomForestClassifier(n_estimators=100,criterion='gini',random_state=None)
# n_estimators 樹(shù)的數(shù)量粘捎,n越大,分類越準(zhǔn)確
clf.fit(X,y)
plot_estimator(clf,X,y,'RandomForestClassifier') # 畫決策邊界圖
各種分類方法對(duì)比
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
x = iris.data[:,[0,1]]
y = iris.target
clf1 = SVC(kernel='rbf')
clf1.fit(x,y)
clf2 = DecisionTreeClassifier()
clf2.fit(x,y)
clf3 = RandomForestClassifier(n_estimators=10,criterion='entropy')
clf3.fit(x,y)
clf4 = LogisticRegression()
clf4.fit(x,y)
plot_estimator(clf1,x,y,'rbf')
plot_estimator(clf2,x,y,'DecisionTree')
plot_estimator(clf3,x,y,'RandomForest')
plot_estimator(clf4,x,y,'LogisticRegression')