機器學習入門-分類問題

決策樹

1、畫決策樹

from sklearn.datasets import load_iris  # use the iris dataset
from sklearn import tree
import pydotplus

# Load the dataset explicitly: the snippet below reads `iris`, so it must be
# created here rather than assumed to exist.
iris = load_iris()
# iris.data   -> the features the classification is based on
# iris.target -> the class labels
clf = tree.DecisionTreeClassifier(max_depth=2)  # limit the tree to a depth of 2
clf.fit(iris.data, iris.target)
clf.predict(iris.data)

# Write the decision tree to an image.
# With out_file=None, export_graphviz returns the DOT source as a string, so
# no StringIO buffer is needed (the sklearn.externals.six module that used to
# provide it is no longer shipped with scikit-learn).
dot_data = tree.export_graphviz(clf, out_file=None)
graph = pydotplus.graph_from_dot_data(dot_data)
graph.write_jpg('tree.jpg')  # produces tree.jpg

tree.jpg

2、畫決策邊界
只能使用2個變量
第一步，建立模型

from itertools import product
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn import tree
# Fit a depth-2 decision tree on two feature columns only.
iris = load_iris()
X = iris.data[:, [2, 3]]  # take the third and fourth columns of iris.data
y = iris.target
clf = tree.DecisionTreeClassifier(max_depth=2)
clf.fit(X, y)

# Plot ranges: one unit of padding beyond the data on every side.
x_min = X[:, 0].min() - 1  # horizontal axis of the boundary plot
x_max = X[:, 0].max() + 1
y_min = X[:, 1].min() - 1  # vertical axis of the boundary plot
y_max = X[:, 1].max() + 1
xx, yy = np.meshgrid(
    np.arange(x_min, x_max, 0.1),
    np.arange(y_min, y_max, 0.1),
)

# Predict a label for every grid point, then restore the grid shape.
Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

plt.plot()
# Filled contours of the predicted labels form the background of the plot.
plt.contourf(xx, yy, Z, alpha=0.4, cmap=plt.cm.rainbow)
# The samples themselves, coloured by their true label.
plt.scatter(X[:, 0], X[:, 1], c=y, alpha=1, cmap=plt.cm.RdYlBu)
plt.title('Decision Tree')
plt.xlabel('Petal.Length')
plt.ylabel('Petal.Width')
plt.show()

決策樹決策邊界圖

邏輯回歸分析

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
# Fit logistic regression on the full iris feature matrix, then predict the
# labels of the same samples.
iris = load_iris()
clf = LogisticRegression().fit(iris.data, iris.target)  # fit() returns the estimator
clf.predict(iris.data)

邏輯回歸畫決策邊界圖

# The boundary plot is two-dimensional, so the classifier must be trained on
# exactly the two features that span the grid. A model fitted on all columns
# of iris.data cannot predict the two-column grid built below, so refit here
# on columns 2 and 3 only.
X = iris.data[:, [2, 3]]
y = iris.target
clf = LogisticRegression()
clf.fit(X, y)

# Plot ranges: one unit of padding beyond the data on every side.
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1), np.arange(y_min, y_max, 0.1))

# Predict a label for every grid point, then restore the grid shape.
Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

plt.plot()
plt.contourf(xx, yy, Z, alpha=0.4, cmap=plt.cm.rainbow)  # alpha: transparency, cmap: colour map
plt.scatter(X[:, 0], X[:, 1], c=y, alpha=1, cmap=plt.cm.RdYlBu)
plt.title('Logistic Regression')
plt.xlabel('Petal.Length')
plt.ylabel('Petal.Width')
plt.show()

邏輯回歸決策邊界圖

SVM

from sklearn.datasets import load_iris
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
iris = load_iris()
# kernel is selectable (see the SVC documentation). C is the regularization
# term: a small C gives a larger margin and allows data to cross the boundary.
clf = SVC(kernel='linear', C=100)
clf.fit(iris.data, iris.target)
clf.predict(iris.data)

SVM 與邏輯回歸對比

from itertools import product
import numpy as np
import matplotlib.pyplot as plt

def plot_estimator(estimator,X,y):
    """Draw the decision regions of a fitted classifier over two features.

    The estimator is evaluated on a grid with spacing 0.1 that reaches one
    unit past the minimum and maximum of columns 0 and 1 of ``X``. The
    predicted labels are drawn as filled contours, the rows of ``X`` are
    scattered on top coloured by ``y``, and the figure is shown with
    ``plt.show()``.

    Args:
        estimator: object whose ``predict`` accepts a two-column array.
        X: array whose columns 0 and 1 are the plotted coordinates.
        y: per-sample values used to colour the scatter points.
    """
    horizontal, vertical = X[:, 0], X[:, 1]
    grid_x = np.arange(horizontal.min() - 1, horizontal.max() + 1, 0.1)
    grid_y = np.arange(vertical.min() - 1, vertical.max() + 1, 0.1)
    xx, yy = np.meshgrid(grid_x, grid_y)

    # One prediction per grid point, folded back into the grid's shape.
    labels = estimator.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

    plt.plot()
    plt.contourf(xx, yy, labels, alpha=0.4, cmap=plt.cm.rainbow)
    plt.scatter(horizontal, vertical, c=y, alpha=1, cmap=plt.cm.RdYlBu)
    plt.xlabel('Petal.Length')
    plt.ylabel('Petal.Width')
    plt.show()

# Use only the first 100 samples and feature columns 2 and 3.
X = iris.data[:100, [2, 3]]
y = iris.target[:100]

# Fit a linear-kernel SVM and a logistic regression on the same subset.
clf1 = SVC(kernel='linear').fit(X, y)
clf2 = LogisticRegression().fit(X, y)

# Draw one decision-boundary figure per model, SVM first.
for fitted in (clf1, clf2):
    plot_estimator(fitted, X, y)

SVM與邏輯回歸對比

SVM不同kernel對比

from itertools import product
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.svm import SVC

# Compare the decision regions of three SVC kernels on feature columns 2 and 3.
iris = load_iris()
X = iris.data[:, [2, 3]]
y = iris.target
clf1 = SVC(kernel='rbf')
clf1.fit(X, y)
clf2 = SVC(kernel='poly')
clf2.fit(X, y)
clf3 = SVC(kernel='linear')
clf3.fit(X, y)
# rbf and poly are non-linear kernels and take longer to run.

# Shared evaluation grid: one unit of padding around the data, spacing 0.1.
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1), np.arange(y_min, y_max, 0.1))
grid_points = np.c_[xx.ravel(), yy.ravel()]  # identical for every model, so built once

f, axarr = plt.subplots(1, 3, sharex='col', sharey='row', figsize=(20, 5))
# One panel per kernel: iterate the axes directly instead of indexing by position.
for ax, clf, title in zip(axarr, [clf1, clf2, clf3], ['rbf', 'poly', 'linear']):
    Z = clf.predict(grid_points).reshape(xx.shape)
    ax.contourf(xx, yy, Z, alpha=0.4, cmap=plt.cm.RdYlBu)
    ax.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.brg)
    ax.set_title(title)
# Display the figure explicitly so the comparison also appears when this is
# run as a plain script, matching the other plotting sections.
plt.show()

SVM 不同kernel對比

類神經網絡

import itertools
from sklearn.datasets import load_digits
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
import numpy as np
digits = load_digits()  # bundled dataset, used to recognise handwritten digits
fig = plt.figure(figsize=(6, 6))
fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05)
# Show the first 36 images in a 6x6 grid, each with its label written in red.
for i, (image, label) in enumerate(zip(digits.images[:36], digits.target[:36])):
    ax = fig.add_subplot(6, 6, i + 1, xticks=[], yticks=[])
    ax.imshow(image, cmap=plt.cm.binary, interpolation='nearest')
    ax.text(0, 7, str(label), color='red', fontsize=20)
# Display the figure explicitly so it also appears when run as a plain script,
# matching the other plotting sections.
plt.show()

手寫數字

# Standardise the data; fit_transform learns the scaling and applies it in one step.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(digits.data)

# Three hidden layers of 30 units each; see the MLPClassifier help for the options.
mlp = MLPClassifier(hidden_layer_sizes=(30, 30, 30), activation='logistic', max_iter=100)
mlp.fit(X_scaled, digits.target)
predicted = mlp.predict(X_scaled)

fig = plt.figure(figsize=(6, 6))
fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05)
# First 36 images, each annotated in red as "<true label>-<predicted label>".
for i in range(36):
    ax = fig.add_subplot(6, 6, i + 1, xticks=[], yticks=[])
    ax.imshow(digits.images[i], cmap=plt.cm.binary, interpolation='nearest')
    # format() already returns a string, so no extra str() wrapper is needed.
    ax.text(0, 7, '{}-{}'.format(digits.target[i], predicted[i]), color='red', fontsize=20)
# Display the figure explicitly so it also appears when run as a plain script,
# matching the other plotting sections.
plt.show()

數字和預測值對比

# Check the accuracy: the fraction of samples whose prediction equals the label.
res = [truth == guess for truth, guess in zip(digits.target, predicted)]
accuracy = sum(res) / len(digits.target)
# Author's note: with max_iter=100 the accuracy is 94.5%; with max_iter=1000 it reaches 100%.
print(accuracy)

隨機森林

def plot_estimator(estimator,X,y,title):
    """Draw a titled decision-region plot for a fitted two-feature classifier.

    The estimator is evaluated on a grid with spacing 0.1 that reaches one
    unit past the minimum and maximum of columns 0 and 1 of ``X``. The
    predicted labels are drawn as filled contours, the rows of ``X`` are
    scattered on top coloured by ``y``, and the figure is shown with
    ``plt.show()``.

    Args:
        estimator: object whose ``predict`` accepts a two-column array.
        X: array whose columns 0 and 1 are the plotted coordinates.
        y: per-sample values used to colour the scatter points.
        title: text placed above the plot.
    """
    col0, col1 = X[:, 0], X[:, 1]
    span_x = np.arange(col0.min() - 1, col0.max() + 1, 0.1)
    span_y = np.arange(col1.min() - 1, col1.max() + 1, 0.1)
    xx, yy = np.meshgrid(span_x, span_y)

    # One prediction per grid point, folded back into the grid's shape.
    regions = estimator.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

    plt.plot()
    plt.contourf(xx, yy, regions, alpha=0.4, cmap=plt.cm.rainbow)
    plt.scatter(col0, col1, c=y, alpha=1, cmap=plt.cm.RdYlBu)
    plt.title(title)
    plt.xlabel('Sepal.Length')
    plt.ylabel('Sepal.Width')
    plt.show()
from sklearn.ensemble import RandomForestClassifier
iris = load_iris()
X, y = iris.data[:, [0, 1]], iris.target

# n_estimators is the number of trees; the author notes that a larger n gives
# a more accurate classification.
clf = RandomForestClassifier(
    n_estimators=100,
    criterion='gini',
    random_state=None,
)
clf.fit(X, y)

# Draw the decision-boundary plot.
plot_estimator(clf, X, y, 'RandomForestClassifier')

n_estimators=100時的決策邊界圖

各種分類方法對比

from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier

# Compare four classifiers on feature columns 0 and 1 of the iris data.
x = iris.data[:, [0, 1]]
y = iris.target

clf1 = SVC(kernel='rbf')
clf2 = DecisionTreeClassifier()
clf3 = RandomForestClassifier(n_estimators=10, criterion='entropy')
clf4 = LogisticRegression()

# Fit every model first, in the order clf1 .. clf4, before any plotting.
for model in (clf1, clf2, clf3, clf4):
    model.fit(x, y)

# One titled decision-boundary figure per model, in the same order.
titled_models = [
    (clf1, 'rbf'),
    (clf2, 'DecisionTree'),
    (clf3, 'RandomForest'),
    (clf4, 'LogisticRegression'),
]
for model, name in titled_models:
    plot_estimator(model, x, y, name)

最后編輯于：2017.12.08 18:50:18

©著作權歸作者所有,轉載或內容合作請聯繫作者

人面猴
序言：七十年代末危彩，一起剝皮案震驚了整個(gè)濱河市攒磨，隨后出現(xiàn)的幾起案子，更是在濱河造成了極大的恐慌汤徽，老刑警劉巖咧纠，帶你破解...
沈念sama閱讀 216,919評(píng)論 6贊 502
死咒
序言：濱河連續(xù)發(fā)生了三起死亡事件，死亡現(xiàn)場(chǎng)離奇詭異泻骤，居然都是意外死亡漆羔，警方通過(guò)查閱死者的電腦和手機(jī)梧奢，發(fā)現(xiàn)死者居然都...
沈念sama閱讀 92,567評(píng)論 3贊 392
救了他兩次的神仙讓他今天三更去死
文/潘曉璐我一進(jìn)店門，熙熙樓的掌柜王于貴愁眉苦臉地迎上來(lái)演痒，“玉大人亲轨，你說(shuō)我怎么就攤上這事∧袼常” “怎么了惦蚊？”我有些...
開(kāi)封第一講書(shū)人閱讀 163,316評(píng)論 0贊 353
道士緝兇錄：失蹤的賣姜人
文/不壞的土叔我叫張陵，是天一觀的道長(zhǎng)讯嫂。經(jīng)常有香客問(wèn)我蹦锋，道長(zhǎng)，這世上最難降的妖魔是什么欧芽？我笑而不...
開(kāi)封第一講書(shū)人閱讀 58,294評(píng)論 1贊 292
?港島之戀（遺憾婚禮）
正文為了忘掉前任莉掂，我火速辦了婚禮，結(jié)果婚禮上千扔，老公的妹妹穿的比我還像新娘憎妙。我一直安慰自己，他們只是感情好曲楚，可當(dāng)我...
茶點(diǎn)故事閱讀 67,318評(píng)論 6贊 390
惡毒庶女頂嫁案：這布局不是一般人想出來(lái)的
文/花漫我一把揭開(kāi)白布厘唾。她就那樣靜靜地躺著，像睡著了一般龙誊。火紅的嫁衣襯著肌膚如雪抚垃。梳的紋絲不亂的頭發(fā)上，一...
開(kāi)封第一講書(shū)人閱讀 51,245評(píng)論 1贊 299
城市分裂傳說(shuō)
那天趟大，我揣著相機(jī)與錄音讯柔，去河邊找鬼。笑死护昧，一個(gè)胖子當(dāng)著我的面吹牛，可吹牛的內(nèi)容都是我干的粗截。我是一名探鬼主播惋耙，決...
沈念sama閱讀 40,120評(píng)論 3贊 418
雙鴛鴦連環(huán)套：你想象不到人心有多黑
文/蒼蘭香墨我猛地睜開(kāi)眼，長(zhǎng)吁一口氣：“原來(lái)是場(chǎng)噩夢(mèng)啊……” “哼熊昌！你這毒婦竟也來(lái)了绽榛？” 一聲冷哼從身側(cè)響起，我...
開(kāi)封第一講書(shū)人閱讀 38,964評(píng)論 0贊 275
萬(wàn)榮殺人案實(shí)錄
序言：老撾萬(wàn)榮一對(duì)情侶失蹤婿屹，失蹤者是張志新（化名）和其女友劉穎灭美，沒(méi)想到半個(gè)月后，有當(dāng)?shù)厝嗽跇?shù)林里發(fā)現(xiàn)了一具尸體昂利，經(jīng)...
沈念sama閱讀 45,376評(píng)論 1贊 313
?護(hù)林員之死
正文獨(dú)居荒郊野嶺守林人離奇死亡届腐，尸身上長(zhǎng)有42處帶血的膿包…… 初始之章·張勛以下內(nèi)容為張勛視角年9月15日...
茶點(diǎn)故事閱讀 37,592評(píng)論 2贊 333
?白月光啟示錄
正文我和宋清朗相戀三年铁坎，在試婚紗的時(shí)候發(fā)現(xiàn)自己被綠了。大學(xué)時(shí)的朋友給我發(fā)了我未婚夫和他白月光在一起吃飯的照片犁苏。...
茶點(diǎn)故事閱讀 39,764評(píng)論 1贊 348
活死人
序言：一個(gè)原本活蹦亂跳的男人離奇死亡硬萍，死狀恐怖，靈堂內(nèi)的尸體忽然破棺而出围详，到底是詐尸還是另有隱情朴乖，我是刑警寧澤，帶...
沈念sama閱讀 35,460評(píng)論 5贊 344
?日本核電站爆炸內(nèi)幕
正文年R本政府宣布助赞，位于F島的核電站买羞，受9級(jí)特大地震影響，放射性物質(zhì)發(fā)生泄漏雹食。R本人自食惡果不足惜畜普，卻給世界環(huán)境...
茶點(diǎn)故事閱讀 41,070評(píng)論 3贊 327
男人毒藥：我在死后第九天來(lái)索命
文/蒙蒙一、第九天我趴在偏房一處隱蔽的房頂上張望婉徘。院中可真熱鬧漠嵌，春花似錦、人聲如沸盖呼。這莊子的主人今日做“春日...
開(kāi)封第一講書(shū)人閱讀 31,697評(píng)論 0贊 22
一樁弒父案，背后竟有這般陰謀
文/蒼蘭香墨我抬頭看了看天上的太陽(yáng)几晤。三九已至约炎，卻和暖如春，著一層夾襖步出監(jiān)牢的瞬間蟹瘾，已是汗流浹背圾浅。一陣腳步聲響...
開(kāi)封第一講書(shū)人閱讀 32,846評(píng)論 1贊 269
情欲美人皮
我被黑心中介騙來(lái)泰國(guó)打工，沒(méi)想到剛下飛機(jī)就差點(diǎn)兒被人妖公主榨干…… 1. 我叫王不留憾朴，地道東北人狸捕。一個(gè)月前我還...
沈念sama閱讀 47,819評(píng)論 2贊 370
代替公主和親
正文我出身青樓，卻偏偏與公主長(zhǎng)得像众雷，于是被迫代替她去往敵國(guó)和親灸拍。傳聞我的和親對(duì)象是個(gè)殘疾皇子，可洞房花燭夜當(dāng)晚...
茶點(diǎn)故事閱讀 44,665評(píng)論 2贊 354