Support Vector Machine (SVM) is an elegant algorithm backed by a well-developed mathematical theory. It is most often used for classification, but it can also be applied to regression. Thanks to its strong theoretical guarantees and the kernel trick for handling linearly non-separable data, SVM was enormously popular around the 1990s.
This article avoids the strict and heavy theory and instead tries to build an intuitive feel for how SVM works.
## Basic numerical library
import numpy as np
## Plotting libraries
import matplotlib.pyplot as plt
import seaborn as sns
## Import the SVM module
from sklearn import svm
## Demo: classification with a linear SVM
## Construct a toy dataset
x_features = np.array([[-1, -2], [-2, -1], [-3, -2], [1, 3], [2, 1], [3, 2]])
y_label = np.array([0, 0, 0, 1, 1, 1])
## Create an SVC model (support vector classification) with a linear kernel
svc = svm.SVC(kernel='linear')
## Fit the SVM model on the toy dataset
svc = svc.fit(x_features, y_label)
## Inspect the model's weight vector w
print('the weight of SVC:', svc.coef_)
## Inspect the model's intercept w0
print('the intercept (w0) of SVC:', svc.intercept_)
## Predict on the training data
y_train_pred = svc.predict(x_features)
print('The prediction result:', y_train_pred)
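For a linear kernel, the prediction is simply the sign of w·x + w0. The following sanity check (a minimal sketch reusing `svc` and `x_features` from above) reproduces `svc.predict` by hand:
## Decision value w . x + w0; the predicted class is 1 when it is positive
decision = x_features @ svc.coef_[0] + svc.intercept_[0]
manual_pred = (decision > 0).astype(int)
print('decision values:', decision)        # same as svc.decision_function(x_features)
print('manual prediction:', manual_pred)   # matches y_train_pred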
# Decision boundary: the points where w0*x + w1*y + b = 0, i.e. y = -(w0/w1)*x - b/w1
x_range = np.linspace(-3, 3)
w = svc.coef_[0]
a = -w[0] / w[1]
y_3 = a * x_range - (svc.intercept_[0]) / w[1]
# Visualize the decision boundary
plt.figure()
plt.scatter(x_features[:, 0], x_features[:, 1], c=y_label, s=50, cmap='viridis')
plt.plot(x_range, y_3, '-c')
plt.show()
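For a linear SVM the two margin hyperplanes sit at w·x + w0 = ±1, so the margin width is 2/||w||. A small sketch, reusing the fitted `svc`:
## Geometric margin width and the support vectors that determine it
margin_width = 2 / np.linalg.norm(svc.coef_[0])
print('margin width:', margin_width)
print('support vectors:\n', svc.support_vectors_)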
# An introduction to support vector machines
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs   # sklearn.datasets.samples_generator was removed in newer scikit-learn
%matplotlib inline
# Scatter plot of the raw data
X, y = make_blobs(n_samples=60, centers=2, random_state=0, cluster_std=0.4)
plt.scatter(X[:, 0], X[:, 1], c=y, s=60, cmap=plt.cm.Paired)
# Scatter plot: more than one line can separate the two classes
X, y = make_blobs(n_samples=60, centers=2, random_state=0, cluster_std=0.4)
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap=plt.cm.Paired)
x_fit = np.linspace(0, 3)
# Two candidate separating lines
y_1 = 1 * x_fit + 0.8
plt.plot(x_fit, y_1, '-c')
y_2 = -0.3 * x_fit + 3
plt.plot(x_fit, y_2, '-k')
# Scatter plot with a new test point: the two candidate lines classify it differently
X, y = make_blobs(n_samples=60, centers=2, random_state=0, cluster_std=0.4)
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap=plt.cm.Paired)
plt.scatter([3], [2.8], c='#cccc00', marker='<', s=100, cmap=plt.cm.Paired)
x_fit = np.linspace(0, 3)
# The same two candidate lines
y_1 = 1 * x_fit + 0.8
plt.plot(x_fit, y_1, '-c')
y_2 = -0.3 * x_fit + 3
plt.plot(x_fit, y_2, '-k')
# Scatter plot with margins: each candidate line is widened into a band
X, y = make_blobs(n_samples=60, centers=2, random_state=0, cluster_std=0.4)
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap=plt.cm.Paired)
x_fit = np.linspace(0, 3)
# First candidate line and its margin band
y_1 = 1 * x_fit + 0.8
plt.plot(x_fit, y_1, '-c')
plt.fill_between(x_fit, y_1 - 0.6, y_1 + 0.6, edgecolor='none', color='#AAAAAA', alpha=0.4)
# Second candidate line and its (narrower) margin band
y_2 = -0.3 * x_fit + 3
plt.plot(x_fit, y_2, '-k')
plt.fill_between(x_fit, y_2 - 0.4, y_2 + 0.4, edgecolor='none', color='#AAAAAA', alpha=0.4)
# Scatter plot keeping only the candidate with the wider margin
X, y = make_blobs(n_samples=60, centers=2, random_state=0, cluster_std=0.4)
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap=plt.cm.Paired)
y_1 = 1 * x_fit + 0.8
plt.plot(x_fit, y_1, '-c')
# Margin band
plt.fill_between(x_fit, y_1 - 0.6, y_1 + 0.6, edgecolor='none', color='#AAAAAA', alpha=0.4)
from sklearn.svm import SVC
# Fit a linear-kernel SVM
clf = SVC(kernel='linear')
clf.fit(X, y)
# Decision boundary from the fitted model
w = clf.coef_[0]
a = -w[0] / w[1]
y_3 = a * x_fit - (clf.intercept_[0]) / w[1]
# Lower margin boundary (through the first support vector)
b_down = clf.support_vectors_[0]
y_down = a * x_fit + b_down[1] - a * b_down[0]
# Upper margin boundary (through the last support vector)
b_up = clf.support_vectors_[-1]
y_up = a * x_fit + b_up[1] - a * b_up[0]
# Scatter plot
X, y = make_blobs(n_samples=60, centers=2, random_state=0, cluster_std=0.4)
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap=plt.cm.Paired)
# Decision boundary
plt.plot(x_fit, y_3, '-c')
# Margin band
plt.fill_between(x_fit, y_down, y_up, edgecolor='none', color='#AAAAAA', alpha=0.4)
# Highlight the support vectors
plt.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], edgecolor='b',
            s=80, facecolors='none')
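Since this data is separable, the support vectors lie exactly on the margin, so their decision-function values should be ±1. A quick check, reusing `clf`:
## Decision values at the support vectors: +1 or -1 for a hard margin
print(clf.decision_function(clf.support_vectors_))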
# Soft margin
# Scatter plot: noisier data (larger cluster_std) that is no longer cleanly separable
X, y = make_blobs(n_samples=60, centers=2, random_state=0, cluster_std=0.9)
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap=plt.cm.Paired)
# Scatter plot (the same noisy data, now to be fitted with a soft-margin SVM)
X, y = make_blobs(n_samples=60, centers=2, random_state=0, cluster_std=0.9)
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap=plt.cm.Paired)
# Penalty parameter C=1
clf = SVC(C=1, kernel='linear')
clf.fit(X, y)
# Decision boundary
w = clf.coef_[0]
a = -w[0] / w[1]
y_3 = a * x_fit - (clf.intercept_[0]) / w[1]
# Lower margin boundary
b_down = clf.support_vectors_[0]
y_down = a * x_fit + b_down[1] - a * b_down[0]
# Upper margin boundary
b_up = clf.support_vectors_[-1]
y_up = a * x_fit + b_up[1] - a * b_up[0]
# Scatter plot of the noisy training data (the model was fitted on this data,
# so we plot it rather than regenerating a cleaner set)
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap=plt.cm.Paired)
# Decision boundary
plt.plot(x_fit, y_3, '-c')
# Margin band
plt.fill_between(x_fit, y_down, y_up, edgecolor='none', color='#AAAAAA', alpha=0.4)
# Highlight the support vectors
plt.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], edgecolor='b',
            s=80, facecolors='none')
X, y = make_blobs(n_samples=60, centers=2, random_state=0, cluster_std=0.9)
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap=plt.cm.Paired)
# Penalty parameter C=0.2: a softer margin that tolerates more violations
clf = SVC(C=0.2, kernel='linear')
clf.fit(X, y)
x_fit = np.linspace(-1.5, 4)
# Decision boundary
w = clf.coef_[0]
a = -w[0] / w[1]
y_3 = a * x_fit - (clf.intercept_[0]) / w[1]
# Lower margin boundary (support vector indices picked by inspection)
b_down = clf.support_vectors_[10]
y_down = a * x_fit + b_down[1] - a * b_down[0]
# Upper margin boundary
b_up = clf.support_vectors_[1]
y_up = a * x_fit + b_up[1] - a * b_up[0]
# Scatter plot of the noisy training data
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap=plt.cm.Paired)
# Decision boundary
plt.plot(x_fit, y_3, '-c')
# Margin band
plt.fill_between(x_fit, y_down, y_up, edgecolor='none', color='#AAAAAA', alpha=0.4)
# Highlight the support vectors
plt.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], edgecolor='b',
            s=80, facecolors='none')
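A smaller C softens the margin: more points are allowed inside the band, and all of them become support vectors. A quick comparison (a sketch, reusing `X` and `y` from above):
## Effect of C: as the penalty shrinks, the margin widens and the number of
## support vectors grows
for C in [0.2, 1, 10]:
    n_sv = SVC(C=C, kernel='linear').fit(X, y).support_vectors_.shape[0]
    print(f'C={C}: {n_sv} support vectors')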
from sklearn.datasets import make_circles   # samples_generator was removed in newer scikit-learn
# Scatter plot: concentric circles are not linearly separable
X, y = make_circles(100, factor=.1, noise=.1, random_state=2019)
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap=plt.cm.Paired)
clf = SVC(kernel='linear').fit(X, y)
# The best a linear kernel can do: a straight line through the data
x_fit = np.linspace(-1.5, 1.5)
w = clf.coef_[0]
a = -w[0] / w[1]
y_3 = a * x_fit - (clf.intercept_[0]) / w[1]
plt.plot(x_fit, y_3, '-c')
# Map the data to 3D with a radial feature r = exp(-||x||^2)
r = np.exp(-(X[:, 0] ** 2 + X[:, 1] ** 2))
ax = plt.subplot(projection='3d')
ax.scatter3D(X[:, 0], X[:, 1], r, c=y, s=50, cmap=plt.cm.Paired)
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel('z')
# In the lifted space a plane separates the two classes
x_1, y_1 = np.meshgrid(np.linspace(-1, 1), np.linspace(-1, 1))
z = 0.01 * x_1 + 0.01 * y_1 + 0.5
ax.plot_surface(x_1, y_1, z, alpha=0.3)
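To confirm that the lifted data really is linearly separable, we can fit a linear SVM on the three features (x1, x2, r). A minimal sketch, reusing `X`, `y`, and `r` from above:
## In the lifted space a plain linear SVM separates the circles perfectly
X_3d = np.column_stack([X, r])
clf_3d = SVC(kernel='linear').fit(X_3d, y)
print('training accuracy in the lifted space:', clf_3d.score(X_3d, y))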
# Fit an RBF-kernel SVM directly on the 2D data
X, y = make_circles(100, factor=.1, noise=.1, random_state=2019)
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap=plt.cm.Paired)
clf = SVC(kernel='rbf')
clf.fit(X, y)
# Evaluate the decision function on a grid and draw the boundary and margins
ax = plt.gca()
x = np.linspace(-1, 1)
y_grid = np.linspace(-1, 1)   # renamed so the labels y are not overwritten
x_1, y_1 = np.meshgrid(x, y_grid)
P = np.zeros_like(x_1)
for i, xi in enumerate(x):
    for j, yj in enumerate(y_grid):
        # meshgrid varies x along columns, so the x index goes in the column slot
        P[j, i] = clf.decision_function(np.array([[xi, yj]]))[0]
ax.contour(x_1, y_1, P, colors='k', levels=[-1, 0, 1], alpha=0.5, linestyles=['--', '-', '--'])
plt.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], edgecolor='b',
            s=80, facecolors='none')
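The RBF model classifies new points directly in the original 2D space. A short usage sketch (the test points below are made up for illustration):
## make_circles labels the inner circle 1 and the outer ring 0
test_points = np.array([[0.0, 0.0], [1.0, 1.0]])
print(clf.predict(test_points))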