# -*- coding: utf-8 -*-
# Download the required data here: https://video.mugglecode.com/wine_quality.csv
"""
Task: red wine quality prediction
"""
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier
# Relative path of the wine-quality CSV that main() loads with pd.read_csv.
DATA_FILE = './data/wine_quality.csv'
def main():
    """
    Compare three classifiers and two voting ensembles on the wine data.

    Loads the CSV at ``DATA_FILE``, turns the ``quality`` column into a
    binary label (0 for quality <= 5, 1 for quality >= 6), holds out one
    third of the samples as a test set, min-max scales the features, and
    prints the test accuracy of a decision tree, a logistic regression, a
    linear SVM, and of hard- and soft-voting ensembles built from them.

    Takes no arguments and returns nothing; results are printed to stdout.
    """
    # One seed for the split and for every estimator that draws random
    # numbers, so repeated runs print the same accuracies.
    seed = 10

    wine_data = pd.read_csv(DATA_FILE)

    # Binarize the label in place. The order is safe because the values
    # written by the first statement (0) never satisfy the second condition.
    wine_data.loc[wine_data['quality'] <= 5, 'quality'] = 0
    wine_data.loc[wine_data['quality'] >= 6, 'quality'] = 1

    # Select features by name rather than by position so the label can never
    # end up inside X, whatever the column order of the CSV is.
    feat_cols = [col for col in wine_data.columns if col != 'quality']
    X = wine_data[feat_cols].values
    y = wine_data['quality'].values

    # Hold out one third of the samples for testing.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=1/3, random_state=seed)

    # Fit the scaler on the training split only and reuse it for the test
    # split, so no test-set statistics enter preprocessing.
    scaler = MinMaxScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Component classifiers. The display names are also used as the
    # estimator names inside VotingClassifier.
    clfs = [
        ('決策樹', DecisionTreeClassifier(max_depth=10, random_state=seed)),
        ('邏輯回歸', LogisticRegression(C=0.1)),
        # probability=True gives the SVM the predict_proba that soft voting needs.
        ('支持向量機', SVC(kernel='linear', probability=True, random_state=seed)),
    ]

    # Baseline: accuracy of each component on its own.
    for clf_name, clf in clfs:
        clf.fit(X_train_scaled, y_train)
        acc = clf.score(X_test_scaled, y_test)
        print('模型:{}, 準(zhǔn)確率:{:.2f}%'.format(clf_name, acc * 100))

    # Ensembles: majority vote on predicted labels ('hard') and vote on
    # predicted class probabilities ('soft').
    for voting in ('hard', 'soft'):
        ensemble = VotingClassifier(estimators=clfs, voting=voting)
        ensemble.fit(X_train_scaled, y_train)
        acc = ensemble.score(X_test_scaled, y_test)
        print('{} voting: {:.2f}%'.format(voting, acc * 100))
# Run the experiment only when this file is executed as a script.
if __name__ == '__main__':
    main()
# Sample output of the script above:
#   模型:決策樹, 準(zhǔn)確率:75.61%
#   模型:邏輯回歸, 準(zhǔn)確率:72.23%
#   模型:支持向量機, 準(zhǔn)確率:73.92%
#   hard voting: 75.05%
#   soft voting: 75.05%
# Clustering (unsupervised learning)
# -*- coding: utf-8 -*-
"""
Task: cluster analysis of image data
"""
from sklearn.datasets import load_digits
from sklearn.cluster import KMeans
import pandas as pd
import matplotlib.pyplot as plt
def main():
    """
    Cluster the scikit-learn digits data with k-means and show the centres.

    Fits a 10-cluster KMeans model on the flattened digit images, reshapes
    each cluster centre to an 8x8 image, and displays the ten centres in a
    2x5 grid of subplots.
    """
    pixel_rows = load_digits().data

    model = KMeans(n_clusters=10)
    cluster_labels = model.fit_predict(pixel_rows)
    # To inspect cluster sizes instead, plot
    # pd.Series(cluster_labels).value_counts() as a bar chart.

    # One 8x8 image per cluster centre.
    center_images = model.cluster_centers_.reshape(10, 8, 8)

    fig, axes = plt.subplots(2, 5, figsize=(8, 3))
    for ax, image in zip(axes.ravel(), center_images):
        # Hide the tick marks; only the image itself is of interest.
        ax.set_xticks([])
        ax.set_yticks([])
        ax.imshow(image, cmap=plt.cm.binary, interpolation='nearest')
    plt.show()
# Run the clustering demo only when this file is executed as a script.
if __name__ == '__main__':
    main()