import numpy as np
import pandas as pd
from sklearn import linear_model
from sklearn import model_selection
# Load the data set.
sports = pd.read_csv(r'C:\Users\Administrator\Desktop\Run or Walk.csv')
# Collect the predictor column names: every column from the fifth one onward.
# NOTE(review): assumes the target column `activity` is NOT among columns[4:];
# verify against the CSV header, otherwise the label leaks into X.
predictors = sports.columns[4:]
# Build the feature matrix. `DataFrame.ix` was removed in pandas 1.0, so the
# label-based `.loc` indexer is used instead.
X = sports.loc[:, predictors]
# Extract the target variable.
y = sports.activity
# Split into training and test sets (25% held out, fixed seed so the split is
# reproducible).
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.25, random_state=1234
)
# Fit a logistic regression model on the training set.
sklearn_logistic = linear_model.LogisticRegression()
sklearn_logistic.fit(X_train, y_train)
# Report the fitted intercept and coefficients.
print(sklearn_logistic.intercept_, sklearn_logistic.coef_)
# Predict class labels for the test set.
sklearn_predict = sklearn_logistic.predict(X_test)
# Count how many test rows were assigned to each class. Printed explicitly:
# a bare expression is silently discarded when this runs as a script.
print(pd.Series(sklearn_predict).value_counts())
# -----------------------------------------------------------------------------
# Import the evaluation metrics module.
from sklearn import metrics
# Confusion matrix with the class order fixed to [0, 1]
# (rows are the true classes, columns the predicted classes).
cm = metrics.confusion_matrix(y_test, sklearn_predict, labels=[0, 1])
# Printed explicitly: a bare `cm` expression has no effect in a script.
print(cm)
# Use the public metric functions. `metrics.scorer` was a private module that
# has been removed from scikit-learn, so `metrics.scorer.*` raises
# AttributeError on current versions.
Accuracy = metrics.accuracy_score(y_test, sklearn_predict)
# Recall of the positive class (label 1, the default pos_label).
Sensitivity = metrics.recall_score(y_test, sklearn_predict)
# Recall of the negative class (label 0).
Specificity = metrics.recall_score(y_test, sklearn_predict, pos_label=0)
print('模型準確率為%.2f%%:' %(Accuracy*100))
print('正例覆蓋率為%.2f%%' %(Sensitivity*100))
print('負例覆蓋率為%.2f%%' %(Specificity*100))
# -----------------------------------------------------------------------------
# Visualise the confusion matrix.
# Plotting libraries.
import matplotlib.pyplot as plt
import seaborn as sns

# Draw the matrix as an annotated heat map; cell values are rendered in
# scientific notation with two decimals on the green-blue colour map.
sns.heatmap(
    cm,
    annot=True,
    fmt='.2e',
    cmap='GnBu',
)
plt.show()
# -----------------------------------------------------------------------------
# Plot the ROC curve.
# Predicted probability of the positive class for each test row. The script
# previously passed `sm_y_probability`, which is never defined here and raised
# NameError; the scores are now taken from the fitted scikit-learn model.
# Column 1 of predict_proba corresponds to the second entry of
# `sklearn_logistic.classes_` (label 1 for a 0/1 target).
y_score = sklearn_logistic.predict_proba(X_test)[:, 1]
# Compute the false-positive rate, true-positive rate and the thresholds.
fpr, tpr, threshold = metrics.roc_curve(y_test, y_score)
# Area under the ROC curve.
roc_auc = metrics.auc(fpr, tpr)
# Shade the area under the curve.
plt.stackplot(fpr, tpr, color='steelblue', alpha=0.5, edgecolor='black')
# Outline the curve itself.
plt.plot(fpr, tpr, color='black', lw=1)
# Diagonal reference line.
plt.plot([0, 1], [0, 1], color='red', linestyle='--')
# Annotate the plot with the AUC value.
plt.text(0.5, 0.3, 'ROC curve (area = %0.2f)' % roc_auc)
# Axis labels.
plt.xlabel('1-Specificity')
plt.ylabel('Sensitivity')
plt.show()
# -----------------------------------------------------------------------------
# KS curve. Reference: http://www.reibang.com/p/b1b1344bd99f
# (Jianshu: "Risk-control data analysis study notes (2): building a credit
# scorecard with Python").
# The figure size is set when the figure is created. Calling
# plt.figure(figsize=...) after plotting opens a second, empty figure and
# leaves the KS plot at the default size.
fig, ax = plt.subplots(figsize=(20, 20))
# roc_curve prepends a sentinel threshold that lies above every real score
# (+inf in recent scikit-learn releases); skip it so the x axis stays finite.
# The KS curve is read in descending order of predicted probability, hence the
# 1 - threshold mirroring.
ks_score = 1 - threshold[1:]
ax.plot(ks_score, tpr[1:], label='tpr')
ax.plot(ks_score, fpr[1:], label='fpr')
# The KS statistic is the largest gap between the two curves.
ax.plot(ks_score, tpr[1:] - fpr[1:], label='KS')
ax.set_xlabel('score')
ax.set_title('KS Curve')
ax.set_ylim([0.0, 1.0])
legend = ax.legend(loc='upper left')
plt.show()