內(nèi)容:根據(jù)信用卡持卡人背景信息(年齡小作、教育水平亭姥、當(dāng)前工作年限、當(dāng)前居住年限顾稀、家庭收入达罗、債務(wù)占收入比例、信用卡負(fù)債、其他負(fù)債 )預(yù)測(cè)還款拖欠情況粮揉。用分類算法來建模預(yù)測(cè)
數(shù)據(jù)導(dǎo)入巡李;
import numpy as np
import pandas as pd

# Training set: all columns except the last are features, the last column is the label
data = pd.read_csv('C:\\Users\\dell\\Desktop\\datamining\\train__UnB.csv')
train_data = data.iloc[:, 0:-1]
train_label = data.iloc[:, -1]

# Test set with the same column layout
data1 = pd.read_csv('C:\\Users\\dell\\Desktop\\datamining\\test__UnB.csv')
test_data = data1.iloc[:, 0:-1]
test_label = data1.iloc[:, -1]
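A quick look at the loaded frames helps confirm that the feature/label split matches the description above (an exploratory sketch; the actual column names depend on the CSV files):

# Sanity check: shapes of the feature matrices and the first rows of the raw training file
print(train_data.shape, test_data.shape)
print(data.head())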
數(shù)據(jù)源:
機(jī)器學(xué)習(xí)分類預(yù)測(cè)
# Inspect the distinct label values
train_label.unique()
從標(biāo)簽數(shù)值看出,這是一個(gè)二分類問題扶认。
- KNN
from sklearn.model_selection import cross_val_score
from sklearn import neighbors
knnModel = neighbors.KNeighborsClassifier(n_neighbors=2)
knnModel.fit(train_data,train_label)
# Mean 5-fold cross-validation score on the training set
score = np.mean(cross_val_score(knnModel, train_data, train_label, cv=5))
result=knnModel.predict(test_data)
##計(jì)算accuracy,precision,recall,F1
TP=0;FP=0;FN=0;TN=0
for i in range(len(test_label)):
if test_label[i]==1 and result[i]==1:
TP+=1
elif test_label[i]==1 and result[i]==0:
FN+=1
elif test_label[i]==0 and result[i]==1:
FP+=1
elif test_label[i]==0 and result[i]==0:
TN+=1
accuracy=(TP+TN)/len(test_label)
precision=TP/(TP+FP)
recall=TP/(TP+FN)
F1=2*precision*recall/(precision+recall)
print('score:%f\naccuracy: %f\nprecision: %f\nrecall: %f \nF1: %f' %(score,accuracy,precision,recall,F1))
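The confusion-matrix counts above are tallied by hand; as a cross-check, scikit-learn's metrics module produces the same numbers directly (a sketch, reusing `result` and `test_label` from the KNN block above, with class 1 as the positive class):

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Same test-set metrics computed via sklearn.metrics
print(confusion_matrix(test_label, result))
print(accuracy_score(test_label, result),
      precision_score(test_label, result),
      recall_score(test_label, result),
      f1_score(test_label, result))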
- Naive Bayes
from sklearn.naive_bayes import MultinomialNB
MNBModel = MultinomialNB()
MNBModel.fit(train_data,train_label)
score = np.mean(cross_val_score(MNBModel, train_data, train_label, cv=5))
result=MNBModel.predict(test_data)
##計(jì)算accuracy,precision,recall,F1
TP=0;FP=0;FN=0;TN=0
for i in range(len(test_label)):
if test_label[i]==1 and result[i]==1:
TP+=1
elif test_label[i]==1 and result[i]==0:
FN+=1
elif test_label[i]==0 and result[i]==1:
FP+=1
elif test_label[i]==0 and result[i]==0:
TN+=1
accuracy=(TP+TN)/len(test_label)
precision=TP/(TP+FP)
recall=TP/(TP+FN)
F1=2*precision*recall/(precision+recall)
print('score:%f\naccuracy: %f\nprecision: %f\nrecall: %f \nF1: %f' %(score,accuracy,precision,recall,F1))
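MultinomialNB is designed for non-negative, count-like features (e.g. word counts); for continuous financial features such as income and debt ratios, GaussianNB is usually the more natural Naive Bayes variant. A hedged alternative sketch (the names gnbModel, score_gnb and result_gnb are illustrative):

from sklearn.naive_bayes import GaussianNB

# Gaussian Naive Bayes: models each continuous feature with a per-class normal distribution
gnbModel = GaussianNB()
gnbModel.fit(train_data, train_label)
score_gnb = np.mean(cross_val_score(gnbModel, train_data, train_label, cv=5))
result_gnb = gnbModel.predict(test_data)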
- SVM
from sklearn import svm
svcModel=svm.SVC(kernel='rbf')
svcModel.fit(train_data,train_label)
score = np.mean(cross_val_score(svcModel, train_data, train_label, cv=5))
result = svcModel.predict(test_data)
##計(jì)算accuracy,precision,recall,F1
TP=1;FP=1;FN=1;TN=1
for i in range(len(test_label)):
if test_label[i]==1 and result[i]==1:
TP+=1
elif test_label[i]==1 and result[i]==0:
FN+=1
elif test_label[i]==0 and result[i]==1:
FP+=1
elif test_label[i]==0 and result[i]==0:
TN+=1
accuracy=(TP+TN)/len(test_label)
precision=TP/(TP+FP)
recall=TP/(TP+FN)
F1=2*precision*recall/(precision+recall)
print('score:%f\naccuracy: %f\nprecision: %f\nrecall: %f \nF1: %f' %(score,accuracy,precision,recall,F1))
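Both the RBF-kernel SVM and KNN are sensitive to feature scale, and the features here (age, income, debt ratios) live on very different ranges, so standardizing them typically helps. A sketch using a scikit-learn Pipeline (the names svcScaled, score_scaled and result_scaled are illustrative):

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Standardize the features, then fit the RBF SVM; cross-validate the whole pipeline
svcScaled = make_pipeline(StandardScaler(), svm.SVC(kernel='rbf'))
score_scaled = np.mean(cross_val_score(svcScaled, train_data, train_label, cv=5))
svcScaled.fit(train_data, train_label)
result_scaled = svcScaled.predict(test_data)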
- Decision tree
from sklearn.tree import DecisionTreeClassifier
dtModel = DecisionTreeClassifier(max_leaf_nodes=8)
dtModel.fit(train_data, train_label)
score = np.mean(cross_val_score(dtModel, train_data, train_label, cv=5))
result=dtModel.predict(test_data)
##計(jì)算accuracy,precision,recall,F1
TP=1;FP=1;FN=1;TN=1
for i in range(len(test_label)):
if test_label[i]==1 and result[i]==1:
TP+=1
elif test_label[i]==1 and result[i]==0:
FN+=1
elif test_label[i]==0 and result[i]==1:
FP+=1
elif test_label[i]==0 and result[i]==0:
TN+=1
accuracy=(TP+TN)/len(test_label)
precision=TP/(TP+FP)
recall=TP/(TP+FN)
F1=2*precision*recall/(precision+recall)
print('score:%f\naccuracy: %f\nprecision: %f\nrecall: %f \nF1: %f' %(score,accuracy,precision,recall,F1))
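With max_leaf_nodes=8 the fitted tree is small enough to print and read, which makes the splits easy to interpret. A sketch using sklearn.tree.export_text (available in recent scikit-learn versions; feature names are assumed to be the training DataFrame's column names):

from sklearn.tree import export_text

# Text rendering of the fitted tree's splits and leaf values
print(export_text(dtModel, feature_names=list(train_data.columns)))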
- Random forest
from sklearn.ensemble import RandomForestClassifier
rfcModel = RandomForestClassifier(n_estimators=8, max_leaf_nodes=None)
rfcModel.fit(train_data,train_label)
score = np.mean(cross_val_score(rfcModel, train_data, train_label, cv=5))
result=rfcModel.predict(test_data)
##計(jì)算accuracy,precision,recall,F1
TP=1;FP=1;FN=1;TN=1
for i in range(len(test_label)):
if test_label[i]==1 and result[i]==1:
TP+=1
elif test_label[i]==1 and result[i]==0:
FN+=1
elif test_label[i]==0 and result[i]==1:
FP+=1
elif test_label[i]==0 and result[i]==0:
TN+=1
accuracy=(TP+TN)/len(test_label)
precision=TP/(TP+FP)
recall=TP/(TP+FN)
F1=2*precision*recall/(precision+recall)
print('score:%f\naccuracy: %f\nprecision: %f\nrecall: %f \nF1: %f' %(score,accuracy,precision,recall,F1))
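After fitting, the random forest exposes feature_importances_, which gives a rough ranking of which background attributes drive the default prediction (a sketch; the variable name importances is illustrative and the values depend on the fitted model):

# Impurity-based feature importances, sorted from most to least influential
importances = pd.Series(rfcModel.feature_importances_, index=train_data.columns)
print(importances.sort_values(ascending=False))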