# Normalize data with the sigmoid function.
def Sigmoid(X):
    """Logistic sigmoid: map input to the open interval (0, 1).

    Args:
        X: a scalar or array-like of numbers.

    Returns:
        numpy float (or ndarray for array input) equal to 1 / (1 + e^-X).
    """
    # Fix: original line had unbalanced parentheses and a stray semicolon.
    # asarray generalizes the old float(X) so arrays work too (backward
    # compatible: scalars still return a scalar value).
    X = np.asarray(X, dtype=float)
    return 1.0 / (1.0 + np.exp(-X))
# Data cleaning: convert a DataFrame text column to numeric codes.
def Replace(X, columns):
    """Encode a DataFrame column in place: each distinct value becomes its
    rank in sorted order (0 for the smallest value, 1 for the next, ...).

    Args:
        X: pandas DataFrame, modified in place.
        columns: name of the single column to encode.

    Returns:
        The same DataFrame X with X[columns] replaced by integer codes.
    """
    # Fix: the original groupby(...).agg({'cnt': 'count'}) uses a nested
    # renamer, which modern pandas rejects (SpecificationError), and the
    # chained Series.replace calls could collide when a raw value equals an
    # already-assigned integer code. A single map() applies all codes at once.
    # groupby sorts its keys, so sorted unique values reproduce the old codes.
    mapping = {value: code
               for code, value in enumerate(sorted(X[columns].dropna().unique()))}
    X[columns] = X[columns].map(mapping)
    return X
# Split the data into training and test sets (70/30).
def Data(X, columns):
    """Split X into a 70/30 train/test set using `columns` as the target.

    Args:
        X: pandas DataFrame containing features and the target column.
        columns: name of the target column; it is dropped from the features.

    Returns:
        Tuple (X_train, X_test, y_train, y_test).
    """
    # Fix: sklearn.cross_validation was removed in scikit-learn 0.20;
    # train_test_split now lives in sklearn.model_selection.
    from sklearn.model_selection import train_test_split
    Y = X[columns]
    X = X.drop([columns], axis=1)
    # random_state=0 keeps the split reproducible across runs.
    X_train, X_test, y_train, y_test = train_test_split(
        X, Y, test_size=0.3, random_state=0)
    return (X_train, X_test, y_train, y_test)
# Baseline machine-learning models; each fits on the training set and
# returns its score on the test set.
# NOTE(review): removed the statement `y_train.astype('int')` — it had no
# effect (Series.astype returns a copy that was discarded) and `y_train` is
# not defined at module scope, so it raised NameError on import. Cast inside
# the caller instead: y_train = y_train.astype(int).
def RF(X_train, X_test, y_train, y_test):
    """Random forest classifier: fit on the training set, score on the test set.

    Returns:
        Accuracy of the fitted model on (X_test, y_test).
    """
    from sklearn.ensemble import RandomForestClassifier
    # Fix: accuracy_score was used without a visible import; import it
    # locally, matching the file's local-import style.
    from sklearn.metrics import accuracy_score
    model = RandomForestClassifier(n_estimators=100)
    model.fit(X_train, y_train)
    predicted = model.predict(X_test)
    return accuracy_score(y_test, predicted)
def LOR(X_train, X_test, y_train, y_test):
    """L1-regularized logistic regression: fit on train, score on test.

    Returns:
        Accuracy of the fitted model on (X_test, y_test).
    """
    from sklearn.linear_model import LogisticRegression
    # Fix: accuracy_score was used without a visible import.
    from sklearn.metrics import accuracy_score
    # Fix: the default lbfgs solver does not support penalty='l1' and raises
    # ValueError in modern scikit-learn; liblinear supports l1 + ovr.
    lor = LogisticRegression(penalty='l1', C=100, multi_class='ovr',
                             solver='liblinear')
    lor.fit(X_train, y_train)
    predicted = lor.predict(X_test)
    return accuracy_score(y_test, predicted)
def Svm(X_train, X_test, y_train, y_test):
    """RBF-kernel support vector classifier: fit on train, score on test.

    Returns:
        Accuracy of the fitted model on (X_test, y_test).
    """
    from sklearn import svm
    # Fix: accuracy_score was used without a visible import; import it
    # locally, matching the file's local-import style.
    from sklearn.metrics import accuracy_score
    model = svm.SVC(kernel='rbf')
    model.fit(X_train, y_train)
    predicted = model.predict(X_test)
    return accuracy_score(y_test, predicted)
def LR(X_train, X_test, y_train, y_test):
    """Ordinary least-squares linear regression: fit on train, score on test.

    Returns:
        Tuple (score, intercept_, coef_) where score is the R^2 of the
        fitted model on (X_test, y_test).
    """
    from sklearn.linear_model import LinearRegression
    # Renamed the local from LR to model: it shadowed this function's name.
    model = LinearRegression()
    model.fit(X_train, y_train)
    # Fix: accuracy_score raises ValueError on continuous regression output;
    # the regression-appropriate metric is R^2, provided by model.score.
    score = model.score(X_test, y_test)
    return (score, model.intercept_, model.coef_)
# Measure the correlation between each feature and the target variable.
def Correlation(df, columns_name):
    """Print the correlation of every numeric column of a Spark DataFrame
    with the target column `columns_name`.

    Args:
        df: Spark DataFrame (uses df.select / df.stat.corr).
        columns_name: name of the target column to correlate against.
    """
    import six
    for i in df.columns:
        # Skip string columns: corr is only defined for numeric ones.
        # NOTE(review): this samples only the first row's value to decide
        # the column type — assumes the column is homogeneously typed.
        if not isinstance(df.select(i).take(1)[0][0], six.string_types):
            # Fix: original called house_df.stat.corr — an undefined global
            # left over from a notebook; the parameter df is the intended one.
            print("Correlation to {} for ".format(columns_name), i,
                  df.stat.corr(columns_name, i))