# -*- coding: utf-8 -*-
from numpy import *
import pandas as pd
###線性回歸####
#讀取數(shù)據(jù)
data = pd.read_csv('http://www-bcf.usc.edu/~gareth/ISL/Advertising.csv', index_col=0)
data.head()
data.tail()
#畫散點圖
import seaborn as sns
import matplotlib
%matplotlib inline
sns.pairplot(data, x_vars=['TV','Radio','Newspaper'], y_vars='Sales', size=7, aspect=0.8)
sns.pairplot(data, x_vars=['TV','Radio','Newspaper'], y_vars='Sales', size=7, aspect=0.8, kind='reg')
#計算相關(guān)系數(shù)矩陣
data.corr()
#構(gòu)建X、Y數(shù)據(jù)集
X = data[['TV', 'Radio', 'Newspaper']]
X.head()
y = data['Sales']
y.head()
##直接根據(jù)系數(shù)矩陣公式計算
def standRegres(xArr,yArr):
xMat = mat(xArr); yMat = mat(yArr).T
xTx = xMat.T*xMat
if linalg.det(xTx) == 0.0:
print "This matrix is singular, cannot do inverse"
return
ws = xTx.I * (xMat.T*yMat)
return ws
#求解回歸方程系數(shù)
X2=X
X2['intercept']=[1]*200
standRegres(X2,y)
##利用現(xiàn)有庫求解
from sklearn.linear_model import LinearRegression
linreg = LinearRegression()
linreg.fit(X, y)
print linreg.intercept_
print linreg.coef_
print zip(['TV','Radio','Newspaper'], linreg.coef_)
##測試集和訓(xùn)練集的構(gòu)建
from sklearn.cross_validation import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)
linreg.fit(X_train, y_train)
#結(jié)果
print linreg.intercept_
print linreg.coef_
print zip(['TV','Radio','Newspaper'], linreg.coef_)
#預(yù)測
y_pred = linreg.predict(X_test)
#誤差評估
from sklearn import metrics
# calculate MAE using scikit-learn
print "MAE:",metrics.mean_absolute_error(y_test,y_pred)
# calculate MSE using scikit-learn
print "MSE:",metrics.mean_squared_error(y_test,y_pred)
# calculate RMSE using scikit-learn
print "RMSE:",np.sqrt(metrics.mean_squared_error(y_test,y_pred))
##模型比較
feature_cols = ['TV', 'Radio']
X = data[feature_cols]
y = data.Sales
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)
linreg.fit(X_train, y_train)
y_pred = linreg.predict(X_test)
# calculate MAE using scikit-learn
print "MAE:",metrics.mean_absolute_error(y_test,y_pred)
# calculate MSE using scikit-learn
print "MSE:",metrics.mean_squared_error(y_test,y_pred)
# calculate RMSE using scikit-learn
print "RMSE:",np.sqrt(metrics.mean_squared_error(y_test,y_pred))