線性回歸的求解方法是最小二乘法,具體原理:
保證所有數據偏差的平方和最小
Paste_Image.png
Paste_Image.png
Paste_Image.png
證明步驟
損失函數:
Paste_Image.png
$$L(w) = \sum_{i=1}^{m} \left(y_i - x_i^{T} w\right)^2 = (y - Xw)^{T}(y - Xw)$$
化簡
Paste_Image.png
Paste_Image.png
Paste_Image.png
對L求偏導數
Paste_Image.png
Paste_Image.png
Paste_Image.png
$$\frac{\partial L}{\partial w} = -2X^{T}(y - Xw) = 0 \;\Rightarrow\; \hat{w} = (X^{T}X)^{-1}X^{T}y$$
from numpy import *
import matplotlib.pyplot as plt
# 1. Get data from file
def load_data_set(filename):
    """Load a tab-separated numeric data file into feature rows and labels.

    Every column except the last is treated as a feature and the last
    column as the label. The number of feature columns is taken from the
    first line of the file.

    Args:
        filename: Path of the tab-separated text file to read.

    Returns:
        A ``(data_mat, label_mat)`` tuple: ``data_mat`` is a list of
        per-row feature lists (floats) and ``label_mat`` is the list of
        per-row labels (floats). Both are empty for an empty file.

    Raises:
        ValueError: If a field cannot be parsed as a float.
        IndexError: If a row has fewer columns than the first line.
    """
    data_mat = []
    label_mat = []
    # Read everything inside a context manager so the handle is always closed.
    with open(filename) as fr:
        lines = fr.readlines()
    if not lines:
        return data_mat, label_mat

    num_feat = len(lines[0].split('\t')) - 1
    for line in lines:
        cur_line = line.strip().split('\t')
        # Skip blank lines (e.g. a trailing newline at the end of the file).
        if cur_line == ['']:
            continue
        data_mat.append([float(cur_line[i]) for i in range(num_feat)])
        # The label is the last column of the row, not the last feature.
        label_mat.append(float(cur_line[-1]))
    return data_mat, label_mat
# 2. Solve for the regression coefficients w
def stand_regres(x_arr, y_arr):
    """Solve ordinary least squares with the normal equation.

    Computes ``ws = (X^T X)^-1 X^T y``.

    Args:
        x_arr: 2-D array-like of samples, one sample per row.
        y_arr: Target values, either a flat sequence with one value per
            sample or a column with one row per sample.

    Returns:
        A column matrix of coefficients, or ``None`` when ``X^T X`` is
        singular (a message is printed in that case).
    """
    # asmatrix is the same function as the old `mat` alias, which NumPy 2.0
    # removed.
    x_mat = asmatrix(x_arr)
    y_mat = asmatrix(y_arr)
    # A flat label list becomes a 1 x m row; X^T * y needs an m x 1 column.
    if y_mat.shape[0] != x_mat.shape[0]:
        y_mat = y_mat.T
    xTx = x_mat.T * x_mat
    # A zero determinant means xTx has no inverse.
    if linalg.det(xTx) == 0.0:
        print("this matrix is singular, cannot do inverse!!!")
        return None
    return xTx.I * (x_mat.T * y_mat)
# 3. Fit the model on the sample data and plot the points with the fitted line
xArr, yArr = load_data_set('ext0.txt')
xMat = asmatrix(xArr)
# Labels as an m x 1 column so that X^T * y is conformable.
yMat = asmatrix(yArr).T
# Regress the labels (not the features) on the features.
ws = stand_regres(xArr, yMat)
print(ws)
# Sort a copy along axis 0 so the fitted line is drawn left to right.
# NOTE(review): sort(0) orders each column independently; this looks like it
# relies on column 0 being a constant bias term -- confirm against the data.
xCopy = xMat.copy()
xCopy.sort(0)
yHat = xCopy * ws
fig = plt.figure()
# 1 row, 1 column, first subplot
ax = fig.add_subplot(111)
# matplotlib wants flat arrays, so flatten the matrix columns with .A1
ax.scatter(xMat[:, 1].A1, yMat.A1)
ax.plot(xCopy[:, 1].A1, yHat.A1)
plt.show()
```