自己寫了一個代碼,用線性回歸來預測葡萄酒的質量,雖然結果來說不太好,不過重在理解過程,後續寫理解。
#!/usr/bin/env python
# coding: utf-8
# In[1]:
# Red-wine quality dataset (CSV hosted in the UCI machine-learning repository).
url = "http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
# In[2]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
# In[3]:
# Select SimHei as the sans-serif font; the figure title used later in this
# script contains Chinese text.
plt.rcParams['font.sans-serif'] = ['SimHei']
# In[5]:
# The file is semicolon-separated, not comma-separated.
data = pd.read_csv(url, sep=';')
data
# In[9]:
# Drop rows with any missing value, then drop exact duplicate rows.
data = data.dropna(how='any')
data = data.drop_duplicates()
data
# In[15]:
# Keep only the columns whose absolute correlation with 'quality' is >= 0.2.
# 'quality' itself always survives (its self-correlation is 1). Selecting the
# columns with .loc avoids the double transpose, which copies the frame twice
# and coerces every column to a common dtype.
is_correlated = data.corr()['quality'].abs() >= 0.2
data = data.loc[:, is_correlated]
data
# In[25]:
plt.scatter(data['alcohol'], data['quality'])
# In[16]:
# Features are every column but the last; the target is the last column, kept
# 2-D with shape (n, 1). This relies on 'quality' being the last column.
x = np.array(data.iloc[:, :-1])
y = np.array(data.iloc[:, -1:])
# In[17]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)
# In[18]:
# Fit the scaler on the training split only, then apply that same mapping to
# the test split. Refitting on the test data would scale the two splits with
# different min/max values and leak test-set statistics into preprocessing.
min_max_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
x_train = min_max_scaler.fit_transform(x_train)
x_test = min_max_scaler.transform(x_test)
# In[20]:
lr = LinearRegression()
lr.fit(x_train, y_train)
y_test_pred = lr.predict(x_test)
y_train_pred = lr.predict(x_train)
# In[26]:
def draw_figure(title, *datalist):
    """Plot one or more series on a single figure and display it.

    Args:
        title: Text used as the figure title.
        *datalist: Any number of indexable items where item ``[0]`` holds the
            values to plot and item ``[1]`` is the legend label for them.
    """
    plt.figure(facecolor='gray', figsize=(20, 10))
    for v in datalist:
        # Draw the series as a labelled line, then overlay point markers.
        plt.plot(v[0], '-', label=v[1], linewidth=2)
        plt.plot(v[0], 'o')
    plt.grid()
    plt.title(title, fontsize=20)
    plt.legend(fontsize=20)
    plt.show()
# In[32]:
# Report the mean squared error on both splits.
print("The mean_squared_error for train set is {}".format(mean_squared_error(y_train, y_train_pred)))
print("The mean_squared_error for test set is {}".format(mean_squared_error(y_test, y_test_pred)))
# In[33]:
# Plot true vs. predicted test targets; the title embeds the score returned by
# lr.score on the test split.
test_r2 = lr.score(x_test, y_test)
draw_figure("預測值與真實值圖模型的$R^2={:.4f}$".format(test_r2), [y_test, "True"], [y_test_pred, "Pred"])