# -*- coding: utf-8 -*-
"""
Created on Sun Sep 30 22:57:44 2018
@author: ltx

Both L2 regularization and random node deletion (dropout) help prevent a
model from overfitting.  When the data set is very small, use
regularization so the trained model does not overfit.

NOTE(review): the original file had these notes as bare (non-comment) text,
which made the script a SyntaxError; they are now proper comments/docstring.
"""
import numpy as np
import matplotlib.pyplot as plt

import reg_utils  # project helpers: data loading, forward/backward prop, cost

# Load the 2-D dataset; examples are stacked column-wise (one column per example).
train_x, train_y, test_x, test_y = reg_utils.load_2D_dataset(is_plot=True)
X = train_x
Y = train_y
m = X.shape[1]  # number of training examples

# -------------- L2-regularized model (avoid overfitting) start ----------------
# L2 regularization: modify the cost function by adding an L2 penalty term.
def compute_cost_reg(a3, Y, parameters, lambd):
    """Cross-entropy cost plus the L2 regularization penalty.

    Arguments:
        a3 -- post-activation output of forward propagation, shape (1, m)
        Y -- ground-truth labels, shape (1, m)
        parameters -- dict holding the weight matrices "W1", "W2", "W3"
        lambd -- regularization strength (lambda)

    Returns:
        cost -- cross-entropy cost + (lambd / (2*m)) * sum of squared weights
    """
    W1 = parameters["W1"]
    W2 = parameters["W2"]
    W3 = parameters["W3"]
    cross_entropy_cost = reg_utils.compute_cost(a3, Y)
    # BUG FIX: the original expression was (1/m)(...)(lambd/2) with the
    # multiplication operators missing, which raises
    # "TypeError: 'float' object is not callable" at runtime.
    l2_cost = (lambd / (2 * m)) * (
        np.sum(np.square(W1)) + np.sum(np.square(W2)) + np.sum(np.square(W3))
    )
    cost = cross_entropy_cost + l2_cost
    return cost
def backward_propagation_reg(X, Y, cache, lambd):
    """Backward pass with the L2 regularization gradient term.

    Each weight gradient carries the extra (lambd * W) / m term that comes
    from differentiating the L2 penalty added to the cost.

    Arguments:
        X -- input data, shape (n_x, m)
        Y -- labels, shape (1, m)
        cache -- tuple produced by reg_utils.forward_propagation
        lambd -- regularization strength

    Returns:
        grads -- dict of gradients keyed "dW1", "db1", ..., "dW3", "db3"
    """
    (Z1, A1, W1, b1, Z2, A2, W2, b2, Z3, A3, W3, b3) = cache

    # Output layer (sigmoid + cross-entropy): dZ3 = A3 - Y.
    dZ3 = A3 - Y
    dW3 = (1 / m) * np.dot(dZ3, A2.T) + ((lambd * W3) / m)
    db3 = (1 / m) * np.sum(dZ3, axis=1, keepdims=True)

    # Hidden layer 2 (ReLU): the gradient flows only where A2 > 0.
    dA2 = np.dot(W3.T, dZ3)
    dZ2 = dA2 * np.int64(A2 > 0)
    dW2 = (1 / m) * np.dot(dZ2, A1.T) + ((lambd * W2) / m)
    db2 = (1 / m) * np.sum(dZ2, axis=1, keepdims=True)

    # Hidden layer 1 (ReLU).
    dA1 = np.dot(W2.T, dZ2)
    dZ1 = dA1 * np.int64(A1 > 0)
    dW1 = (1 / m) * np.dot(dZ1, X.T) + ((lambd * W1) / m)
    db1 = (1 / m) * np.sum(dZ1, axis=1, keepdims=True)

    return {"dW1": dW1, "db1": db1,
            "dW2": dW2, "db2": db2,
            "dW3": dW3, "db3": db3}
# -------------- L2-regularized model (avoid overfitting) end ----------------
# ------- dropout (random node deletion) to prevent overfitting: start -------
def forward_DelNode(X, parameters, keep_prob):
    """Forward propagation with inverted dropout on both hidden layers.

    Arguments:
        X -- input data, shape (n_x, m)
        parameters -- dict with weights/biases "W1", "b1", ..., "W3", "b3"
        keep_prob -- probability of keeping a hidden unit active

    Returns:
        A3 -- sigmoid output of the last layer
        cache -- tensors (including the dropout masks D1/D2) for backprop
    """
    W1, b1 = parameters["W1"], parameters["b1"]
    W2, b2 = parameters["W2"], parameters["b2"]
    W3, b3 = parameters["W3"], parameters["b3"]

    # Fixed seed so the dropout masks are reproducible on every call.
    np.random.seed(1)

    # Layer 1: linear -> ReLU -> dropout (scale by keep_prob: inverted dropout).
    Z1 = np.dot(W1, X) + b1
    A1 = reg_utils.relu(Z1)
    D1 = np.random.rand(A1.shape[0], A1.shape[1]) < keep_prob
    A1 = (A1 * D1) / keep_prob

    # Layer 2: linear -> ReLU -> dropout.
    Z2 = np.dot(W2, A1) + b2
    A2 = reg_utils.relu(Z2)
    D2 = np.random.rand(A2.shape[0], A2.shape[1]) < keep_prob
    A2 = (A2 * D2) / keep_prob

    # Output layer: linear -> sigmoid (no dropout on the output layer).
    Z3 = np.dot(W3, A2) + b3
    A3 = reg_utils.sigmoid(Z3)

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)
    return A3, cache
# The backward pass must also drop the same nodes (built on top of the
# un-regularized backward propagation, reusing the forward masks).
def backDelNode(X, Y, cache, keep_prob):
    """Backward propagation matching forward_DelNode's dropout.

    The masks D1/D2 saved in the cache are re-applied to dA1/dA2 (and
    re-scaled by keep_prob) so gradients only flow through the units that
    were kept during the forward pass.

    Arguments:
        X -- input data, shape (n_x, m)
        Y -- labels, shape (1, m)
        cache -- tuple produced by forward_DelNode
        keep_prob -- keep probability used in the forward pass

    Returns:
        grads -- dict of gradients keyed "dW1", "db1", ..., "dW3", "db3"
    """
    (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3) = cache

    # Output layer (sigmoid + cross-entropy).
    dZ3 = A3 - Y
    dW3 = (1 / m) * np.dot(dZ3, A2.T)
    db3 = (1 / m) * np.sum(dZ3, axis=1, keepdims=True)

    # Hidden layer 2: apply the same dropout mask, then the ReLU derivative
    # (gradient passes only where the activation was positive).
    dA2 = np.dot(W3.T, dZ3)
    dA2 = (dA2 * D2) / keep_prob
    dZ2 = dA2 * np.int64(A2 > 0)
    dW2 = (1 / m) * np.dot(dZ2, A1.T)
    db2 = (1 / m) * np.sum(dZ2, axis=1, keepdims=True)

    # Hidden layer 1.
    dA1 = np.dot(W2.T, dZ2)
    dA1 = (dA1 * D1) / keep_prob
    dZ1 = dA1 * np.int64(A1 > 0)
    dW1 = (1 / m) * np.dot(dZ1, X.T)
    db1 = (1 / m) * np.sum(dZ1, axis=1, keepdims=True)

    return {"dW1": dW1, "db1": db1,
            "dW2": dW2, "db2": db2,
            "dW3": dW3, "db3": db3}
# -------- dropout (random node deletion) to prevent overfitting: end --------
def model(X,Y,learning_rate=0.3,num_iterations=20000,print_cost=True,is_plot=True,lambd=0,keep_prob=1):
    """Train a 3-layer network on (X, Y) with plain gradient descent.

    lambd != 0 switches on L2 regularization; keep_prob != 1 switches on
    dropout.  If both are set, L2 takes priority and dropout is ignored.

    Arguments:
        X, Y -- training data/labels (examples column-wise)
        learning_rate -- gradient-descent step size
        num_iterations -- number of full-batch update steps
        print_cost -- record/print the cost every 1000 iterations
        is_plot -- draw the learning curve after training
        lambd -- L2 regularization strength (0 disables it)
        keep_prob -- dropout keep probability (1 disables it)

    Returns:
        parameters -- learned weights/biases dict
    """
    grads = {}
    costs = []  # cost sampled every 1000 iterations, for the learning curve
    layerdims = [X.shape[0], 20, 3, 1]
    parameters = reg_utils.initialize_parameters(layerdims)

    use_l2 = lambd != 0
    use_dropout = keep_prob != 1

    for iteration in range(num_iterations):
        # ---- forward pass + cost ----
        if use_dropout and not use_l2:
            a3, cache = forward_DelNode(X, parameters, keep_prob)
            cost = reg_utils.compute_cost(a3, Y)
        else:
            a3, cache = reg_utils.forward_propagation(X, parameters)
            if use_l2:
                cost = compute_cost_reg(a3, Y, parameters, lambd)
            else:
                cost = reg_utils.compute_cost(a3, Y)

        if print_cost and iteration % 1000 == 0:
            costs.append(cost)
            print("Cost=" + str(cost))

        # ---- backward pass + parameter update ----
        if use_l2:
            grads = backward_propagation_reg(X, Y, cache, lambd)
        elif use_dropout:
            grads = backDelNode(X, Y, cache, keep_prob)
        else:
            grads = reg_utils.backward_propagation(X, Y, cache)
        parameters = reg_utils.update_parameters(parameters, grads, learning_rate)

    if is_plot:
        plt.plot(costs)
        plt.xlabel('iterations (x1,000)')
        plt.ylabel('cost')
        plt.title("Learning rate =" + str(learning_rate))
        plt.show()
    return parameters
# Train with dropout (keep_prob=0.86) and no L2 regularization.
parameters = model(X, Y, learning_rate=0.3, num_iterations=30000, print_cost=True, is_plot=True, lambd=0, keep_prob=0.86)

print("訓(xùn)練集精度:")
prediction = reg_utils.predict(X, Y, parameters)
print("測試集精度:")
prediction = reg_utils.predict(test_x, test_y, parameters)

# ------------- plot the decision boundary of the predictions ----------------
# NOTE(review): the two trailing section headers were bare (non-comment) text
# in the original, which made the script a SyntaxError; now proper comments.
plt.title("Model without regularization")
axes = plt.gca()
axes.set_xlim([-0.75, 0.40])
axes.set_ylim([-0.75, 0.65])
reg_utils.plot_decision_boundary(lambda x: reg_utils.predict_dec(parameters, x.T), train_x, train_y)
# -------------------- experiment results ------------------------------------