To demonstrate the effect of BatchNormalization, I wrote a small quadratic-function regression script. The loss curves show that the model with BatchNormalization layers does converge faster. The script also uses the Keras functional (Model) API. For the theory behind BatchNormalization, see: https://arxiv.org/pdf/1502.03167v3.pdf
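As a quick refresher on what the layer does during training, here is a minimal standalone NumPy sketch of the transform described in the paper above (separate from the full script that follows; gamma, beta and eps are illustrative names, not taken from the script):

import numpy as np

def batch_norm_forward(x, gamma, beta, eps=1e-3):
    # Normalize each feature over the mini-batch, then scale and shift with
    # the learned parameters gamma and beta. This is the training-time
    # behaviour; at inference Keras uses moving averages instead of the
    # current batch statistics.
    mu = x.mean(axis=0)
    var = x.var(axis=0)
    x_hat = (x - mu) / np.sqrt(var + eps)
    return gamma * x_hat + beta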
#coding:utf-8
import numpy as np
import matplotlib.pyplot as plt
from keras.models import Model
from keras.layers import Input, Dense, Activation, BatchNormalization
from keras.utils import plot_model
x_data = np.linspace(-5,5,3000)
np.random.shuffle(x_data)
noise = np.random.normal(0,1,x_data.shape)
y_data = x_data**2+5+noise
x_train = x_data[:2500]
y_train = y_data[:2500]
# scatter plot of the training data (displayed later, together with the loss curve)
plt.scatter(x_train, y_train, marker='.')
x_test = x_data[2500:]
y_test = y_data[2500:]
inputs = Input(shape=(1,))
x = Dense(3)(inputs)
x = BatchNormalization(axis=-1)(x)
x = Activation('sigmoid')(x)
x = Dense(3)(x)
x = BatchNormalization(axis=-1)(x)
x = Activation('sigmoid')(x)
x = Dense(3)(x)
outputs = Dense(1)(x)
model = Model(inputs=inputs, outputs=outputs)
plot_model(model, to_file='model.png', show_shapes=True)
model.compile(optimizer='sgd',loss='mse')
print('Train------------')
sum_cost = []
for step in range(1001):
    # one full-batch gradient update on the whole training set
    cost = model.train_on_batch(x_train, y_train)
    sum_cost = np.append(sum_cost, cost)
    if step % 100 == 0:
        print('cost=', cost)
# plot the loss curve in its own figure
plt.figure()
plt.plot(range(1001), sum_cost)
plt.show()
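# Note: the manual loop above could equivalently be written with model.fit,
# which records the per-epoch loss itself (a sketch; batch_size=len(x_train)
# mimics the full-batch updates used here):
#   history = model.fit(x_train, y_train, epochs=1001,
#                       batch_size=len(x_train), verbose=0)
#   plt.plot(history.history['loss'])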
# evaluate on the test set and predict with the trained model
loss = model.test_on_batch(x_test, y_test)
print('test cost:', loss)
y_pred = model.predict(x_test)
plt.figure()
plt.scatter(x_train,y_train,c='b',marker='.')
plt.scatter(x_test, y_pred, c='r', marker='.')
plt.show()
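For the comparison shown below, the no-BatchNormalization baseline is built by simply dropping the two BatchNormalization layers and training it with the same loop (a minimal sketch reusing the imports above; model_no_bn is just an illustrative name):

inputs_nb = Input(shape=(1,))
h = Dense(3)(inputs_nb)
h = Activation('sigmoid')(h)
h = Dense(3)(h)
h = Activation('sigmoid')(h)
h = Dense(3)(h)
outputs_nb = Dense(1)(h)
model_no_bn = Model(inputs=inputs_nb, outputs=outputs_nb)
model_no_bn.compile(optimizer='sgd', loss='mse')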
Loss curve with BatchNormalization:
[image: loss curve with BatchNormalization]
Loss curve without BatchNormalization:
[image: loss curve without BatchNormalization]
The fitting result is as follows:
[image: Figure_3.png, fitting result]