import numpy as np
def main():
    """Train a 2-layer fully-connected network with manual NumPy backprop.

    Network: 64 samples, 1000-dim inputs, one hidden layer of 100 ReLU
    units, 10-dim outputs.  Runs 500 steps of plain gradient descent on a
    sum-of-squares loss against random targets, printing the loss at each
    step, then prints the residual (y - y_pred) of the trained network.
    """
    # 64 samples, 1000-dim input, 100 hidden units, 10-dim output.
    N, D_in, H, D_out = 64, 1000, 100, 10
    # Random input samples and random regression targets.
    x = np.random.randn(N, D_in)
    y = np.random.randn(N, D_out)
    # Random initialization of both weight matrices.
    w1 = np.random.randn(D_in, H)
    w2 = np.random.randn(H, D_out)
    # A learning rate in the 1e-6 .. 1e-4 range is typical for this setup.
    learning_rate = 1e-6
    # Training loop: 500 iterations.
    for t in range(500):
        # Forward pass.
        h = x.dot(w1)               # (N, H)
        h_relu = np.maximum(h, 0)   # (N, H)
        y_pred = h_relu.dot(w2)     # (N, D_out)
        # Sum-of-squares loss.
        loss = np.square(y_pred - y).sum()
        print(t, loss)
        # Backward pass: gradients via the chain rule.
        grad_y_pred = 2.0 * (y_pred - y)
        grad_w2 = h_relu.T.dot(grad_y_pred)
        grad_h_relu = grad_y_pred.dot(w2.T)
        grad_h = grad_h_relu.copy()
        # ReLU passes gradient only where its input was positive.
        grad_h[h < 0] = 0
        grad_w1 = x.T.dot(grad_h)
        # Plain gradient-descent update of w1 and w2.
        w1 -= learning_rate * grad_w1
        w2 -= learning_rate * grad_w2
    # Predict with the trained weights.
    h = x.dot(w1)
    h_relu = np.maximum(h, 0)
    y_pred = h_relu.dot(w2)
    # Print the residual between targets and predictions.
    print(y - y_pred)
# Run the training demo only when executed as a script (not on import).
if __name__ == '__main__':
    main()
經過500次迭代,損失逐漸減小
490 6.488345724242929e-07
491 6.189309834083105e-07
492 5.904337869152525e-07
493 5.632434702894173e-07
494 5.373005118562679e-07
495 5.125660475282727e-07
496 4.889721590543996e-07
497 4.6647564933752185e-07
498 4.4502716144784435e-07
499 4.245642390520534e-07
進行預測測試,部分結果如下,可見誤差已經非常小了
[-4.05827834e-06 -2.08487774e-05 -1.08563393e-05 2.56239401e-05
1.21532941e-05 8.19823028e-06 -3.31557266e-05 -5.22064015e-06
-2.07082664e-05 -9.00997895e-06]
[ 7.89988666e-06 6.81363721e-06 -1.28522000e-06 -1.39193166e-05
-1.49279614e-05 1.09177779e-05 2.73508543e-05 -3.20825669e-06
-6.57302516e-07 4.38394831e-06]]