使用python實現兩層神經網絡(HW3)
記錄一下手動實現一個兩層全連接神經網絡的過程,主要針對李宏毅老師 HW3 的 7 分類問題。本來想用 NN-SVG 畫出這個網絡的結構,結果維度太高,工具直接卡死了(也可能是我還不太會用),所以改用一個規模較小的結構來示意,本質上就是一個簡單的全連接神經網絡(FCNN)。
BP(反向傳播)算法的推導就不寫在這裏了,其他大神都已經講得很不錯了。
網絡大概結構:
源代碼:
import numpy as np
import matplotlib.pyplot as plt
from keras.utils import to_categorical
# Locations of the image CSV and the label CSV.
file_path = r'C:/Users/Desktop/2020kaoyan/ml/2017MLSpring_Hung-yi-Lee-master/2017MLSpring_Hung-yi-Lee-master/HW3/data.csv'
label_path = r'C:/Users/Desktop/2020kaoyan/ml/2017MLSpring_Hung-yi-Lee-master/2017MLSpring_Hung-yi-Lee-master/HW3/label.csv'

# Load the face images; the first CSV line is skipped as a header.
with open(file_path, encoding='utf8') as f:
    data = np.loadtxt(f, dtype=float, delimiter=",", skiprows=1)

# Load the labels the same way (comma separated, header line skipped).
with open(label_path, encoding='utf8') as labels_file:
    labels = np.loadtxt(labels_file, dtype=float, delimiter=",", skiprows=1)

# Convert both to float arrays and one-hot encode the labels into 7 classes.
data = np.array(data, dtype=float)
labels = to_categorical(np.array(labels, dtype=float), 7)
def sigmoid(input_x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    The value is computed from ``exp(-|x|)``, which always lies in (0, 1],
    so large-magnitude inputs no longer overflow ``np.exp`` and trigger
    RuntimeWarnings the way the direct formula does.

    Args:
        input_x: A scalar or array-like of real numbers.

    Returns:
        A NumPy float for scalar input, otherwise a float array with the
        same shape as ``input_x``.
    """
    values = np.asarray(input_x, dtype=float)
    # exp(-|x|) can underflow to 0 but can never overflow.
    decay = np.exp(-np.abs(values))
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) -- the same function.
    result = np.where(values >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # np.where yields a 0-d array for scalar input; unwrap it to a scalar.
    return result[()] if result.ndim == 0 else result
def sigmoid_gradient(x):
    """Return the derivative of the sigmoid at ``x``: ``s(x) * (1 - s(x))``."""
    activation = sigmoid(x)
    return activation * (1 - activation)
def softmax(x):
    """Return the softmax of ``x``, normalised over all of its elements.

    The global maximum is subtracted before exponentiating so that the
    largest exponent is 0. The sum runs over the whole array, so ``x`` is
    treated as a single vector regardless of its shape.
    """
    shifted = x - np.max(x)
    numerators = np.exp(shifted)
    normaliser = np.sum(numerators)
    return numerators / normaliser
def cross_entropy(x, y):
    """Return the summed element-wise cross-entropy between ``x`` and ``y``.

    Computes ``sum(-y * log(x) - (1 - y) * log(1 - x))``.

    Two problems of the direct formula are handled here:

    * A column prediction of shape (k, 1) combined with a flat label of
      shape (k,) would broadcast to a (k, k) matrix and inflate the sum.
      When both arguments hold the same number of elements, ``y`` is
      reshaped to the shape of ``x`` first.
    * Probabilities of exactly 0 or 1 make ``log`` return ``-inf``. They
      are clipped into ``[1e-12, 1 - 1e-12]`` so the loss stays finite
      instead of relying on ``nan_to_num`` to hide ``inf``/``nan``.

    Args:
        x: Predicted probabilities (array-like, values expected in [0, 1]).
        y: Target values, e.g. a one-hot label (array-like).

    Returns:
        The total loss as a NumPy float. NaN inputs propagate to the result.
    """
    predicted = np.asarray(x, dtype=float)
    target = np.asarray(y, dtype=float)
    # Align (k,) against (k, 1) so the product stays element-wise.
    if target.shape != predicted.shape and target.size == predicted.size:
        target = target.reshape(predicted.shape)
    tiny = 1e-12
    predicted = np.clip(predicted, tiny, 1.0 - tiny)
    per_element = (-target * np.log(predicted)
                   - (1.0 - target) * np.log(1.0 - predicted))
    return np.sum(per_element)
def init_each_params(input_nums, hidden_nums, output_nums):
    """Create random initial parameters for a one-hidden-layer network.

    Every entry is a random integer drawn from [-5, 5) and stored as a
    float. The builtin ``float`` is used for the conversion because the
    ``np.float`` alias was removed from NumPy.

    Args:
        input_nums: Number of input features.
        hidden_nums: Number of hidden-layer neurons.
        output_nums: Number of output neurons.

    Returns:
        A tuple ``(bias_one, bias_two, weight_one, weight_two)`` with shapes
        ``(hidden_nums, 1)``, ``(output_nums, 1)``,
        ``(hidden_nums, input_nums)`` and ``(output_nums, hidden_nums)``.
    """
    def _random_block(rows, cols):
        # Integer-valued floats in [-5, 5), matching the original scheme.
        return np.random.randint(-5, 5, (rows, cols)).astype(float)

    # Biases of the two layers (drawn first to keep the original RNG order).
    bias_one = _random_block(hidden_nums, 1)
    bias_two = _random_block(output_nums, 1)
    # Weights of the two layers.
    weight_one = _random_block(hidden_nums, input_nums)
    weight_two = _random_block(output_nums, hidden_nums)
    return bias_one, bias_two, weight_one, weight_two
def trainning(dataset, labelset, weight1, weight2, bias1, bias2, lr=0.02):
    """Train the two-layer network with one pass of per-sample gradient descent.

    For every row of ``dataset`` the function runs a forward pass
    (sigmoid hidden layer, softmax output layer), computes the loss, and
    back-propagates to update both layers immediately.

    Args:
        dataset: 2-D array, one flattened input sample per row.
        labelset: 2-D array, one one-hot label per row, aligned with
            ``dataset``.
        weight1: Hidden-layer weights, shape (hidden, inputs).
        weight2: Output-layer weights, shape (outputs, hidden).
        bias1: Hidden-layer bias, shape (hidden, 1).
        bias2: Output-layer bias, shape (outputs, 1).
        lr: Learning rate (defaults to the previously hard-coded 0.02).

    Returns:
        The updated ``(weight1, weight2, bias1, bias2)``.
    """
    for i, sample in enumerate(dataset):
        # Column vectors; sizes are taken from the data rather than
        # hard-coded, so any input/output dimension works.
        a_one = np.asarray(sample, dtype=float).reshape(-1, 1)
        target = np.asarray(labelset[i], dtype=float).reshape(-1, 1)

        # Feed-forward pass.
        z_one = np.matmul(weight1, a_one) + bias1
        # Hidden-layer output.
        a_two = sigmoid(z_one)
        # Input of the output layer.
        z_two = np.matmul(weight2, a_two) + bias2
        # Output-layer output.
        outputset = softmax(z_two)
        # The label is passed as a column so it lines up element-wise with
        # the prediction instead of broadcasting to a square matrix.
        loss = cross_entropy(outputset, target)

        # Back-propagation pass: error terms of both layers. The hidden
        # error uses weight2 from before its update below.
        theta_out = outputset - target
        theta_first = sigmoid_gradient(z_one) * np.matmul(weight2.T, theta_out)

        # Update the second layer's weight and bias.
        weight2 = weight2 - lr * np.matmul(theta_out, a_two.T)
        bias2 = bias2 - lr * theta_out
        # Update the first layer's weight and bias.
        weight1 = weight1 - lr * np.matmul(theta_first, a_one.T)
        bias1 = bias1 - lr * theta_first

        print("The loss of %d times trainning is:%f" % (i, loss))
    return weight1, weight2, bias1, bias2
# Network dimensions used for initialisation:
# - the input layer is a flattened 48*48 grayscale image without any
#   dimensionality reduction, so its dimension is 2304;
# - the hidden layer has 3000 neurons (chosen by hand);
# - the output layer has 7 neurons because this is a 7-class problem.
# The whole network is a plain fully connected network.
bias_one, bias_two, weight_one, weight_two = init_each_params(
    input_nums=2304,
    hidden_nums=3000,
    output_nums=7,
)
model_weight1, model_weight2, model_bias1, model_bias2 = trainning(
    dataset=data,
    labelset=labels,
    weight1=weight_one,
    weight2=weight_two,
    bias1=bias_one,
    bias2=bias_two,
)