參照大佬,完成的代碼。做一下記錄
一、 Neural Networks Learning
- checkNNGradients.py
- computeNumericalGradient.py
- debugInitializeWeights.py 以上三個檔案用於梯度檢驗:以數值方法計算的梯度,驗證反向傳播求得的梯度是否正確
- displayData.py 可視化數據
- ML_Exe_04.py 主函數,實現其他函數的統籌調度
- predict.py 預測函數,主要算法實現
- sigmoid.py s型函數及其導數計算
checkNNGradients.py
from debugInitializeWeights import *
from computeNumericalGradient import *
def checkNNGradients(lamb):
    """Compare the backpropagation gradient with a numerical estimate.

    Builds a small test network (3 inputs, 5 hidden units, 3 labels) with
    5 samples, evaluates both ``nnGradient`` and
    ``computeNumericalGradient`` on it, and prints the relative difference
    between the two gradient vectors.

    Args:
        lamb: Regularization strength passed to both gradient routines.
            It is used as given; it is no longer overwritten with 1
            inside the function.
    """
    # Dimensions of the throwaway test problem.
    input_layer_size = 3
    hidden_layer_size = 5
    num_labels = 3
    m = 5

    # Shapes of the two weight matrices, keyed the way nnGradient and
    # computeNumericalGradient look them up.
    sizeList = {'theta1_x': hidden_layer_size,
                'theta1_y': input_layer_size + 1,
                'theta2_x': num_labels,
                'theta2_y': hidden_layer_size + 1}

    theta1 = debugInitializeWeights(hidden_layer_size, input_layer_size)
    theta2 = debugInitializeWeights(num_labels, hidden_layer_size)
    theta = np.r_[theta1.flatten(), theta2.flatten()]

    # debugInitializeWeights returns L_in + 1 columns, so passing
    # input_layer_size - 1 yields an m x input_layer_size sample matrix.
    X = debugInitializeWeights(m, input_layer_size - 1)
    y = np.random.randint(0, num_labels, (m, 1))

    # Rewrite y as an m x num_labels indicator matrix.
    Y = np.zeros((m, num_labels))
    Y[np.arange(m), y[:, 0]] = 1

    grad = nnGradient(theta, X, Y, lamb, sizeList)
    numGrad = computeNumericalGradient(theta, X, Y, lamb, sizeList)

    # Relative difference between the numerical and analytic gradients.
    diff = np.linalg.norm(numGrad - grad) / np.linalg.norm(numGrad + grad)
    print('check NN Gradient: diff = ', diff)
computeNumericalGradient.py
from predict import *
import numpy as np
def computeNumericalGradient(theta, X, Y, lamb, sizeList):
    """Estimate the gradient of nnCostFunction by central differences.

    Each parameter is perturbed in turn by +/- 1e-4 while the others stay
    fixed, and the slope between the two resulting costs is recorded.

    Args:
        theta: Flat parameter vector (array supporting ``theta - array``).
        X, Y, lamb, sizeList: Forwarded unchanged to ``nnCostFunction``.

    Returns:
        1-D array with one slope estimate per entry of ``theta``.
    """
    step = 1e-4
    n_params = np.size(theta)
    gradient = np.zeros(n_params)
    offset = np.zeros(n_params)
    for idx in range(n_params):
        # Move only the idx-th parameter.
        offset[idx] = step
        cost_below = nnCostFunction(theta - offset, X, Y, lamb, sizeList)
        cost_above = nnCostFunction(theta + offset, X, Y, lamb, sizeList)
        # Reset so the next iteration perturbs a single coordinate again.
        offset[idx] = 0
        gradient[idx] = (cost_above - cost_below) / (2 * step)
    return gradient
debugInitializeWeights.py
import numpy as np
def debugInitializeWeights(L_out, L_in):
    """Build a deterministic L_out x (L_in + 1) matrix for debugging.

    The entries are sin(1), sin(2), ... laid out row by row and divided
    by 10, so repeated calls with the same sizes give the same values.

    Args:
        L_out: Number of rows.
        L_in: Number of columns minus one.

    Returns:
        Array of shape (L_out, L_in + 1).
    """
    n_cols = L_in + 1
    indices = np.arange(1, L_out * n_cols + 1)
    return np.sin(indices).reshape(L_out, n_cols) / 10
displayData.py
import numpy as np
import matplotlib.pyplot as plt
def displayData(X):
    """Show the rows of X as a grid of grayscale tiles.

    Each row of X is reshaped to one tile, scaled by its largest absolute
    value, and copied into a padded canvas that is displayed with
    matplotlib.

    Args:
        X: 2-D array with one flattened image per row.
    """
    m = np.size(X, 0)  # number of rows of X, i.e. number of samples
    n = np.size(X, 1)  # number of columns of X, i.e. size of one sample

    example_width = int(np.round(np.sqrt(n)))          # width of one tile
    example_height = int(np.floor(n / example_width))  # height of one tile

    # Grid layout: display_rows x display_cols tiles, enough to hold all m.
    display_rows = int(np.floor(np.sqrt(m)))
    display_cols = int(np.ceil(m / display_rows))

    pad = 1  # gap between tiles

    # Canvas starts filled with -1 so the padding differs from tile content.
    display_array = -np.ones((pad + display_rows * (example_height + pad),
                              pad + display_cols * (example_width + pad)))

    # Place tiles row by row; display_rows * display_cols >= m, so every
    # sample gets a cell.
    for curr_ex in range(m):
        j, i = divmod(curr_ex, display_cols)
        max_val = np.max(np.abs(X[curr_ex, :]))
        if max_val == 0:
            # An all-zero sample would otherwise divide by zero and turn
            # the tile into NaNs; leave it as zeros instead.
            max_val = 1
        jstart = pad + j * (example_height + pad)
        istart = pad + i * (example_width + pad)
        tile = np.array(X[curr_ex, :]).reshape(example_height, example_width)
        display_array[jstart:jstart + example_height,
                      istart:istart + example_width] = tile / max_val

    # The whole canvas is transposed before display.
    # NOTE(review): presumably this compensates for samples stored in
    # column-major order - confirm against the data source.
    display_array = display_array.T
    plt.imshow(display_array, cmap=plt.cm.gray)
    plt.axis('off')
    plt.show()
ML_Exe_04.py
import numpy as np
import scipy.optimize as op
import scipy.io as sio
from displayData import *
from predict import *
from checkNNGradients import *
# Load the training set.
data = sio.loadmat('ex4data1.mat')
X, y = data['X'], data['y']

# Number of labels and number of samples.
label_size = 10
m = y.shape[0]
print(y.shape)

# Rewrite y as an m x label_size indicator matrix: label k sets column
# k - 1 (per the original note, columns 0-9 stand for 1, 2, ..., 9, 0).
Y = np.zeros((m, label_size))
for row, label in enumerate(y[:, 0]):
    Y[row, label - 1] = 1

# Visualize 100 randomly chosen samples.
rand = np.random.randint(0, m, 100)
sx = X[rand, ...]
displayData(sx)

# Row/column counts of the hidden-layer and output-layer theta matrices.
sizeList = dict(theta1_x=25, theta1_y=401, theta2_x=10, theta2_y=26)
lamb = 1
nn_params = randInitializeWeights(sizeList)

# Train the model.
res = op.minimize(nnCostFunction,
                  nn_params,
                  args=(X, Y, lamb, sizeList),
                  method='TNC',
                  jac=nnGradient,
                  options={'maxiter': 100})
print(res)

# Gradient check.
checkNNGradients(lamb)

# Compute the training accuracy. The keys of sizeList match changeForm's
# parameter names, so the dict can be unpacked directly.
all_theta = changeForm(res.x, **sizeList)
res_theta1, res_theta2 = all_theta['Theta1'], all_theta['Theta2']
pred = predict(res_theta1, res_theta2, X)
acc = np.mean(pred == y.flatten()) * 100
print('Accuracy:', acc, '%')

# Show the hidden layer weights (bias column dropped).
displayData(res_theta1[:, 1:])
predict.py
import numpy as np
from sigmoid import *
def randInitializeWeights(sizeList):
    """Draw a flat vector of initial weights uniformly from [-0.12, 0.12).

    Args:
        sizeList: Dict with 'theta1_x', 'theta1_y', 'theta2_x' and
            'theta2_y', the row/column counts of the two weight matrices.

    Returns:
        1-D array with theta1_x * theta1_y + theta2_x * theta2_y entries.
    """
    epsilon_init = 0.12
    first_count = sizeList['theta1_x'] * sizeList['theta1_y']
    second_count = sizeList['theta2_x'] * sizeList['theta2_y']
    # One draw covers both matrices; callers split the vector later.
    return np.random.uniform(-epsilon_init, epsilon_init,
                             first_count + second_count)
def changeForm(theta_vector, theta1_x, theta1_y, theta2_x, theta2_y):
    """Split a flat parameter vector into the two theta matrices.

    Args:
        theta_vector: Flat sequence holding Theta1's entries followed by
            Theta2's entries, both in row order.
        theta1_x, theta1_y: Rows and columns of Theta1.
        theta2_x, theta2_y: Rows and columns of Theta2.

    Returns:
        Dict with keys 'Theta1' and 'Theta2' holding the reshaped copies.
    """
    split = theta1_x * theta1_y
    stop = split + theta2_x * theta2_y
    first = np.array(theta_vector[:split]).reshape(theta1_x, theta1_y)
    second = np.array(theta_vector[split:stop]).reshape(theta2_x, theta2_y)
    return {'Theta1': first, 'Theta2': second}
def nnCostFunction(nn_params, X, Y, lamb, sizeList):
    """Return the regularized cross-entropy cost of the two-layer network.

    Args:
        nn_params: Flat vector holding Theta1 followed by Theta2.
        X: Sample matrix, one row per sample; the bias column is added
            here.
        Y: Target matrix with one row per sample and one column per
            output unit.
        lamb: Regularization strength.
        sizeList: Dict with 'theta1_x', 'theta1_y', 'theta2_x' and
            'theta2_y', the shapes used to unpack ``nn_params``.

    Returns:
        The scalar cost J.
    """
    theta = changeForm(nn_params, sizeList['theta1_x'], sizeList['theta1_y'],
                       sizeList['theta2_x'], sizeList['theta2_y'])
    theta1 = theta['Theta1']
    theta2 = theta['Theta2']

    # Number of samples.
    m = Y.shape[0]

    # Forward pass: prepend the bias unit, then compute the hidden layer.
    a1 = np.insert(X, 0, values=np.ones(m), axis=1)
    a2 = sigmoid(np.dot(a1, theta1.T))
    # Prepend the bias unit to the hidden layer and compute the output
    # layer's pre-activation.
    a2 = np.insert(a2, 0, values=np.ones(a2.shape[0]), axis=1)
    z3 = np.dot(a2, theta2.T)

    # Cross-entropy written on the pre-activation:
    #   -log(sigmoid(z))     = logaddexp(0, -z)
    #   -log(1 - sigmoid(z)) = logaddexp(0,  z)
    # Taking log() of a saturated sigmoid output (exactly 0 or 1) would
    # give -inf and turn the cost into NaN or inf; this form stays finite.
    J = 1 / m * np.sum(np.multiply(Y, np.logaddexp(0, -z3))
                       + np.multiply(1 - Y, np.logaddexp(0, z3)))

    # Regularization skips the bias column (column 0) of each matrix.
    theta1_copy = theta1[:, 1:]
    theta2_copy = theta2[:, 1:]
    J = J + lamb / (2 * m) * (np.sum(theta1_copy ** 2) + np.sum(theta2_copy ** 2))
    return J
def nnGradient(nn_params, X, Y, lamb, sizeList):
    """Return the regularized cost gradient computed by backpropagation.

    Args:
        nn_params: Flat vector holding Theta1 followed by Theta2.
        X: Sample matrix, one row per sample; the bias column is added
            here.
        Y: Target matrix with one row per sample and one column per
            output unit.
        lamb: Regularization strength.
        sizeList: Dict with 'theta1_x', 'theta1_y', 'theta2_x' and
            'theta2_y', the shapes used to unpack ``nn_params``.

    Returns:
        Flat gradient vector laid out like ``nn_params`` (Theta1's
        gradient followed by Theta2's).
    """
    theta = changeForm(nn_params, sizeList['theta1_x'], sizeList['theta1_y'],
                       sizeList['theta2_x'], sizeList['theta2_y'])
    theta1 = theta['Theta1']
    theta2 = theta['Theta2']

    # Number of samples.
    m = Y.shape[0]

    # Forward pass. z2 is kept so the backward pass can reuse it instead
    # of recomputing the same matrix product.
    a1 = np.insert(X, 0, values=np.ones(m), axis=1)
    z2 = np.dot(a1, theta1.T)
    a2 = sigmoid(z2)
    a2 = np.insert(a2, 0, values=np.ones(a2.shape[0]), axis=1)
    a3 = sigmoid(np.dot(a2, theta2.T))

    # Backward pass: output error, then hidden error. The bias column of
    # theta2 is dropped because the bias unit has no incoming weights.
    delta3 = a3 - Y
    delta2 = np.multiply(np.dot(delta3, theta2[:, 1:]), sigmoidGradient(z2))

    # Accumulated gradients over all samples.
    bigDelta1 = np.dot(delta2.T, a1)
    bigDelta2 = np.dot(delta3.T, a2)

    # Regularization terms; the bias column (column 0) is not regularized.
    reg1 = lamb / m * theta1
    reg2 = lamb / m * theta2
    reg1[:, 0] = 0
    reg2[:, 0] = 0

    theta1_grad = bigDelta1 / m + reg1
    theta2_grad = bigDelta2 / m + reg2

    # Optimizers expect one flat vector, so unroll both matrices.
    grad = np.r_[theta1_grad.flatten(), theta2_grad.flatten()]
    return grad
def predict(theta1, theta2, X):
    """Predict a 1-based label for every row of X.

    Args:
        theta1: Weight matrix applied to the bias-augmented input.
        theta2: Weight matrix applied to the bias-augmented hidden layer.
        X: Sample matrix, one row per sample.

    Returns:
        1-D array holding, per sample, the index of the largest output
        unit plus one.
    """
    n_samples = X.shape[0]

    # Input layer with a leading bias column.
    with_bias = np.insert(X, 0, np.ones(n_samples), axis=1)

    # Hidden layer activations, again with a leading bias column.
    hidden = sigmoid(np.dot(with_bias, theta1.T))
    hidden = np.insert(hidden, 0, np.ones(hidden.shape[0]), axis=1)

    # Output layer activations.
    output = sigmoid(np.dot(hidden, theta2.T))

    # Column indices 0..9 map to labels 1..10.
    labels = output.argmax(axis=1) + 1
    return labels.flatten()
sigmoid.py
import numpy as np
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)), elementwise."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator
def sigmoidGradient(z):
    """Return the derivative of the sigmoid at z, elementwise.

    Uses the identity sigmoid'(z) = sigmoid(z) * (1 - sigmoid(z)); the
    sigmoid is evaluated once and reused for both factors.
    """
    s = sigmoid(z)
    return np.multiply(s, 1 - s)