Andrew Ng Machine Learning, Exercise 4: Neural Networks Learning, Implemented in Python

Code completed with reference to other people's implementations; recorded here as notes.

1. Neural Networks Learning

  •   checkNNGradients.py 
  •   computeNumericalGradient.py
  •   debugInitializeWeights.py  the first three are used for gradient checking: verifying that the gradients produced by backpropagation are correct
  •   displayData.py   visualizes the data
  •   ML_Exe_04.py  the main script; coordinates the other functions
  •   predict.py   the core implementation: cost function, gradient (backpropagation), and prediction
  •   sigmoid.py  the sigmoid function and its derivative
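
Together these implement the standard ex4 setup: 400 input units (20x20-pixel digit images), one hidden layer of 25 units, and 10 output units for the labels 1-10 (the dataset encodes the digit 0 as label 10).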

 

  checkNNGradients.py

from debugInitializeWeights import *
from computeNumericalGradient import *

'''
Gradient checking: builds a small test network and compares the gradient
from backpropagation against a numerical estimate, to verify that the
backpropagation implementation is correct.
Provided with the exercise.
'''
def checkNNGradients(lamb):
    #Set up a small test network
    input_layer_size = 3
    hidden_layer_size = 5
    num_labels = 3
    m = 5
    sizeList = {'theta1_x': hidden_layer_size,
                'theta1_y': input_layer_size + 1,
                'theta2_x': num_labels,
                'theta2_y': hidden_layer_size + 1}  # dimensions of the theta matrices
    theta1 = debugInitializeWeights(hidden_layer_size, input_layer_size)
    theta2 = debugInitializeWeights(num_labels, hidden_layer_size)
    theta = np.r_[theta1.flatten(), theta2.flatten()]
    X = debugInitializeWeights(m, input_layer_size - 1)
    y = np.random.randint(0, num_labels, (m, 1))
    # rewrite y as an m x num_labels one-hot matrix
    Y = np.zeros((m, num_labels))
    for i in range(0, m):
        Y[i, y[i, 0]] = 1
    grad = nnGradient(theta, X, Y, lamb, sizeList)
    numGrad = computeNumericalGradient(theta, X, Y, lamb, sizeList)
    diff = np.linalg.norm(numGrad - grad) / np.linalg.norm(numGrad + grad)
    print('check NN Gradient: diff = ', diff)
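
If the backpropagation implementation is correct, this relative difference between the analytic and the numerical gradient should be tiny; the exercise text suggests values below about 1e-9.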

 

  computeNumericalGradient.py

from predict import *
import numpy as np

#Numerically estimate the gradient with central differences; provided with the exercise
def computeNumericalGradient(theta, X, Y ,lamb, sizeList):
    numgrad = np.zeros(np.size(theta))
    perturb = np.zeros(np.size(theta))
    e = 1e-4
    for p in range(0, np.size(theta)):
        perturb[p] = e
        theta_minus = theta - perturb
        theta_plus = theta + perturb
        loss1 = nnCostFunction(theta_minus, X, Y, lamb, sizeList)
        loss2 = nnCostFunction(theta_plus, X, Y, lamb, sizeList)
        numgrad[p] = (loss2 - loss1) / (2 * e)
        perturb[p] = 0
    return numgrad
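
The loop applies the central-difference formula (J(theta+e) - J(theta-e)) / (2e) one coordinate at a time. The idea is easy to sanity-check in isolation on a function with a known derivative; a minimal sketch (not part of the exercise files):

# Toy check of the central-difference estimator on f(x) = x**2, whose derivative is 2x.
f = lambda x: x ** 2
e = 1e-4
x0 = 3.0
print((f(x0 + e) - f(x0 - e)) / (2 * e))  # ~6.0, matching f'(3) = 6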

  debugInitializeWeights.py


import numpy as np

#Deterministic initialization of test parameters
def debugInitializeWeights(L_out, L_in):
    W = np.arange(1, L_out * (L_in + 1)+1)
    W = np.sin(W)
    W = np.array(W).reshape(L_out, (L_in + 1)) / 10
    return W
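
Note that the "initialization" here uses sin rather than random numbers: the returned matrix is fully deterministic, so the gradient check produces the same values on every run, which is exactly what you want when debugging.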

  displayData.py

import numpy as np
import matplotlib.pyplot as plt


#Display the image data as a grid of digits
def displayData(X):
    m = np.size(X, 0)  #number of rows of X, i.e. the number of examples
    n = np.size(X, 1)  #number of columns of X, i.e. the size of one example
    example_width = int(np.round(np.sqrt(n)))  #width of a single image
    example_height = int(np.floor(n / example_width))  #height of a single image
    display_rows = int(np.floor(np.sqrt(m)))  #number of rows of images in the grid
    display_cols = int(np.ceil(m / display_rows))  #number of columns of images in the grid
    pad = 1  #spacing between images
    display_array = - np.ones((pad + display_rows * (example_height + pad),
                            pad + display_cols * (example_width + pad)))  #initialize the grid; the -1 background forms the borders
    curr_ex = 0  #index of the current example
    #copy each small image into the grid
    for j in range(0, display_rows):
        for i in range(0, display_cols):
            if curr_ex >= m:
                break
            max_val = np.max(abs(X[curr_ex, :]))
            jstart = pad + j * (example_height + pad)
            istart = pad + i * (example_width + pad)
            display_array[jstart: (jstart + example_height), istart: (istart + example_width)] = \
                np.array(X[curr_ex, :]).reshape(example_height, example_width) / max_val
            curr_ex = curr_ex + 1
        if curr_ex >= m:
            break
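    #the .mat file stores each 20x20 image in column-major (MATLAB) order,
    #so transpose so the digits display upright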
    display_array = display_array.T
    plt.imshow(display_array,cmap=plt.cm.gray)
    plt.axis('off')
    plt.show()

  ML_Exe_04.py

import numpy as np
import scipy.optimize as op
import scipy.io as sio
from displayData import *
from predict import *
from checkNNGradients import *



#Load the training set
data = sio.loadmat('ex4data1.mat')
X = data['X']
y = data['y']
#number of labels
label_size = 10
#number of examples
m = y.shape[0]
print(y.shape)
#Rewrite y as a 5000x10 one-hot matrix; columns 0-9 stand for the digits 1,2,...,9,0
#(the dataset encodes the digit 0 as label 10)
Y = np.zeros((m,label_size))
for i in range(0,m):
    Y[i,y[i,0]-1]=1
#Randomly pick 100 examples and visualize them
rand = np.random.randint(0,m,100)
sx = X[rand,...]
displayData(sx)

#Initialization: rows and columns of the hidden-layer and output-layer theta matrices
sizeList={'theta1_x':25,
          'theta1_y':401,
          'theta2_x':10,
          'theta2_y':26}
lamb = 1
nn_params = randInitializeWeights(sizeList)
#Train the model
res = op.minimize(fun=nnCostFunction,
                  x0=nn_params,
                  args=(X,Y,lamb,sizeList),
                  method='TNC',
                  jac=nnGradient,
                  options={'maxiter':100})
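# res.x holds the optimized flattened parameters; res.fun is the final cost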
print(res)

# Gradient check (slow; normally run before training to confirm nnGradient is correct)
checkNNGradients(lamb)

#Compute the training accuracy
all_theta = changeForm(res.x, sizeList['theta1_x'], sizeList['theta1_y'],
                       sizeList['theta2_x'], sizeList['theta2_y'])
res_theta1 = all_theta['Theta1']
res_theta2 = all_theta['Theta2']
pred = predict(res_theta1, res_theta2, X)
acc = np.mean(pred == y.flatten())*100
print('Accuracy:',acc,'%')

#Visualize what the hidden layer has learned
displayData(res_theta1[:, 1:])
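
With lambda = 1 and 100 TNC iterations, the reported training accuracy should land in the mid-90s; the exercise text quotes roughly 95.3%, varying by about 1% across random initializations.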


  predict.py

import numpy as np
from sigmoid import *

#Randomly initialize theta in [-epsilon, epsilon]
def randInitializeWeights(sizeList):
    epsilon_init = 0.12
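    #0.12 matches the heuristic suggested in the exercise notes:
    #epsilon_init = sqrt(6) / sqrt(L_in + L_out) = sqrt(6) / sqrt(400 + 25) ≈ 0.12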
    theta1_x = sizeList['theta1_x']
    theta1_y = sizeList['theta1_y']
    theta2_x = sizeList['theta2_x']
    theta2_y = sizeList['theta2_y']
    theta_number = theta1_x*theta1_y+theta2_x*theta2_y
    #sample theta_number values uniformly from (-epsilon, epsilon), returning an ndarray
    W = np.random.uniform(-epsilon_init,epsilon_init,theta_number)
    return W

#Reshape the flattened parameter vector back into the theta matrices
def changeForm(theta_vector,theta1_x,theta1_y,theta2_x,theta2_y):
    theta1 = np.array(theta_vector[0:theta1_x*theta1_y]).reshape(theta1_x,theta1_y)
    theta2 = np.array(theta_vector[theta1_x*theta1_y:theta1_x*theta1_y+theta2_x*theta2_y]).reshape(theta2_x,theta2_y)
    return {'Theta1':theta1,'Theta2':theta2}

#Cost function: feedforward pass plus regularized cross-entropy cost
def nnCostFunction(nn_params,X,Y,lamb,sizeList):
    theta = changeForm(nn_params,sizeList['theta1_x'], sizeList['theta1_y'],
                       sizeList['theta2_x'], sizeList['theta2_y'])
    theta1 = theta['Theta1']
    theta2 = theta['Theta2']
    #number of examples
    m = Y.shape[0]
    #compute h(theta) with a full forward pass
    #layer 2: insert the bias unit, then apply sigmoid
    a1 = np.insert(X,0,values = np.ones(m),axis=1)
    a2 = sigmoid(np.dot(a1,theta1.T))
    #layer 3: insert the bias unit, then apply sigmoid
    a2 = np.insert(a2,0,values=np.ones(a2.shape[0]),axis=1)
    a3 = sigmoid(np.dot(a2,theta2.T))

    #unregularized cost
    J = 1 / m * np.sum(-np.multiply(Y, np.log(a3)) - np.multiply((1 - Y), np.log(1 - a3)))
    #the bias column (theta_0) is excluded from regularization
    theta1_copy = theta1[:, 1:]
    theta2_copy = theta2[:, 1:]
    #add the regularization term
    J = J + lamb / (2 * m) * (np.sum(theta1_copy ** 2) + np.sum(theta2_copy ** 2))
    return J

def nnGradient(nn_params,X,Y,lamb,sizeList):
    theta = changeForm(nn_params,sizeList['theta1_x'], sizeList['theta1_y'],sizeList['theta2_x'], sizeList['theta2_y'])
    theta1 = theta['Theta1']
    theta2 = theta['Theta2']
    #number of examples
    m = Y.shape[0]
    #compute h(theta) with a full forward pass
    #layer 2: insert the bias unit, then apply sigmoid
    a1 = np.insert(X,0,values = np.ones(m),axis=1)
    a2 = sigmoid(np.dot(a1,theta1.T))
    #layer 3: insert the bias unit, then apply sigmoid
    a2 = np.insert(a2,0,values=np.ones(a2.shape[0]),axis=1)
    a3 = sigmoid(np.dot(a2,theta2.T))
    #the bias column (theta_0) is excluded from regularization
    theta1_copy = theta1[:, 1:]
    theta2_copy = theta2[:, 1:]
    #backpropagation
    #error terms: delta3 at the output; delta2 propagated back through theta2 and scaled by g'(z2)
    delta3 = a3 - Y
    delta2 = np.multiply(np.dot(delta3, theta2_copy), sigmoidGradient(np.dot(a1, theta1.T)))
    #accumulate the Delta matrices
    bigDelta1 = np.dot(delta2.T, a1)
    bigDelta2 = np.dot(delta3.T, a2)
    #gradients D: regularize every column, then overwrite the bias column with its unregularized value
    theta1_grad = bigDelta1 / m + lamb / m * theta1
    theta2_grad = bigDelta2 / m + lamb / m * theta2
    theta1_grad[:, 0] = bigDelta1[:, 0] / m
    theta2_grad[:, 0] = bigDelta2[:, 0] / m
    #advanced optimizers expect a single flat gradient vector, so unroll the matrices
    grad = np.r_[theta1_grad.flatten(), theta2_grad.flatten()]
    return grad

#Use the trained model to make predictions
def predict(theta1,theta2,X):
    #number of examples
    m = X.shape[0]

    #layer 2: insert a column of ones (x_0)
    X = np.insert(X,0,np.ones(m),axis=1)
    #hidden-layer units, with sigmoid as the activation function
    a2 = sigmoid(np.dot(X,theta1.T))

    #layer 3: insert the bias unit
    a2 = np.insert(a2,0,np.ones(a2.shape[0]),axis=1)
    #output-layer units, with sigmoid as the activation function
    a3 = sigmoid(np.dot(a2,theta2.T))
    #argmax indices 0-9 correspond to labels 1-10
    p = a3.argmax(axis=1) +1
    #flatten to a vector
    return p.flatten()
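
A quick way to validate nnCostFunction independently of training is to evaluate it at the pre-trained weights shipped with the exercise (ex4weights.mat); the exercise text gives the expected costs as roughly 0.2876 without regularization and 0.3838 with lambda = 1. A minimal sketch, assuming the .mat files sit next to the script:

import numpy as np
import scipy.io as sio
from predict import nnCostFunction

# Load the data and the pre-trained weights shipped with ex4.
data = sio.loadmat('ex4data1.mat')
X, y = data['X'], data['y']
m = y.shape[0]
Y = np.zeros((m, 10))
for i in range(0, m):
    Y[i, y[i, 0] - 1] = 1  # same one-hot encoding as in ML_Exe_04.py

w = sio.loadmat('ex4weights.mat')
theta = np.r_[w['Theta1'].flatten(), w['Theta2'].flatten()]
sizeList = {'theta1_x': 25, 'theta1_y': 401, 'theta2_x': 10, 'theta2_y': 26}
print(nnCostFunction(theta, X, Y, 0, sizeList))  # expected ~0.2876
print(nnCostFunction(theta, X, Y, 1, sizeList))  # expected ~0.3838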





  sigmoid.py

import numpy as np

#The sigmoid (logistic) activation function
def sigmoid(z):
    return 1/(1+np.exp(-z))


#Derivative of the sigmoid function
def sigmoidGradient(z):
    return np.multiply(sigmoid(z),1-sigmoid(z))
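
A handy spot check from the exercise text: the sigmoid gradient at z = 0 should be exactly 0.25, and it vanishes for large |z|:

import numpy as np
from sigmoid import sigmoidGradient

print(sigmoidGradient(0))                        # 0.25
print(sigmoidGradient(np.array([-10.0, 10.0])))  # both ~4.54e-05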

 
