Machine Learning ex4: Neural Networks Learning (Python)

import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from scipy.io import loadmat
import scipy.optimize as opt

# Load the handwritten-digit data set (5000 examples of 20x20 grayscale digits)
# and the pre-trained weights supplied with the exercise.
data = loadmat('ex4data1.mat')
weight = loadmat('ex4weights.mat')
X = data['X']
Y = data['y']
theta1 = weight['Theta1']  # (25, 401): input layer -> hidden layer
theta2 = weight['Theta2']  # (10, 26): hidden layer -> output layer
# Unroll both weight matrices into a single column vector.
theta = np.vstack((theta1.reshape(-1, 1), theta2.reshape(-1, 1)))

def sigmoid(z):
    return 1 / (1 + np.exp(-z))


def hypothesis(theta, X):
    # Linear combination for one layer: X (m x n+1) times theta.T (n+1 x units).
    return X.dot(theta.T)


def g_function(a):
    return sigmoid(a)


def forward(theta, X):
    # Unroll theta back into the two weight matrices.
    theta1 = theta[0:25 * 401].reshape(25, 401)
    theta2 = theta[25 * 401:10285].reshape(10, 26)
    m = X.shape[0]
    ones = np.ones((m, 1))
    a1 = np.concatenate([ones, X], axis=1)                # add bias unit: (m, 401)
    z2 = hypothesis(theta1, a1)                           # (m, 25)
    a2 = np.concatenate([ones, g_function(z2)], axis=1)   # add bias unit: (m, 26)
    z3 = hypothesis(theta2, a2)                           # (m, 10)
    a3 = g_function(z3)                                   # output activations
    return a1, z2, a2, z3, a3


def cost(theta, X, Y, k):
    m = X.shape[0]
    a1, z2, a2, z3, a3 = forward(theta, X)
    # One-hot encode the labels: classY[i, j] = 1 if example i has label j + 1.
    classY = np.zeros((m, k))
    for i in range(1, k + 1):
        classY[:, i - 1] = np.array([1 if i == label else 0 for label in Y])
    # Unregularized cross-entropy cost.
    j = np.sum((-1) * np.multiply(classY, np.log(a3)) - np.multiply(1 - classY, np.log(1 - a3))) / m
    return j


# print(cost(theta,X,Y,k=10))
# 0.2876291651613189
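
# As a side note, the one-hot label matrix built by the loop in cost() can also be
# produced in a single vectorized step (a minimal sketch, assuming Y is the (m, 1)
# label column loaded above):
# classY = (Y == np.arange(1, 11)).astype(float)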

def regularized_cost(theta, X, Y, k=10, lam=1):
    theta1 = theta[0:25 * 401].reshape(25, 401)
    theta2 = theta[25 * 401:10285].reshape(10, 26)
    m = X.shape[0]
    j = cost(theta, X, Y, k)
    # Do not regularize the bias weights (the first column of each matrix).
    _theta1 = theta1.copy()
    _theta1[:, 0] = 0
    _theta2 = theta2.copy()
    _theta2[:, 0] = 0
    reg1 = np.sum(_theta1**2)
    reg2 = np.sum(_theta2**2)
    return j + lam * (reg1 + reg2) / (2 * m)

# print(regularized_cost(theta, X, Y, k=10 , lam = 1))
# 0.38376985909092365
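# With lam = 1, comparing the two costs above shows the regularization term
# contributes roughly 0.3838 - 0.2876 ≈ 0.096 to the total cost.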

def sigmoid_gradient(z):
    g = sigmoid(z)
    return g*(1-g)
# print(sigmoid_gradient(0))
# 0.25

def backpropagation(theta, X, Y, k=10, lam=1):
    theta1 = theta[0:25 * 401].reshape(25, 401)
    theta2 = theta[25 * 401:10285].reshape(10, 26)
    m = X.shape[0]
    a1, z2, a2, z3, a3 = forward(theta, X)
    # One-hot encode the labels.
    classY = np.zeros((m, k))
    for i in range(1, k + 1):
        classY[:, i - 1] = np.array([1 if i == label else 0 for label in Y])
    # Output-layer and hidden-layer errors (bias column of theta2 excluded).
    delta3 = a3 - classY
    delta2 = np.multiply(delta3.dot(theta2[:, 1:]), sigmoid_gradient(z2))
    # Unregularized gradient accumulators.
    D2 = np.dot(delta3.T, a2) / m
    D1 = np.dot(delta2.T, a1) / m
    return D1, D2

# D1, D2 = backpropagation(theta, X, Y, k=10, lam=1)
# print(D1.shape, D2.shape)
# (25, 401) (10, 26)
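
# In the notation of the exercise, the errors computed above are
#   delta3 = a3 - y                                  (output layer)
#   delta2 = (delta3 @ Theta2[:, 1:]) * g'(z2)       (hidden layer, elementwise *, bias column dropped)
# and the unregularized gradients are D1 = delta2.T @ a1 / m and D2 = delta3.T @ a2 / m.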

def gradient_checking(theta, X, Y, epsilon):
    # Analytic gradient from backpropagation, unrolled into one column vector.
    D1, D2 = backpropagation(theta, X, Y, k=10, lam=1)
    grad = np.vstack((D1.reshape(-1, 1), D2.reshape(-1, 1)))
    num_grad = np.zeros((theta.shape[0], 1))
    e = np.zeros((theta.shape[0], 1))
    for i in range(theta.shape[0]):
        # Perturb one parameter at a time and take the centered difference.
        e[i] = epsilon
        j1 = cost(theta + e, X, Y, k=10)
        j2 = cost(theta - e, X, Y, k=10)
        num_grad[i] = (j1 - j2) / (2 * epsilon)
        e[i] = 0
    diff = np.linalg.norm(num_grad - grad) / np.linalg.norm(num_grad + grad)
    print('Relative Difference: {}\n'.format(diff))

# This took about 40 minutes to run.
# gradient_checking(theta,X,Y,epsilon=1e-4)
# Relative Difference: 0.9998057514099036
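# The numerical gradient above is the centered difference
#   dJ/dtheta_i ≈ (J(theta + eps*e_i) - J(theta - eps*e_i)) / (2*eps),
# and according to the ex4 instructions a correct backpropagation implementation
# should give a relative difference of less than about 1e-9.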


def regularized_gradient(theta, X, Y, k=10, lam=1):
    D1, D2 = backpropagation(theta, X, Y, k=k, lam=lam)
    theta1 = theta[0:25 * 401].reshape(25, 401)
    theta2 = theta[25 * 401:10285].reshape(10, 26)
    m = X.shape[0]
    # Add the regularization term, leaving the bias weights unregularized.
    _theta1 = theta1.copy()
    _theta1[:, 0] = 0
    _theta2 = theta2.copy()
    _theta2[:, 0] = 0
    D1 = D1 + (lam / m) * _theta1
    D2 = D2 + (lam / m) * _theta2
    # Unroll into a single column vector, matching the layout of theta.
    return np.vstack((D1.reshape(-1, 1), D2.reshape(-1, 1)))

# Gradient checking with the regularization term included
def gradient_checking(theta, X, Y, epsilon):
    grad = regularized_gradient(theta, X, Y, k=10, lam=1)
    num_grad = np.zeros((theta.shape[0], 1))
    e = np.zeros((theta.shape[0], 1))
    for i in range(theta.shape[0]):
        e[i] = epsilon
        j1 = regularized_cost(theta + e, X, Y, k=10, lam=1)
        j2 = regularized_cost(theta - e, X, Y, k=10, lam=1)
        num_grad[i] = (j1 - j2) / (2 * epsilon)
        e[i] = 0
    diff = np.linalg.norm(num_grad - grad) / np.linalg.norm(num_grad + grad)
    print('Relative Difference: {}\n'.format(diff))

# gradient_checking(theta,X,Y, epsilon = 1e-4)
# Relative Difference: 0.9998060640429633


def random_initialization(L_out, L_in):
    # Break symmetry by drawing each weight uniformly from [-epsilon_init, epsilon_init].
    epsilon_init = 0.12
    w = np.random.rand(L_out, 1 + L_in) * 2 * epsilon_init - epsilon_init
    return w
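
# The exercise suggests choosing epsilon_init from the layer sizes,
# epsilon_init = sqrt(6) / sqrt(L_in + L_out); for 400 inputs and 25 hidden units
# this works out to roughly 0.12, the constant used above.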

initial_theta1 = random_initialization(25,400)
initial_theta2 = random_initialization(10,25)
initial_theta = np.vstack((initial_theta1.reshape(-1,1),initial_theta2.reshape(-1,1)))

# Train the network: minimize the regularized cost with scipy's TNC solver,
# supplying the analytic gradient from backpropagation.
res = opt.minimize(fun=regularized_cost,
                   x0=initial_theta, args=(X, Y, 10, 1), method='tnc', jac=regularized_gradient)

# print(res)
# fun: 0.2998890271866287
# jac: array([[1.43661603e-07],
#             [-1.89136207e-16],
#             [-9.27484145e-14],
#             ...,
#             [5.57536194e-08],
#             [1.27154957e-07],
#             [1.72745129e-07]])
# message: 'Converged (|f_n-f_(n-1)| ~= 0)'
# nfev: 4136
# nit: 171
# status: 1
# success: True
# x: array([-1.15034591e+00, -9.45681033e-13, -4.63742073e-10, ...,
#           1.61928813e+00, 2.07152753e+00, 4.04669213e-01])

final_theta = res.x

def predict(h):
    # h holds the output-layer activations; pick the most probable class.
    # Labels run from 1 to 10 (10 stands for the digit 0), hence the +1.
    return np.argmax(h, axis=1) + 1

# Evaluate on the training set using the pre-trained weights from ex4weights.mat.
a1, z2, a2, z3, a3 = forward(theta, X)
m = X.shape[0]
y_pre = predict(a3)
correct = np.zeros(m)
for i in range(m):
    if y_pre[i] == Y[i]:
        correct[i] = 1
    else:
        correct[i] = 0
accuracy = sum(correct) / m
# print(accuracy)
# 0.9752
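
# To evaluate the network trained above instead of the pre-trained ex4weights,
# the same pipeline can be reused with final_theta (a sketch; the resulting
# accuracy depends on the random initialization and is not recorded here):
# a1, z2, a2, z3, a3 = forward(final_theta, X)
# print(np.mean(predict(a3) == Y.ravel()))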

def plot_hidden_layer(theta):
    # Each row of theta1 (minus the bias weight) can be rendered as a 20x20 image
    # showing the input pattern that the corresponding hidden unit responds to.
    theta1 = theta[0:25 * 401].reshape(25, 401)
    hidden_layer = theta1[:, 1:]
    fig, ax_array = plt.subplots(nrows=5, ncols=5, sharey=True, sharex=True, figsize=(5, 5))
    for r in range(5):
        for c in range(5):
            ax_array[r, c].matshow(hidden_layer[5 * r + c].reshape((20, 20)),
                                   cmap=matplotlib.cm.binary)
            ax_array[r, c].set_xticks([])
            ax_array[r, c].set_yticks([])

plot_hidden_layer(final_theta)
plt.show()
