Deep Learning from Scratch (Theory and Implementation in Python): Hand-Rolling MNIST Digit Recognition

Introduction

I have been a graduate student for a year now. Although I have worked on deep-learning projects the whole time, I have hardly studied the underlying theory. My usual routine is to take other people's papers and engineering code, grab their ready-made models (GNN, RGIN, RDL, and so on), run them with popular frameworks such as TensorFlow, and copy things over, so many details, parameter meanings, and framework internals were never clear to me. The holiday is a good chance to go back to fundamentals, so I picked up this purely theoretical, build-everything-by-hand classic. The book and its accompanying code can be downloaded here: Deep Learning from Scratch: Theory and Implementation in Python.

A study tip: the book pairs nicely with the CSDN open course series <5天搞定深度學習入門系列> (roughly, "Master Deep Learning Basics in 5 Days").

1 Create path_config.json

The goal is to configure the project path in one place so that later development does not run into path-related bugs.

import sys, os
import json
import numpy as np
with open(r"../path_config.json", "r", encoding="utf-8") as f:
    config = json.loads(f.read())
    root_path = config["root_path"]
sys.path.append(root_path) # add the project root to the import path
print("initialization succeeded")
initialization succeeded
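
For reference, path_config.json only needs the root_path key that the snippet above reads. A minimal example (the value is just a placeholder; point it at your own project root):

{
    "root_path": "/path/to/your/project"
}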

2 Activation functions


# Activation functions

# identity: y = x (used as the output activation for regression)
def identity_function(x):
    return x

# step function
def step_function(x):
    return np.array(x > 0, dtype=int)  # np.int is removed in recent NumPy; plain int works everywhere

# softmax for multi-class output: returns the predicted probabilities
def softmax(x):
    if x.ndim == 2:
        x = x.T
        x = x - np.max(x, axis=0)
        y = np.exp(x) / np.sum(np.exp(x), axis=0)
        return y.T 
    x = x - np.max(x) # subtract the max to avoid overflow
    return np.exp(x) / np.sum(np.exp(x))

# ReLU returning both the activation f_x and the derivative d_x
def ReLU(x):
    f_x = np.maximum(0, x)
    d_x = f_x.copy()
    d_x[d_x>0] = 1
    return f_x, d_x

def relu(x):
    return np.maximum(0, x)

def relu_grad(x):
    grad = np.zeros_like(x)  # np.zeros(x) would treat x as a shape and fail for float arrays
    grad[x>=0] = 1
    return grad

# sigmoid: returns the activation sig; the derivative is sig*(1-sig)
def sigmoid(x):
    return 1 / (1 + np.exp(-x))  

def sigmoid_grad(x):
    return (1.0 - sigmoid(x)) * sigmoid(x)

# tanh: returns the activation tan and the derivative 1 - tan**2
def tanha(x):
    tan = np.tanh(x)  # same as (e^x - e^-x) / (e^x + e^-x), but numerically stable
    return tan, 1 - np.square(tan)
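
As a quick sanity check (my own addition, not code from the book), the helpers above can be exercised on a small batch: every softmax row should sum to 1, relu should clip negative entries to 0, and sigmoid_grad(0) should equal 0.25.

a = np.array([[0.3, 2.9, 4.0], [1.0, -1.0, 0.5]])
probs = softmax(a)
print(probs.sum(axis=1))              # [1. 1.]
print(relu(a))                        # negative entries become 0
print(sigmoid_grad(np.array([0.0])))  # [0.25] = sigmoid(0) * (1 - sigmoid(0))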

3 Loss functions

# Loss functions
y = np.array([0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0])
t = np.array([0, 0, 1, 0, 0, 0, 0, 0, 0, 0])

# mean squared error
def mean_squared_error(y, t):
    return 0.5 * np.sum((y-t)**2)
print("mean squared error: ", mean_squared_error(y, t)) # 0.09750000000000003

# cross-entropy error
def cross_entropy_error(y, t):
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)
    # if the labels are one-hot vectors, convert them to class-index labels
    if t.size == y.size:
        t = t.argmax(axis=1)
    batch_size = y.shape[0]
    return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size
print("cross_entropy_error: ", cross_entropy_error(y, t)) # 0.510825457099338

def softmax_loss(X, t):
    y = softmax(X)
    return cross_entropy_error(y, t)
print("softmax_loss: ", softmax_loss(y, t))
mean squared error:  0.09750000000000003
cross_entropy_error:  0.510825457099338
softmax_loss:  1.8194936854234711
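
Note that cross_entropy_error handles both one-hot labels and plain class-index labels: the t.size == y.size branch converts one-hot vectors into indices. A small check of my own (the t_onehot / t_index names are only for this demo); both calls should print the same value as above.

t_onehot = np.array([0, 0, 1, 0, 0, 0, 0, 0, 0, 0])
t_index = np.array([2])
print(cross_entropy_error(y, t_onehot)) # 0.510825457099338
print(cross_entropy_error(y, t_index))  # same value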

4 Gradients

# Gradients
# Numerical gradient: f is the function, x is a NumPy array (make sure it holds floats).
# The central difference is taken element-wise over every entry of x, whatever its shape.
# Note: iterating with range(len(x)) only walks the rows of a 2-D array (and aliases them
# through views), which gives wrong per-row results, so np.nditer is used to visit each element.
def numerical_gradient(f, x):
    h = 1e-4 # 0.0001
    grad = np.zeros_like(x) # array of zeros with the same shape as x
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        tmp_val = x[idx]
        # f(x+h)
        x[idx] = float(tmp_val) + h
        fxh1 = f(x)
        # f(x-h)
        x[idx] = tmp_val - h
        fxh2 = f(x)
        grad[idx] = (fxh1 - fxh2) / (2*h)
        x[idx] = tmp_val # restore the original value
        it.iternext()
    return grad
def function_2(x):
    return np.sum(x**2)

r = numerical_gradient(function_2, np.array([[3.0, 4.0], [2.0, 3.0], [2.0, 3.0]])) # gradient of sum(x**2) is 2x
print(r)

r = numerical_gradient(function_2, np.array([3.0, 4.0])) # array([6., 8.])
print(r)

r = numerical_gradient(function_2, np.array([[2.0, 3.0, 4.0], [2.0, 3.0, 2.0], [4.0, 5.0, 6.0]]))
print(r)
[[6. 8.]
 [4. 6.]
 [4. 6.]]
[6. 8.]
[[ 4.  6.  8.]
 [ 4.  6.  4.]
 [ 8. 10. 12.]]
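
With numerical_gradient in place, plain gradient descent is just the repeated update x -= lr * grad. A minimal sketch along the lines of the book's gradient_descent example (the learning rate and step count here are arbitrary choices):

def gradient_descent(f, init_x, lr=0.1, step_num=100):
    x = init_x
    for _ in range(step_num):
        grad = numerical_gradient(f, x)
        x -= lr * grad
    return x

print(gradient_descent(function_2, np.array([-3.0, 4.0]))) # approaches [0, 0]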

5 The neural network model

# A two-layer neural network
class TwoLayerNet():
    def __init__(self, layers=[784,50,10], seed=200, weight_init_std=0.01):
        np.random.seed(seed) # make the random weight initialization reproducible
        # initialize weights and biases
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(layers[0], layers[1])
        self.params['b1'] = np.zeros(layers[1])
        self.params['W2'] = weight_init_std * np.random.randn(layers[1], layers[2])
        self.params['b2'] = np.zeros(layers[2])
        
    def predict(self, x):
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']
        # forward pass
        z1 = sigmoid( np.dot(x, W1) + b1 )
        y = softmax( np.dot(z1,W2) + b2 )        
        return y
    
    # x: input data, t: labels
    def loss(self, x, t):
        y = self.predict(x)
        return cross_entropy_error(y, t) # cross-entropy loss
    
    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        t = np.argmax(t, axis=1)
        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy
    
    # x: input data, t: labels
    def numerical_gradient(self, x, t):
        loss_W = lambda W: self.loss(x, t)
        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])
        return grads
    
    def gradient(self, x, t): # compute the gradients by backpropagation (much faster than numerical_gradient)
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']
        grads = {}
        batch_num = x.shape[0]
        # forward
        a1 = np.dot(x, W1) + b1
        z1 = sigmoid( a1 )
        a2 = np.dot(z1, W2) + b2
        y = softmax( a2 )
        # backward
        dy = (y - t) / batch_num
        grads['W2'] = np.dot(z1.T, dy)
        grads['b2'] = np.sum(dy, axis=0)
        da1 = np.dot(dy, W2.T)
        dz1 = sigmoid_grad(a1) * da1
        grads['W1'] = np.dot(x.T, dz1)
        grads['b1'] = np.sum(dz1, axis=0)
        return grads    
    
    # Note: despite the name, epochs counts mini-batch iterations; accuracy is logged once per full pass below.
    def train(self, x_train, t_train, x_test, t_test, epochs=10000, batch_size=100, learning_rate=0.1):
        train_size = x_train.shape[0]
        train_loss_list = []
        train_acc_list = []
        test_acc_list = []

        iter_per_epoch = max(train_size / batch_size, 1)

        for i in range(epochs):
            batch_mask = np.random.choice(train_size, batch_size)
            x_batch = x_train[batch_mask]
            t_batch = t_train[batch_mask]
            
            # compute the gradients
            # grad = self.numerical_gradient(x_batch, t_batch) # numerical version: correct but very slow
            grad = self.gradient(x_batch, t_batch) # predict y from x, form the loss against t, then backpropagate

            # update the parameters
            for key in ('W1', 'b1', 'W2', 'b2'):
                self.params[key] -= learning_rate * grad[key]
            loss = self.loss(x_batch, t_batch)
            train_loss_list.append(loss)

            if i % iter_per_epoch == 0:
                train_acc = self.accuracy(x_train, t_train) # accuracy over the full training set
                test_acc = self.accuracy(x_test, t_test) # accuracy over the full test set
                train_acc_list.append(train_acc) # keep both lists for the plots later
                test_acc_list.append(test_acc)
                print("train acc, test acc | " + str(train_acc) + ", " + str(test_acc))

        return train_loss_list, train_acc_list, test_acc_list
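
A useful sanity check (my own addition, mirroring the book's gradient-check idea) is to compare the backpropagation gradients from gradient() against the numerical ones from numerical_gradient() on a tiny batch. Assuming x_train and t_train have already been loaded as in section 6 below, the average absolute differences should all be tiny (the check takes a little while because of the numerical differentiation):

net_check = TwoLayerNet()
x_batch = x_train[:3]
t_batch = t_train[:3]
grad_numerical = net_check.numerical_gradient(x_batch, t_batch)
grad_backprop = net_check.gradient(x_batch, t_batch)
for key in grad_numerical.keys():
    diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
    print(key + ": " + str(diff))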


6 Main function

# start training
import numpy as np
from data_set.mnist import load_mnist
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True) # load the MNIST data
network = TwoLayerNet()
train_loss_list, train_acc_list, test_acc_list = network.train(x_train, t_train, x_test, t_test)
(10000, 784)
<class 'numpy.ndarray'>
train acc, test acc | 0.09863333333333334, 0.0958
train acc, test acc | 0.7864833333333333, 0.7901
train acc, test acc | 0.8757833333333334, 0.8787
train acc, test acc | 0.8993166666666667, 0.9018
train acc, test acc | 0.9092166666666667, 0.9125
train acc, test acc | 0.9160166666666667, 0.9172
train acc, test acc | 0.92075, 0.9241
train acc, test acc | 0.9245166666666667, 0.9264
train acc, test acc | 0.9291, 0.9308
train acc, test acc | 0.932, 0.9332
train acc, test acc | 0.9350833333333334, 0.9349
train acc, test acc | 0.9381166666666667, 0.9383
train acc, test acc | 0.9407666666666666, 0.9405
train acc, test acc | 0.9432166666666667, 0.9434
train acc, test acc | 0.9443333333333334, 0.9441
train acc, test acc | 0.9464666666666667, 0.9461
train acc, test acc | 0.9483833333333334, 0.9471
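
Once training has finished, network can be used for inference directly. A small illustrative snippet (the test index 0 is arbitrary) that classifies a single test image:

probs = network.predict(x_test[0:1])   # shape (1, 10): class probabilities
print("predicted digit:", np.argmax(probs))
print("true digit:", np.argmax(t_test[0]))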

7 Visualizing the results

import matplotlib.pyplot as plt
%matplotlib inline
# plot the training/test accuracy per epoch
markers = {'train': 'o', 'test': 's'}
x = np.arange(len(train_acc_list))
plt.plot(x, train_acc_list, label='train acc')
plt.plot(x, test_acc_list, label='test acc', linestyle='--')
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.ylim(0, 1.0)
plt.legend(loc='lower right')
plt.show()

Accuracy curves


import matplotlib.pyplot as plt
%matplotlib inline
# plot the training loss per iteration
markers = {'train': 'o', 'test': 's'}
x = np.arange(len(train_loss_list))
plt.plot(x, train_loss_list, label='train loss')
plt.xlabel("iterations")
plt.ylabel("loss")
plt.ylim(0, 3.0)
plt.legend(loc='lower right')
plt.show()

Training loss curve

