Introduction to Deep Learning (8): A Complete Implementation of a Fully Connected Network for Handwritten Digit Recognition

This article is part of my reading notes on 《深度學習入門 基於Python的理論與實現》 (Deep Learning from Scratch).
The code and figures are adapted from the book.

Implementing a Fully Connected Multi-Layer Network Class

import sys
file_path = __file__.replace('\\', '/')
dir_path = file_path[: file_path.rfind('/')] # path of the directory containing this file
pardir_path = dir_path[: dir_path.rfind('/')]
sys.path.append(pardir_path) # add the parent directory to the Python module search path

import numpy as np
from func.gradient import numerical_gradient, gradient_check
from layer.common import *
from collections import OrderedDict
import os
import pickle

class MultiLayerNet:
    """
    Parameters
    ----------
    input_size : input size (784 for MNIST)
    hidden_size_list : list of the numbers of neurons in the hidden layers (e.g. [100, 100, 100])
    output_size : output size (10 for MNIST)
    activation : 'relu' or 'sigmoid'
    weight_init_std : standard deviation of the weights (e.g. 0.01)
        'relu' or 'he' selects the "He initialization"
        'sigmoid' or 'xavier' selects the "Xavier initialization"
    weight_decay_lambda : strength of weight decay (the L2 norm); weight decay suppresses overfitting
    use_dropout : whether to use Dropout; if True, a Dropout layer is inserted after the activation of every fully connected layer except the last one
    dropout_ration : the dropout ratio (a different ratio per Dropout layer is not supported yet)
    use_batchnorm : whether to use Batch Normalization; if True, every fully connected layer except the last one is followed by a BN layer and then the activation layer
    """
    def __init__(self, input_size, hidden_size_list, output_size,
                 activation='relu', weight_init_std='relu', weight_decay_lambda=0, 
                 use_dropout=False, dropout_ration=0.5, use_batchnorm=False, 
                 pretrain_flag=True, pkl_file_name=None):
        self.input_size = input_size
        self.output_size = output_size
        self.hidden_size_list = hidden_size_list
        self.hidden_layer_num = len(hidden_size_list)
        self.use_dropout = use_dropout
        self.weight_decay_lambda = weight_decay_lambda
        self.use_batchnorm = use_batchnorm
        self.pkl_file_name = pkl_file_name
        self.params = {}

        if pretrain_flag and self.pkl_file_name is not None and os.path.exists(self.pkl_file_name):
            self.load_pretrain_model()
        else:
            # initialize the weights
            self.__init_weight(weight_init_std)

            # build the layers
            activation_layer = {'sigmoid': Sigmoid, 'relu': Relu}
            self.layers = OrderedDict()
            for idx in range(1, self.hidden_layer_num + 1):
                self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)],
                                                        self.params['b' + str(idx)])
                if self.use_batchnorm:
                    self.params['gamma' + str(idx)] = np.ones(hidden_size_list[idx-1])
                    self.params['beta' + str(idx)] = np.zeros(hidden_size_list[idx-1])
                    self.layers['BatchNorm' + str(idx)] = BatchNormalization(self.params['gamma' + str(idx)], self.params['beta' + str(idx)])
                    
                self.layers['Activation_function' + str(idx)] = activation_layer[activation]()
                
                if self.use_dropout:
                    self.layers['Dropout' + str(idx)] = Dropout(dropout_ration)

            # output layer
            idx = self.hidden_layer_num + 1
            self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)], self.params['b' + str(idx)])

            self.last_layer = SoftmaxWithLoss()

    def load_pretrain_model(self):
        with open(self.pkl_file_name, 'rb') as f:
            model = pickle.load(f)
            for key in ('params', 'layers', 'last_layer'):
                setattr(self, key, getattr(model, key))
            print('params loaded!')

    def __init_weight(self, weight_init_std):
        """設定權重的初始值

        Parameters
        ----------
        weight_init_std : 指定權重的標準差(e.g. 0.01)
            指定'relu'或'he'的情況下設定“He的初始值”
            指定'sigmoid'或'xavier'的情況下設定“Xavier的初始值”
        """
        all_size_list = [self.input_size] + self.hidden_size_list + [self.output_size]
        for idx in range(1, len(all_size_list)):
            scale = weight_init_std
            if str(weight_init_std).lower() in ('relu', 'he'):
                scale = np.sqrt(2.0 / all_size_list[idx - 1])  # recommended initial value when using ReLU
            elif str(weight_init_std).lower() in ('sigmoid', 'xavier'):
                scale = np.sqrt(1.0 / all_size_list[idx - 1])  # recommended initial value when using sigmoid
            self.params['W' + str(idx)] = scale * np.random.randn(all_size_list[idx-1], all_size_list[idx])
            self.params['b' + str(idx)] = np.zeros(all_size_list[idx])

    def predict(self, x, train_flg=False):
        for key, layer in self.layers.items():
            if "Dropout" in key or "BatchNorm" in key:
                x = layer.forward(x, train_flg)
            else:
                x = layer.forward(x)

        return x

    def loss(self, x, t, train_flg=False):
        y = self.predict(x, train_flg)

        weight_decay = 0
        for idx in range(1, self.hidden_layer_num + 2): # apply weight decay to both the hidden layers and the output layer
            W = self.params['W' + str(idx)]
            weight_decay += 0.5 * self.weight_decay_lambda * np.sum(W**2)  # 0.5 * lambda * ||W||^2

        return self.last_layer.forward(y, t) + weight_decay

    def accuracy(self, X, T):
        Y = self.predict(X, train_flg=False)
        Y = np.argmax(Y, axis=1)
        if T.ndim != 1: 
            T = np.argmax(T, axis=1)

        accuracy = np.sum(Y == T) / float(X.shape[0])
        return accuracy

    def numerical_gradient(self, X, T):
        """求梯度(數值微分)
        Returns
        -------
        具有各層的梯度的字典變量
            grads['W1']、grads['W2']、...是各層的權重
            grads['b1']、grads['b2']、...是各層的偏置
        """
        loss_W = lambda W: self.loss(X, T, train_flg=True)

        grads = {}
        for idx in range(1, self.hidden_layer_num+2):
            grads['W' + str(idx)] = numerical_gradient(loss_W, self.params['W' + str(idx)])
            grads['b' + str(idx)] = numerical_gradient(loss_W, self.params['b' + str(idx)])
            
            if self.use_batchnorm and idx != self.hidden_layer_num+1:
                grads['gamma' + str(idx)] = numerical_gradient(loss_W, self.params['gamma' + str(idx)])
                grads['beta' + str(idx)] = numerical_gradient(loss_W, self.params['beta' + str(idx)])

        return grads
        
    def gradient(self, x, t):
        # forward
        self.loss(x, t, train_flg=True)

        # backward
        dout = 1
        dout = self.last_layer.backward(dout)
        for layer_name in reversed(self.layers):
            dout = self.layers[layer_name].backward(dout)

        # collect the gradients
        grads = {}
        for idx in range(1, self.hidden_layer_num+2):
            grads['W' + str(idx)] = self.layers['Affine' + str(idx)].dW + self.weight_decay_lambda * self.params['W' + str(idx)]
            grads['b' + str(idx)] = self.layers['Affine' + str(idx)].db

            if self.use_batchnorm and idx != self.hidden_layer_num+1:
                grads['gamma' + str(idx)] = self.layers['BatchNorm' + str(idx)].dgamma
                grads['beta' + str(idx)] = self.layers['BatchNorm' + str(idx)].dbeta

        return grads
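
To see why __init_weight scales the random weights by sqrt(2/n) or sqrt(1/n) rather than a fixed 0.01, the following self-contained sketch (NumPy only; the layer sizes are arbitrary and chosen just for illustration) pushes random inputs through a stack of ReLU layers and prints the standard deviation of the activations at each depth:

import numpy as np

def activation_stds(scale_fn, n_layers=5, n_units=100, n_samples=1000, seed=0):
    rng = np.random.default_rng(seed)
    x = rng.standard_normal((n_samples, n_units))
    stds = []
    for _ in range(n_layers):
        W = scale_fn(n_units) * rng.standard_normal((n_units, n_units))
        x = np.maximum(0, x @ W)  # ReLU activation
        stds.append(round(float(x.std()), 4))
    return stds

print('std=0.01:', activation_stds(lambda n: 0.01))            # activations collapse toward 0
print('Xavier  :', activation_stds(lambda n: np.sqrt(1 / n)))  # shrinks slowly under ReLU
print('He      :', activation_stds(lambda n: np.sqrt(2 / n)))  # stays roughly constant

With the fixed 0.01 scale the activations vanish after a few layers, which stalls learning; the He scale keeps the distribution stable under ReLU, which is why weight_init_std='relu' is the default above.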

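layer/common.py itself is not listed in this post. For reference, here is a minimal sketch of a Dropout layer with the forward(x, train_flg) signature that MultiLayerNet.predict relies on; it is assumed to mirror the book's common-layer implementation, so treat it as illustrative rather than authoritative:

import numpy as np

class Dropout:
    def __init__(self, dropout_ratio=0.5):
        self.dropout_ratio = dropout_ratio
        self.mask = None

    def forward(self, x, train_flg=True):
        if train_flg:
            # randomly zero a fraction of the activations during training
            self.mask = np.random.rand(*x.shape) > self.dropout_ratio
            return x * self.mask
        # at test time, scale by the keep probability instead of masking
        return x * (1.0 - self.dropout_ratio)

    def backward(self, dout):
        # gradients flow only through the units that were kept
        return dout * self.mask
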
Implementing the Trainer Class

import sys
file_path = __file__.replace('\\', '/')
dir_path = file_path[: file_path.rfind('/')] # path of the directory containing this file
pardir_path = dir_path[: dir_path.rfind('/')]
sys.path.append(pardir_path) # add the parent directory to the Python module search path

import numpy as np
from optimizer.optimizer import *
import pickle
import os
import matplotlib.pyplot as plt

class Trainer:
    """
    Trains a neural network.
    evaluate_sample_num_per_epoch is the number of samples used when computing the accuracy on the training and test sets after each epoch; by default all samples are evaluated.
    """
    def __init__(self, network, x_train, t_train, x_test, t_test,
                 epochs=20, mini_batch_size=100, optimizer='SGD', optimizer_param={'lr':0.01},
                 save_model_flag=True, pkl_file_name=None, plot_flag=True, fig_name=None,
                 evaluate_sample_num_per_epoch=None, verbose=True):
        self.network = network
        self.verbose = verbose
        self.x_train = x_train
        self.t_train = t_train
        self.x_test = x_test
        self.t_test = t_test
        self.epochs = epochs
        self.save_model_flag = save_model_flag
        self.pkl_file_name = pkl_file_name
        self.plot_flag = plot_flag
        self.fig_name = fig_name
        self.best_loss = 1e10 # best loss achieved so far (evaluated on the test sample each epoch)
        self.batch_size = mini_batch_size
        self.evaluate_sample_num_per_epoch = evaluate_sample_num_per_epoch

        # optimizer
        optimizer_class_dict = {'sgd':SGD, 'momentum':Momentum, 'nesterov':Nesterov,
                                'adagrad':AdaGrad, 'rmsprop':RMSprop, 'adadelta':AdaDelta, 'adam':Adam}
        self.optimizer = optimizer_class_dict[optimizer.lower()](**optimizer_param)
        
        self.train_size = x_train.shape[0]
        self.iter_per_epoch = max(self.train_size / mini_batch_size, 1)
        self.max_iter = int(epochs * self.iter_per_epoch)
        self.current_iter = 0
        self.current_epoch = 0
        
        self.train_loss_list = []
        self.train_acc_list = []
        self.test_acc_list = []

    def train_step(self):
        batch_mask = np.random.choice(self.train_size, self.batch_size)
        x_batch = self.x_train[batch_mask]
        t_batch = self.t_train[batch_mask]
        
        grads = self.network.gradient(x_batch, t_batch)
        self.optimizer.update(self.network.params, grads)
        
        loss = self.network.loss(x_batch, t_batch)
        self.train_loss_list.append(loss)
        if self.verbose: 
            print("train loss:" + str(loss))
        
        if self.current_iter % self.iter_per_epoch == 0 or self.current_iter == self.max_iter - 1:
            x_train_sample, t_train_sample = self.x_train, self.t_train
            x_test_sample, t_test_sample = self.x_test, self.t_test
            if self.evaluate_sample_num_per_epoch is not None:
                t = self.evaluate_sample_num_per_epoch
                x_train_sample, t_train_sample = self.x_train[:t], self.t_train[:t]
                x_test_sample, t_test_sample = self.x_test[:t], self.t_test[:t]
                
            train_acc = self.network.accuracy(x_train_sample, t_train_sample)
            test_acc = self.network.accuracy(x_test_sample, t_test_sample)
            self.train_acc_list.append(train_acc)
            self.test_acc_list.append(test_acc)

            if self.verbose: 
                print("=== epoch:" + str(self.current_epoch) + ", train acc:" + str(train_acc) + ", test acc:" + str(test_acc) + " ===")
            
            if self.save_model_flag and self.current_epoch > 0:
                current_loss = self.network.loss(x_test_sample, t_test_sample)
                if self.best_loss > current_loss:
                    self.best_loss = current_loss
                    with open(self.pkl_file_name, 'wb') as f:
                        pickle.dump(self.network, f)
                        if self.verbose: 
                            print('net params saved!')

            self.current_epoch += 1

        self.current_iter += 1

    def plot_acc_loss_list(self):
        fig, axes = plt.subplots(1, 2)

        x = np.arange(len(self.train_acc_list))
        axes[0].plot(x, self.train_acc_list, 'r', label='train acc')
        axes[0].plot(x, self.test_acc_list, 'g--', label='test acc')
        
        axes[0].set_xlabel("epochs")
        axes[0].set_ylabel("accuracy")
        axes[0].set_ylim(0, 1.0)
        axes[0].legend(loc='best')

        x = np.arange(len(self.train_loss_list))
        axes[1].plot(x, self.train_loss_list, 'r', label='train loss')
        axes[1].set_xlabel("iters")
        axes[1].set_ylabel("loss")
        axes[1].legend(loc='best')

        plt.savefig(self.fig_name)
        print('fig {0} saved!'.format(self.fig_name))

    def train(self):
        for i in range(self.max_iter):
            self.train_step()

        test_acc = self.network.accuracy(self.x_test, self.t_test)

        if self.verbose:
            print("=============== Final Test Accuracy ===============")
            print("test acc:" + str(test_acc))

        if self.plot_flag:
            self.plot_acc_loss_list()
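
Trainer only assumes that every optimizer exposes update(params, grads) and mutates the parameter dictionary in place. Below is a minimal sketch of that contract, using a hand-written SGD and a toy quadratic objective (the optimizer/optimizer module imported above provides the full versions; this sketch only illustrates the interface):

import numpy as np

class SGD:
    """Minimal optimizer obeying the interface Trainer expects."""
    def __init__(self, lr=0.01):
        self.lr = lr

    def update(self, params, grads):
        for key in params:
            params[key] -= self.lr * grads[key]  # in-place update

# toy check: minimize f(W) = ||W||^2, whose gradient is 2W
params = {'W1': np.array([1.0, -2.0])}
optimizer = SGD(lr=0.1)
for _ in range(100):
    grads = {'W1': 2 * params['W1']}
    optimizer.update(params, grads)
print(params['W1'])  # both entries approach 0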

The MNIST Dataset

Dataset overview

The dataset used here is the MNIST set of handwritten digit images. MNIST is one of the best-known datasets in machine learning, used in all kinds of settings, from simple experiments to published research. It consists of digit images from 0 to 9 (Figure 3-24 in the book): 60,000 training images and 10,000 test images.
Each MNIST image is a 28×28-pixel grayscale image (1 channel) whose pixel values range from 0 to 255, and each image carries a label such as "7", "2", or "1". Every image in the dataset has been size-normalized and centered in advance, so note that a network trained on this dataset also expects size-normalized, centered input when predicting handwritten digits.
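
For example, to feed your own handwriting photo to a network trained on MNIST, it must first be converted to the same format. A rough sketch with PIL (a hypothetical helper, not from the book; proper centering would additionally require cropping the digit to its bounding box before resizing):

import numpy as np
from PIL import Image

def to_mnist_format(path):
    img = Image.open(path).convert('L')            # grayscale, 1 channel
    img = img.resize((28, 28))                     # size normalization
    x = np.asarray(img, dtype=np.float32) / 255.0  # scale pixel values to 0.0-1.0
    return x.reshape(1, -1)                        # flatten to (1, 784)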

Downloading and preprocessing the dataset

  • Dataset download page: http://yann.lecun.com/exdb/mnist/
    Download the following four files:
    train-images-idx3-ubyte.gz, train-labels-idx1-ubyte.gz,
    t10k-images-idx3-ubyte.gz, t10k-labels-idx1-ubyte.gz
    Place the downloaded files in the same directory as the dataset-loading code below.
import gzip
import pickle
import numpy as np
import os
from PIL import Image

IMG_SIZE = 784 # 28*28

key_file = {
    'train_img':'train-images-idx3-ubyte.gz',
    'train_label':'train-labels-idx1-ubyte.gz',
    'test_img':'t10k-images-idx3-ubyte.gz',
    'test_label':'t10k-labels-idx1-ubyte.gz'
}

file_path = __file__.replace('\\', '/')
dataset_path = file_path[: file_path.rfind('/')] # path of the directory containing this file
save_file = dataset_path + "/mnist.pkl"

def _load_label(file_name):
    file_path = dataset_path + '/' + file_name
    
    with gzip.open(file_path, 'rb') as f:
        # skip the 8-byte IDX header (magic number and item count)
        labels = np.frombuffer(f.read(), np.uint8, offset=8)
    print(file_name, "loaded")
    
    return labels

def _load_img(file_name):
    file_path = dataset_path + '/' + file_name
    
    with gzip.open(file_path, 'rb') as f:
        # skip the 16-byte IDX header (magic number, image count, rows, columns)
        data = np.frombuffer(f.read(), np.uint8, offset=16)
    data = data.reshape(-1, IMG_SIZE)
    print(file_name, "loaded")
    
    return data

def _convert_numpy():
    dataset = {}
    dataset['train_img'] =  _load_img(key_file['train_img'])
    dataset['train_label'] = _load_label(key_file['train_label'])    
    dataset['test_img'] = _load_img(key_file['test_img'])
    dataset['test_label'] = _load_label(key_file['test_label'])
    
    return dataset

def init_mnist():
    dataset = _convert_numpy()
    with open(save_file, 'wb') as f:
        pickle.dump(dataset, f, -1)
    print("Done!")

def _change_one_hot_label(x):
    t = np.zeros((x.size, 10))
    for idx, row in enumerate(t):
        row[x[idx]] = 1
        
    return t

def shuffle_dataset(x, t):
    permutation = np.random.permutation(x.shape[0])
    x = x[permutation, :] if x.ndim == 2 else x[permutation,:,:,:]
    t = t[permutation]

    return x, t

def load_mnist(normalize=True, flatten=False, one_hot_label=True, shuffle_data=True):
    """讀入MNIST數據集
    
    Parameters
    ----------
    normalize : 將圖像的像素值正規化爲0.0~1.0
    one_hot_label : 
        one_hot_label爲True的情況下,標籤作爲one-hot數組返回
        one-hot數組是指[0,0,1,0,0,0,0,0,0,0]這樣的數組
    flatten : 是否將圖像展開爲一維數組
    shuffle_data : 是否打亂訓練集
    
    Returns
    -------
    (訓練圖像, 訓練標籤), (測試圖像, 測試標籤)
    """
    if not os.path.exists(save_file):
        init_mnist()
        
    with open(save_file, 'rb') as f:
        dataset = pickle.load(f)
    
    if normalize:
        for key in ('train_img', 'test_img'):
            dataset[key] = dataset[key].astype(np.float32)
            dataset[key] /= 255.0
            
    if one_hot_label:
        dataset['train_label'] = _change_one_hot_label(dataset['train_label'])
        dataset['test_label'] = _change_one_hot_label(dataset['test_label'])
    
    if not flatten:
        for key in ('train_img', 'test_img'):
            dataset[key] = dataset[key].reshape(-1, 1, 28, 28)

    if shuffle_data:
        dataset['train_img'], dataset['train_label'] = shuffle_dataset(dataset['train_img'], dataset['train_label'])

    return (dataset['train_img'], dataset['train_label']), (dataset['test_img'], dataset['test_label']) 

def img_show(img):
    pil_img = Image.fromarray(np.uint8(img))
    pil_img.show()

if __name__ == '__main__':
    (x_train, t_train),  (x_test, t_test) = load_mnist(normalize=False)
    print(x_train.shape, t_train.shape, x_test.shape, t_test.shape)
    img = x_train[0].reshape(28, 28) # (1, 28, 28) -> (28, 28): Image.fromarray cannot handle the channel axis
    label = t_train[0]
    print(label) 
    img_show(img)

Output of the first run (the first run converts the contents of the compressed files into numpy ndarrays and saves them to a .pkl file; later runs only need to read the .pkl file):

train-images-idx3-ubyte.gz loaded
train-labels-idx1-ubyte.gz loaded
t10k-images-idx3-ubyte.gz loaded
t10k-labels-idx1-ubyte.gz loaded
Done!
(60000, 1, 28, 28) (60000, 10) (10000, 1, 28, 28) (10000, 10)
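
The label arrays have shape (60000, 10) and (10000, 10) because one_hot_label defaults to True. _change_one_hot_label above converts the integer labels row by row; the same conversion can be written in vectorized form with fancy indexing (equivalent output, shown only to clarify what the one-hot encoding looks like):

import numpy as np

labels = np.array([7, 2, 1])
one_hot = np.zeros((labels.size, 10))
one_hot[np.arange(labels.size), labels] = 1  # set a single 1 per row
print(one_hot[0])  # [0. 0. 0. 0. 0. 0. 0. 1. 0. 0.] -- a "7"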
  • Image visualization uses the PIL (Python Imaging Library) module through the img_show helper defined above: Image.fromarray turns a numpy array back into a PIL image, and show() displays it.
  • Image output:
    (figure: the first training image displayed as a 28×28 grayscale digit)

Training the Network and Making Predictions

if __name__ == '__main__':
    from dataset.mnist import load_mnist
    from trainer.trainer import Trainer

    (x_train, t_train),  (x_test, t_test) = load_mnist(normalize=True, flatten=True, one_hot_label=True, shuffle_data=True)

    # setting
    train_flag = 1 # 1: train, 0: predict
    gradcheck_flag = 0 # run a gradient check on the trained network
    
    pkl_file_name = dir_path + '/multi_layer_net.pkl'
    fig_name = dir_path + '/multi_layer_net.png'

    net = MultiLayerNet(784, [100, 100, 100, 100, 100, 100, 100], 10,
                 activation='relu', weight_init_std='relu', weight_decay_lambda=0, 
                 use_dropout=False, dropout_ration=0.5, use_batchnorm=True, 
                 pretrain_flag=False, pkl_file_name=pkl_file_name)

    trainer = Trainer(net, x_train, t_train, x_test, t_test,
                 epochs=20, mini_batch_size=100,
                 optimizer='SGD', optimizer_param={'lr':0.01}, 
                 save_model_flag=True, pkl_file_name=pkl_file_name, plot_flag=True, fig_name=fig_name,
                 evaluate_sample_num_per_epoch=None, verbose=True)

    if gradcheck_flag == 1:
        # net.load_pretrain_model()  
        gradient_check(net, x_train[0].reshape(1,-1), t_train[0].reshape(1,-1))

    if train_flag:
        trainer.train()
    else:           
        acc = net.accuracy(x_train, t_train)
        print('accuracy:', acc)
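
gradient_check (imported from func.gradient at the top of the file) verifies the backprop gradients against numerical differentiation. Its exact implementation is not listed in this post; the core idea is a central-difference comparison along the lines of this self-contained sketch:

import numpy as np

def central_diff_grad(f, x, h=1e-4):
    """Numerical gradient of f at x by central differences."""
    grad = np.zeros_like(x)
    for i in range(x.size):
        orig = x[i]
        x[i] = orig + h; f_plus = f(x)
        x[i] = orig - h; f_minus = f(x)
        x[i] = orig
        grad[i] = (f_plus - f_minus) / (2 * h)
    return grad

f = lambda x: np.sum(x ** 2)  # toy loss with known gradient 2x
x = np.random.randn(5)
numerical = central_diff_grad(f, x)
analytic = 2 * x
print(np.max(np.abs(numerical - analytic)))  # should be tiny, around 1e-8

If backprop is implemented correctly, the analytic gradients from gradient() and the numerical ones from numerical_gradient() agree to within floating-point error.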

The network is set up with 7 hidden layers of 100 neurons each, and Batch Normalization is enabled.
(figure: train/test accuracy per epoch and training loss per iteration)

=============== Final Test Accuracy ===============
test acc:0.9689

After training for 20 epochs, the test accuracy has risen to 0.9689. The accuracy curves also show that the network has begun to overfit.
