Deep Learning from Scratch (Part 10): Implementing and Visualizing a CNN

This post is part of my reading notes on Deep Learning from Scratch (深度学习入门:基于Python的理论与实现), also drawing on Andrew Ng's deep learning videos.
The code and figures follow the book.

Implementing the CNN

This section implements a simple CNN with the following structure:
[Figure: conv - relu - pool - affine - relu - affine - softmax]
The network works on the MNIST dataset by default, so the input has shape (1, 28, 28). The convolutional layer uses 30 filters of shape 1 × 5 × 5 with no padding and a stride of 1, so the data shape after the convolutional layer is (30, 24, 24). The pooling layer uses a 2 × 2 window with no padding and a stride of 2, so the data shape after pooling is (30, 12, 12). The hidden fully connected layer has 100 neurons, and the output layer has 10.
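As a quick sanity check on these shapes, the standard output-size formula (input - filter + 2*pad) / stride + 1 reproduces them. The snippet below is a standalone sketch, not part of the network code that follows:

def conv_output_size(input_size, filter_size, pad=0, stride=1):
    return (input_size - filter_size + 2 * pad) // stride + 1

conv_out = conv_output_size(28, 5)                         # 24 -> conv output (30, 24, 24)
pool_out = conv_output_size(conv_out, 2, pad=0, stride=2)  # 12 -> pool output (30, 12, 12)
print(conv_out, pool_out, 30 * pool_out**2)                # 24 12 4320 (input size of Affine1)

The full implementation of the network is given below.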

import sys
file_path = __file__.replace('\\', '/')
dir_path = file_path[: file_path.rfind('/')] # directory containing this file
pardir_path = dir_path[: dir_path.rfind('/')]
sys.path.append(pardir_path) # add the parent of this directory to the Python module search path

import numpy as np
from func.gradient import numerical_gradient, gradient_check
from layer.common import *
from collections import OrderedDict
import os
import pickle

class SimpleConvNet:
    """
    conv - relu - pool - affine - relu - affine - softmax
    默认传进来的图片宽高相同
    Parameters
    ----------
    input_dim : 输入大小(MNIST的情况下为(1, 28, 28))
    hidden_size : 隐藏层的神经元数量
    output_size : 输出大小(MNIST的情况下为10)
    """
    def __init__(self, input_dim=(1, 28, 28), 
                 conv_param={'filter_num':30, 'filter_size':5, 'pad':0, 'stride':1},
                 hidden_size=100, output_size=10,
                 pretrain_flag=True, pkl_file_name=None):
        filter_num = conv_param['filter_num']
        filter_size = conv_param['filter_size']
        filter_pad = conv_param['pad']
        filter_stride = conv_param['stride']
        input_size = input_dim[1]  # input is assumed square, so one side length suffices
        conv_output_size = (input_size - filter_size + 2 * filter_pad) // filter_stride + 1
        pool_output_size = filter_num * (conv_output_size // 2)**2  # flattened size after 2x2, stride-2 pooling
        self.pkl_file_name = pkl_file_name

        if pretrain_flag and pkl_file_name is not None and os.path.exists(pkl_file_name):
            self.load_pretrain_model()
        else:
            # initialize weights (He initialization: scale = sqrt(2 / fan_in), suited to ReLU)
            self.params = {}
            self.params['W1'] = np.sqrt(2.0 / (input_dim[0] * filter_size**2)) * \
                                np.random.randn(filter_num, input_dim[0], filter_size, filter_size)
            self.params['b1'] = np.zeros(filter_num)
            self.params['W2'] = np.sqrt(2 / pool_output_size) * \
                                np.random.randn(pool_output_size, hidden_size)
            self.params['b2'] = np.zeros(hidden_size)
            self.params['W3'] = np.sqrt(2 / hidden_size) * \
                                np.random.randn(hidden_size, output_size)
            self.params['b3'] = np.zeros(output_size)

            # build the layers
            self.layers = OrderedDict()
            self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'],
                                            conv_param['stride'], conv_param['pad'])
            self.layers['Relu1'] = Relu()
            self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)
            self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
            self.layers['Relu2'] = Relu()
            self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])

            self.last_layer = SoftmaxWithLoss()

    def load_pretrain_model(self):
        with open(self.pkl_file_name, 'rb') as f:
            model = pickle.load(f)
            for key in ('params', 'layers', 'last_layer'):
                setattr(self, key, getattr(model, key))
            print('params loaded!')

    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)

        return x

    def loss(self, x, t):
        y = self.predict(x)
        return self.last_layer.forward(y, t)

    def accuracy(self, x, t, batch_size=100):
        if t.ndim != 1 :
            t = np.argmax(t, axis=1)
        
        acc = 0.0
        
        for i in range(int(x.shape[0] / batch_size)):
            tx = x[i*batch_size:(i+1)*batch_size]
            tt = t[i*batch_size:(i+1)*batch_size]
            y = self.predict(tx)
            y = np.argmax(y, axis=1)
            acc += np.sum(y == tt) 
        
        return acc / x.shape[0]

    def numerical_gradient(self, x, t):
        loss_w = lambda w: self.loss(x, t)

        grads = {}
        for idx in range(1, 4):
            grads['W' + str(idx)] = numerical_gradient(loss_w, self.params['W' + str(idx)])
            grads['b' + str(idx)] = numerical_gradient(loss_w, self.params['b' + str(idx)])

        return grads

    def gradient(self, x, t):
        # forward
        self.loss(x, t)

        # backward
        dout = 1
        dout = self.last_layer.backward(dout)
        for layer_name in reversed(self.layers):
            dout = self.layers[layer_name].backward(dout)

        # collect the gradients computed during the backward pass
        grads = {}
        grads['W1'], grads['b1'] = self.layers['Conv1'].dW, self.layers['Conv1'].db
        grads['W2'], grads['b2'] = self.layers['Affine1'].dW, self.layers['Affine1'].db
        grads['W3'], grads['b3'] = self.layers['Affine2'].dW, self.layers['Affine2'].db

        return grads
        
if __name__ == '__main__':
    from dataset.mnist import load_mnist
    from trainer.trainer import Trainer

    (x_train, t_train),  (x_test, t_test) = load_mnist(normalize=True, flatten=False, one_hot_label=True, shuffle_data=True)

    # setting
    train_flag = 1 # 1: train the network; 0: evaluate a saved model
    gradcheck_flag = 0 # run a gradient check on the trained network
    
    pkl_file_name = dir_path + '/convnet.pkl'
    fig_name = dir_path + '/convnet.png'

    net = SimpleConvNet(input_dim=(1, 28, 28), 
                        conv_param={'filter_num':30, 'filter_size':5, 'pad':0, 'stride':1},
                        hidden_size=100, output_size=10,
                        pretrain_flag=True, pkl_file_name=pkl_file_name)

    trainer = Trainer(net, x_train, t_train, x_test, t_test,
                 epochs=2, mini_batch_size=128,
                 optimizer='Adam', optimizer_param={}, 
                 save_model_flag=True, pkl_file_name=pkl_file_name, plot_flag=True, fig_name=fig_name,
                 evaluate_sample_num_per_epoch=1000, verbose=True)

    if gradcheck_flag == 1:
        gradient_check(net, x_train[:2], t_train[:2])

    if train_flag:
        trainer.train()
    else:           
        acc = net.accuracy(x_train, t_train)
        print('accuracy:', acc)
=============== Final Test Accuracy ===============
test acc:0.9811

As the results show, with this simple CNN the test accuracy reaches about 98% after only 2 epochs of training, already surpassing the best accuracy achieved by the fully connected networks implemented earlier.
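Once convnet.pkl has been saved, the trained model can be reused for inference simply by constructing the network with pretrain_flag=True. A minimal sketch (assuming the training script above has already been run, so that convnet.pkl exists in the working directory):

import numpy as np
from dataset.mnist import load_mnist
from layer.convnet import SimpleConvNet

(_, _), (x_test, t_test) = load_mnist(normalize=True, flatten=False, one_hot_label=False)

net = SimpleConvNet(pretrain_flag=True, pkl_file_name='convnet.pkl')  # path is an assumption
scores = net.predict(x_test[:1])             # forward pass on a single image
print('predicted:', np.argmax(scores), 'label:', t_test[0])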

Visualizing the CNN

Let's compare the convolutional filters before and after training.

import sys
file_path = __file__.replace('\\', '/')
dir_path = file_path[: file_path.rfind('/')] # directory containing this file
pardir_path = dir_path[: dir_path.rfind('/')]
sys.path.append(pardir_path) # add the parent of this directory to the Python module search path

import numpy as np
import matplotlib.pyplot as plt
from layer.convnet import SimpleConvNet

def filter_show(filters, nx=8):
    """
    可视化所有滤波器的第一个通道
    """
    FN, C, FH, FW = filters.shape
    ny = int(np.ceil(FN / nx)) # np.ceil: 向上取整

    fig, axes = plt.subplots(ny, nx)
    fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05)

    for i in range(FN):
        # subplot position (row, column) of this filter
        row = i // nx
        col = i % nx

        axes[row, col].set_xticks([])
        axes[row, col].set_yticks([])
        axes[row, col].imshow(filters[i, 0], cmap=plt.cm.gray_r, interpolation='nearest')
    
    plt.show()


pkl_file_name = dir_path + '/convnet.pkl'

net = SimpleConvNet(input_dim=(1, 28, 28), 
                    conv_param={'filter_num':30, 'filter_size':5, 'pad':0, 'stride':1},
                    hidden_size=100, output_size=10,
                    pretrain_flag=False, pkl_file_name=pkl_file_name)
# weights right after random initialization (before training)
filter_show(net.params['W1'])

# weights after training
net.load_pretrain_model()
filter_show(net.params['W1'])
  • Convolutional filters before training
    [figure]
  • Convolutional filters after training
    [figure]

Comparing the two figures: before training, the filters are randomly initialized, so there is no pattern to their light and dark regions; after training, they have become regular images. Through learning, the filters have been updated into structured ones, for example filters with a white-to-black gradient, or filters containing patchy regions (called blobs).

If we ask what these structured filters are "looking at", the answer is edges (boundaries where the color changes) and blobs (locally patchy regions). For example, a filter whose left half is white and right half is black responds to edges in the vertical direction.
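To make this concrete, here is a small illustrative sketch (plain NumPy, not from the book): cross-correlating a hand-made "left half white, right half black" filter with an image containing a vertical edge produces strong responses exactly where the filter straddles the edge.

import numpy as np

# hand-made 5x5 filter: left half positive ("white"), right half negative ("black")
filt = np.zeros((5, 5))
filt[:, :2] = 1.0
filt[:, 3:] = -1.0

# 12x12 test image: bright on the left, dark on the right (vertical edge in the middle)
img = np.zeros((12, 12))
img[:, :6] = 1.0

# valid cross-correlation, stride 1, no padding (what the Convolution layer computes)
out = np.zeros((img.shape[0] - 5 + 1, img.shape[1] - 5 + 1))
for i in range(out.shape[0]):
    for j in range(out.shape[1]):
        out[i, j] = np.sum(img[i:i+5, j:j+5] * filt)

print(out[0]) # [0. 0. 5. 10. 10. 5. 0. 0] -- peaks along the edge, zero elsewhere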

From this we can see that the filters of a convolutional layer extract primitive information such as edges and blobs, and the CNN implemented above passes this primitive information on to the later layers.

The results above are for the first convolutional layer, which extracts "low-level" information such as edges and blobs. According to research on visualizing deep networks, the information extracted (more precisely, the stimuli that neurons respond strongly to) becomes increasingly abstract as the layers get deeper. When many convolutional layers are stacked, the extracted information grows more complex and abstract with depth; in other words, as the layers deepen, neurons shift from responding to simple shapes toward "high-level" information.

[Figure: the features extracted by successive convolutional layers become increasingly abstract]
