本文为《深度学习入门 基于Python的理论与实现》的部分读书笔记,也参考吴恩达深度学习视频
代码以及图片均参考此书
CNN的实现
本节将实现以下结构的简单CNN
默认在MNIST数据集上工作,输入数据形状为 (1, 28, 28),卷积层使用 5×5 的滤波器,滤波器个数为30,不进行填充,步长为1,因此卷积层之后数据形状为 (30, 24, 24)。池化层使用 2×2 的滤波器,不进行填充,步长为2,因此池化层之后数据形状为 (30, 12, 12)。第一个隐藏层设为100个神经元,输出层的神经元个数设为10。
# Make the parent package importable when this file is run as a script:
# compute this file's directory and append its parent to sys.path.
import sys
file_path = __file__.replace('\\', '/')
dir_path = file_path[: file_path.rfind('/')] # path of the directory containing this file
pardir_path = dir_path[: dir_path.rfind('/')]
sys.path.append(pardir_path) # add the parent directory to the Python module search path
import numpy as np
from func.gradient import numerical_gradient, gradient_check
from layer.common import *
from collections import OrderedDict
import os
import pickle
class SimpleConvNet:
    """Simple CNN: conv - relu - pool - affine - relu - affine - softmax.

    Input images are assumed to be square (width == height).

    Parameters
    ----------
    input_dim : input shape as (channels, height, width); (1, 28, 28) for MNIST
    conv_param : dict with keys 'filter_num', 'filter_size', 'pad', 'stride'
    hidden_size : number of neurons in the fully connected hidden layer
    output_size : number of output classes (10 for MNIST)
    pretrain_flag : when truthy, load a pretrained model from pkl_file_name if that file exists
    pkl_file_name : path of the pickle file holding a pretrained model (may be None)
    """
    def __init__(self, input_dim=(1, 28, 28),
                 conv_param=None,
                 hidden_size=100, output_size=10,
                 pretrain_flag=True, pkl_file_name=None):
        # None sentinel instead of a mutable dict default argument (shared-state
        # pitfall); the effective default is unchanged.
        if conv_param is None:
            conv_param = {'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1}
        filter_num = conv_param['filter_num']
        filter_size = conv_param['filter_size']
        filter_pad = conv_param['pad']
        filter_stride = conv_param['stride']
        input_size = input_dim[1]  # square images: height == width
        conv_output_size = (input_size - filter_size + 2 * filter_pad) // filter_stride + 1
        # Flattened feature count after the fixed 2x2 / stride-2 pooling layer below.
        pool_output_size = filter_num * (conv_output_size // 2) ** 2
        self.pkl_file_name = pkl_file_name
        # Guard: os.path.exists(None) raises TypeError, so check the name first
        # (the original crashed with the default pkl_file_name=None).
        if pretrain_flag and pkl_file_name is not None and os.path.exists(pkl_file_name):
            self.load_pretrain_model()
        else:
            # He initialization: std = sqrt(2 / fan_in).
            self.params = {}
            # Conv fan_in is channels * filter_size**2 (input_dim[0]); the
            # original mistakenly used input_dim[1], the image height.
            self.params['W1'] = np.sqrt(2.0 / (input_dim[0] * filter_size ** 2)) * \
                np.random.randn(filter_num, input_dim[0], filter_size, filter_size)
            self.params['b1'] = np.zeros(filter_num)
            self.params['W2'] = np.sqrt(2.0 / pool_output_size) * \
                np.random.randn(pool_output_size, hidden_size)
            self.params['b2'] = np.zeros(hidden_size)
            self.params['W3'] = np.sqrt(2.0 / hidden_size) * \
                np.random.randn(hidden_size, output_size)
            self.params['b3'] = np.zeros(output_size)
            # Assemble the layers in forward order.
            self.layers = OrderedDict()
            self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'],
                                               conv_param['stride'], conv_param['pad'])
            self.layers['Relu1'] = Relu()
            self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)
            self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
            self.layers['Relu2'] = Relu()
            self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])
            self.last_layer = SoftmaxWithLoss()

    def load_pretrain_model(self):
        """Load params/layers/last_layer from the pickle file into this instance.

        NOTE(security): pickle.load can execute arbitrary code when the file is
        untrusted — only load model files you created yourself.
        """
        with open(self.pkl_file_name, 'rb') as f:
            model = pickle.load(f)
        # setattr/getattr instead of the original exec() string-building:
        # same effect, no dynamic code execution.
        for key in ('params', 'layers', 'last_layer'):
            setattr(self, key, getattr(model, key))
        print('params loaded!')

    def predict(self, x):
        """Forward pass through every layer except the final softmax; returns scores."""
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        """Cross-entropy loss of the network on batch x with labels t."""
        y = self.predict(x)
        return self.last_layer.forward(y, t)

    def accuracy(self, x, t, batch_size=100):
        """Classification accuracy over all of x, evaluated in mini-batches.

        t may be one-hot (2-D) or class indices (1-D).
        """
        if t.ndim != 1:
            t = np.argmax(t, axis=1)  # one-hot -> class indices
        hit = 0.0
        # Step through ALL samples; the original dropped the final partial
        # batch whenever x.shape[0] was not a multiple of batch_size.
        for i in range(0, x.shape[0], batch_size):
            tx = x[i:i + batch_size]
            tt = t[i:i + batch_size]
            y = np.argmax(self.predict(tx), axis=1)
            hit += np.sum(y == tt)
        return hit / x.shape[0]

    def numerical_gradient(self, x, t):
        """Gradients of the loss w.r.t. every parameter by numerical differentiation.

        Slow; used only to verify the backprop implementation in gradient().
        """
        loss_w = lambda w: self.loss(x, t)
        grads = {}
        for idx in range(1, 4):
            grads['W' + str(idx)] = numerical_gradient(loss_w, self.params['W' + str(idx)])
            grads['b' + str(idx)] = numerical_gradient(loss_w, self.params['b' + str(idx)])
        return grads

    def gradient(self, x, t):
        """Gradients of the loss w.r.t. every parameter by backpropagation."""
        # forward pass (also caches intermediate values inside each layer)
        self.loss(x, t)
        # backward pass, layers visited in reverse order
        dout = self.last_layer.backward(1)
        for layer_name in reversed(self.layers):
            dout = self.layers[layer_name].backward(dout)
        # collect the gradients stored by the parameterized layers
        grads = {}
        grads['W1'], grads['b1'] = self.layers['Conv1'].dW, self.layers['Conv1'].db
        grads['W2'], grads['b2'] = self.layers['Affine1'].dW, self.layers['Affine1'].db
        grads['W3'], grads['b3'] = self.layers['Affine2'].dW, self.layers['Affine2'].db
        return grads
if __name__ == '__main__':
    from dataset.mnist import load_mnist
    from trainer.trainer import Trainer

    (x_train, t_train), (x_test, t_test) = load_mnist(
        normalize=True, flatten=False, one_hot_label=True, shuffle_data=True)

    # switches
    run_training = 1   # 1: train the network, 0: just evaluate accuracy
    run_gradcheck = 0  # 1: gradient-check the already trained network

    pkl_file_name = dir_path + '/convnet.pkl'
    fig_name = dir_path + '/convnet.png'

    net = SimpleConvNet(
        input_dim=(1, 28, 28),
        conv_param={'filter_num':30, 'filter_size':5, 'pad':0, 'stride':1},
        hidden_size=100, output_size=10,
        pretrain_flag=True, pkl_file_name=pkl_file_name)

    trainer = Trainer(
        net, x_train, t_train, x_test, t_test,
        epochs=2, mini_batch_size=128,
        optimizer='Adam', optimizer_param={},
        save_model_flag=True, pkl_file_name=pkl_file_name,
        plot_flag=True, fig_name=fig_name,
        evaluate_sample_num_per_epoch=1000, verbose=True)

    if run_gradcheck == 1:
        gradient_check(net, x_train[:2], t_train[:2])

    if run_training:
        trainer.train()
    else:
        print('accuracy:', net.accuracy(x_train, t_train))
=============== Final Test Accuracy ===============
test acc:0.9811
可以看到,使用简单的CNN结构,仅训练了2个epoch之后预测精度就轻易达到了98%,已经超过了之前实现的全连接层网络结构能达到的最高识别精度。
CNN的可视化
比较一下学习前和学习后的卷积层的滤波器
# Make the parent package importable when this file is run as a script:
# compute this file's directory and append its parent to sys.path.
import sys
file_path = __file__.replace('\\', '/')
dir_path = file_path[: file_path.rfind('/')] # path of the directory containing this file
pardir_path = dir_path[: dir_path.rfind('/')]
sys.path.append(pardir_path) # add the parent directory to the Python module search path
import numpy as np
import matplotlib.pyplot as plt
from layer.convnet import SimpleConvNet
def filter_show(filters, nx=8):
    """Show the first channel of every filter in a grid of grayscale images.

    Parameters
    ----------
    filters : array of shape (FN, C, FH, FW) — convolution filters
    nx : number of columns in the grid
    """
    FN, C, FH, FW = filters.shape
    ny = int(np.ceil(FN / nx))  # number of grid rows (round up)
    # squeeze=False keeps `axes` 2-D even when ny == 1 or nx == 1, so the
    # axes[row, col] indexing below cannot break (the original crashed for
    # filter counts that fit in a single row).
    fig, axes = plt.subplots(ny, nx, squeeze=False)
    fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05)
    for i in range(FN):
        row, col = divmod(i, nx)  # grid position of filter i
        axes[row, col].set_xticks([])
        axes[row, col].set_yticks([])
        axes[row, col].imshow(filters[i, 0], cmap=plt.cm.gray_r, interpolation='nearest')
    plt.show()
pkl_file_name = dir_path + '/convnet.pkl'
# Build an untrained network; pretrain_flag=False forces random initialization.
# NOTE: SimpleConvNet takes no `weight_init_std` parameter — passing it (as the
# original did) raises a TypeError, so the bogus keyword argument is dropped.
net = SimpleConvNet(input_dim=(1, 28, 28),
                    conv_param={'filter_num':30, 'filter_size':5, 'pad':0, 'stride':1},
                    hidden_size=100, output_size=10,
                    pretrain_flag=False, pkl_file_name=pkl_file_name)
# filters right after random initialization
filter_show(net.params['W1'])
# filters after training
net.load_pretrain_model()
filter_show(net.params['W1'])
- 学习前的卷积层滤波器
- 学习后的卷积层滤波器
对比两图,可以看出学习前的滤波器是随机进行初始化的,所以在黑白的浓淡上没有规律可循,但学习后的滤波器变成了有规律的图像。我们发现,通过学习,滤波器被更新成了有规律的滤波器,比如从白到黑渐变的滤波器、含有块状区域(称为blob)的滤波器等。
如果要问有规律的滤波器在“观察”什么,答案就是它在观察边缘(颜色变化的分界线)和斑块(局部的块状区域)等。比如,左半部分为白色、右半部分为黑色的滤波器会对垂直方向上的边缘有响应。
由此可知,卷积层的滤波器会提取边缘或斑块等原始信息。而刚才实现的CNN会将这些原始信息传递给后面的层。
上面的结果是针对第1层的卷积层得出的。第1层的卷积层中提取了边缘或斑块等“低级”信息,根据深度学习的可视化相关的研究,随着层次加深,提取的信息(正确地讲,是反映强烈的神经元)也越来越抽象。如果堆叠了多层卷积层,则随着层次加深,提取的信息也愈加复杂、抽象,也就是说,随着层次加深,神经元从简单的形状向“高级”信息变化。