開篇
其實上研究生一年了, 雖然一直從事深度學習相關項目, 但是真正的理論知識幾乎沒怎麼深入學習過. 往常的工作無非就是拿着大佬的論文和工程代碼, 拿着大佬們做好的GNN, RGIN, RDL等模型, 使用TensorFlow等流行的深度學習框架, 照搬照抄, 很多細節, 參數意義, 框架結構都不是很瞭解. 正好趁着假期, 好好學習一下理論基礎, 於是找到了這本純理論, 純手工手撕鬼子的神作. 書和相關代碼點擊這裏進行下載: 《深度學習入門:基於Python的理論與實踐》
在這裏說明一些學習技巧: 可以結合CSDN公開課裏面的《5天搞定深度學習入門系列》一起學習, 簡直不能太開心
1 建立path_config.json
目的是在這裏配置工程路徑, 使得後期開發不會有路徑上的bug
import sys, os
import json
import numpy as np
# Read the project configuration from the parent directory and put the
# configured project root on the module search path, so that later imports
# of project packages resolve regardless of the notebook's location.
with open(r"../path_config.json", "r", encoding="utf-8") as f:
    config = json.load(f)

root_path = config["root_path"]
sys.path.append(root_path)  # make the project root importable
print("初始化成功")
初始化成功
2 激活函數, 即下圖的z
# Activation functions
# (softmax, defined further down, returns the predicted class probabilities
#  for multi-class classification)
def identity_function(x):
    """Identity activation, y = x: hand the input back unchanged."""
    result = x
    return result
# Step function
def step_function(x):
    """Heaviside-style step activation.

    Args:
        x: Scalar, sequence or NumPy array of numbers.

    Returns:
        Integer NumPy array with 1 where ``x > 0`` and 0 elsewhere
        (so an input of exactly 0 maps to 0).
    """
    # The ``np.int`` alias was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin ``int`` selects the same default integer dtype.
    # ``np.asarray`` additionally lets plain Python lists be compared.
    return np.array(np.asarray(x) > 0, dtype=int)
def softmax(x):
    """Softmax with the usual max-subtraction guard against overflow.

    Args:
        x: NumPy array. A 2-D array is treated as a batch and normalised
            row by row; any other rank is normalised over all elements.

    Returns:
        Array of the same shape as ``x`` whose normalised entries sum to 1
        (per row for 2-D input).
    """
    if x.ndim == 2:
        # Batch case: shift and normalise each row independently.
        shifted = x - np.max(x, axis=1, keepdims=True)
        exps = np.exp(shifted)
        return exps / np.sum(exps, axis=1, keepdims=True)

    # Subtracting the maximum keeps np.exp from overflowing.
    shifted = x - np.max(x)
    exps = np.exp(shifted)
    return exps / np.sum(exps)
# ReLU activation: returns both the activation and its derivative
def ReLU(x):
    """Compute ReLU and its element-wise derivative in one call.

    Args:
        x: NumPy array of pre-activations (left unmodified).

    Returns:
        Tuple ``(f_x, d_x)``: ``f_x`` is ``max(0, x)``; ``d_x`` has the same
        dtype and is 1 wherever the activation is positive. Entries that are
        not positive keep the activation's value (0 for ordinary inputs).
    """
    activated = np.maximum(0, x)
    derivative = activated.copy()
    positive = derivative > 0
    derivative[positive] = 1
    return activated, derivative
def relu(x):
    """Rectified linear unit: element-wise ``max(0, x)``."""
    rectified = np.maximum(0, x)
    return rectified
def relu_grad(x):
    """Element-wise derivative of ReLU evaluated at ``x``.

    Args:
        x: NumPy array of pre-activations.

    Returns:
        Array shaped like ``x`` (same dtype) holding 1 where ``x >= 0`` and
        0 elsewhere. Note the convention used here puts the derivative at
        exactly 0 equal to 1.
    """
    # np.zeros(x) would interpret the *values* of x as a shape and raise;
    # zeros_like allocates an array with x's shape instead.
    grad = np.zeros_like(x)
    grad[x >= 0] = 1
    return grad
# Sigmoid activation; its derivative is sig * (1 - sig)
def sigmoid(x):
    """Logistic sigmoid ``1 / (1 + exp(-x))``, applied element-wise."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator
def sigmoid_grad(x):
    """Derivative of the sigmoid at ``x``: ``(1 - sigmoid(x)) * sigmoid(x)``."""
    sig = sigmoid(x)
    return (1.0 - sig) * sig
# tanh activation: returns the activation and its derivative 1 - tanh(x)^2
def tanha(x):
    """Hyperbolic tangent and its derivative.

    Args:
        x: Scalar or NumPy array.

    Returns:
        Tuple ``(tan, d_tan)`` with ``tan = tanh(x)`` and
        ``d_tan = 1 - tan**2``.
    """
    # The explicit (e^x - e^-x) / (e^x + e^-x) ratio overflows to inf / inf
    # (NaN) once |x| exceeds roughly 710; np.tanh saturates cleanly at +-1.
    tan = np.tanh(x)
    return tan, 1 - np.square(tan)
3 損失函數
# Loss functions
# Sample data shared by the loss demos below:
#   y - predicted scores for 10 classes (highest score at index 2)
#   t - one-hot target marking index 2 as the correct class
y = np.array([
    0.1, 0.05, 0.6, 0.0, 0.05,
    0.1, 0.0, 0.1, 0.0, 0.0,
])
t = np.zeros(10, dtype=int)
t[2] = 1
# Mean squared error
def mean_squared_error(y, t):
    """Return half the sum of squared differences between ``y`` and ``t``.

    Args:
        y: NumPy array of predictions.
        t: NumPy array of targets, broadcast-compatible with ``y``.
    """
    residual = y - t
    return 0.5 * np.sum(np.square(residual))
# Demo on the sample data; prints 0.09750000000000003
mse_value = mean_squared_error(y, t)
print("均方誤差計算: ", mse_value)
# Cross-entropy error
def cross_entropy_error(y, t):
    """Mean cross-entropy of predictions ``y`` against targets ``t``.

    Args:
        y: Predicted probabilities, either one sample (1-D) or a batch
            (2-D, one row per sample).
        t: Targets, either one-hot encoded (same number of elements as
            ``y``) or integer class indices.

    Returns:
        The cross-entropy averaged over the batch.
    """
    # Promote a single sample to a batch of one.
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)

    # One-hot targets are converted to the index of the correct class.
    if t.size == y.size:
        t = t.argmax(axis=1)

    n_samples = y.shape[0]
    picked = y[np.arange(n_samples), t]
    # 1e-7 keeps log() finite when a predicted probability is exactly 0.
    return -np.sum(np.log(picked + 1e-7)) / n_samples
# Demo on the sample data; prints 0.510825457099338
cee_value = cross_entropy_error(y, t)
print("cross_entropy_error交叉熵誤差計算: ", cee_value)
def softmax_loss(X, t):
    """Cross-entropy of ``softmax(X)`` against the targets ``t``.

    Args:
        X: Raw scores, passed through ``softmax`` first.
        t: Targets in any form ``cross_entropy_error`` accepts.
    """
    return cross_entropy_error(softmax(X), t)
# Demo on the sample data; prints 1.8194936854234711.
# NOTE: y already sums to 1, and softmax_loss applies softmax to it again.
softmax_loss_value = softmax_loss(y, t)
print("softmax_loss誤差計算: ", softmax_loss_value)
均方誤差計算: 0.09750000000000003
cross_entropy_error交叉熵誤差計算: 0.510825457099338
softmax_loss誤差計算: 1.8194936854234711
4 梯度
# Gradient
def numerical_gradient(f, x):
    """Estimate the gradient of ``f`` at ``x`` by central differences.

    Every element of ``x`` is perturbed individually by ``+h`` and ``-h``
    and ``f`` is re-evaluated, so arrays of any number of dimensions are
    handled element by element.

    Args:
        f: Callable taking the array ``x`` and returning a scalar.
        x: Floating-point NumPy array. It is perturbed *in place* while the
            gradient is being computed and restored afterwards; an integer
            array would silently truncate the ``h`` perturbation.

    Returns:
        Array with the same shape as ``x`` holding ``df/dx`` per element.
    """
    h = 1e-4  # 0.0001
    grad = np.zeros_like(x)  # same shape as x

    # Walk over every element index. Iterating with range(len(x)) would
    # select whole rows of an N-D array, perturbing many elements at once,
    # and the saved "row" would be a view that the perturbation overwrites.
    it = np.nditer(x, flags=['multi_index'])
    while not it.finished:
        idx = it.multi_index
        tmp_val = x[idx]  # scalar copy of the original value

        # f(x + h)
        x[idx] = tmp_val + h
        fxh1 = f(x)

        # f(x - h)
        x[idx] = tmp_val - h
        fxh2 = f(x)

        grad[idx] = (fxh1 - fxh2) / (2 * h)
        x[idx] = tmp_val  # restore the original value
        it.iternext()

    return grad
# def numerical_gradient(f, x):
# h = 1e-4 # 0.0001
# grad = np.zeros_like(x)
# it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
# while not it.finished:
# idx = it.multi_index
# tmp_val = x[idx]
# x[idx] = float(tmp_val) + h
# fxh1 = f(x) # f(x+h)
# x[idx] = tmp_val - h
# fxh2 = f(x) # f(x-h)
# grad[idx] = (fxh1 - fxh2) / (2*h)
# x[idx] = tmp_val # 還原值
# it.iternext()
# return grad
def function_2(x):
    """Sum of squares of all elements of ``x`` (its analytic gradient is 2*x)."""
    squared = x ** 2
    return np.sum(squared)
# Evaluate the numerical gradient of function_2 on a 3x2, a 1-D and a 3x3
# input and print each result. The analytic gradient of function_2 is 2 * x,
# e.g. [6., 8.] for the 1-D input [3., 4.].
for sample in (
    np.array([[3.0, 4.0], [2.0, 3.0], [2.0, 3.0]]),
    np.array([3.0, 4.0]),
    np.array([[2.0, 3.0, 4.0], [2.0, 3.0, 2.0], [4.0, 5.0, 6.0]]),
):
    r = numerical_gradient(function_2, sample)
    print(r)
[[7.0001 7.0001]
[5.0001 5.0001]
[5.0001 5.0001]]
[6. 8.]
[[ 9.00015 9.00015 9.00015]
[ 7.00015 7.00015 7.00015]
[15.00015 15.00015 15.00015]]
5 神經網絡模型
# Two-layer neural network
class TwoLayerNet():
    """Fully connected network: input -> sigmoid hidden layer -> softmax output.

    Parameters live in ``self.params`` under the keys ``'W1'``, ``'b1'``,
    ``'W2'`` and ``'b2'``.
    """

    def __init__(self, layers=(784, 50, 10), seed=200, weight_init_std=0.01):
        """Create the parameter arrays.

        Args:
            layers: Sequence ``(input_size, hidden_size, output_size)``.
                A tuple default avoids sharing one mutable list between calls.
            seed: Seed of the network's private random generator, used for
                weight initialisation and mini-batch sampling. ``None``
                requests a non-deterministic seed.
            weight_init_std: Scale applied to the standard-normal weights.
        """
        # A private generator honours ``seed`` without touching the global
        # np.random state.
        self._rng = np.random.RandomState(seed)

        # Initialise weights with scaled Gaussian noise and biases with zeros.
        self.params = {}
        self.params['W1'] = weight_init_std * self._rng.randn(layers[0], layers[1])
        self.params['b1'] = np.zeros(layers[1])
        self.params['W2'] = weight_init_std * self._rng.randn(layers[1], layers[2])
        self.params['b2'] = np.zeros(layers[2])

    @staticmethod
    def _iters_per_epoch(train_size, batch_size):
        """Number of whole mini-batches in one pass over the data (at least 1)."""
        return max(train_size // batch_size, 1)

    def predict(self, x):
        """Run the forward pass and return the softmax output for ``x``."""
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']

        # Forward propagation
        z1 = sigmoid(np.dot(x, W1) + b1)
        y = softmax(np.dot(z1, W2) + b2)
        return y

    # x: input data, t: supervision (target) data
    def loss(self, x, t):
        """Cross-entropy error of the network's prediction for ``x`` against ``t``."""
        y = self.predict(x)
        return cross_entropy_error(y, t)

    def accuracy(self, x, t):
        """Fraction of rows of ``x`` whose predicted class matches ``t``.

        ``t`` must be one-hot encoded: the class index is taken with
        ``argmax`` along axis 1.
        """
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        t = np.argmax(t, axis=1)

        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    # x: input data, t: supervision (target) data
    def numerical_gradient(self, x, t):
        """Gradients of the loss w.r.t. every parameter via finite differences.

        Delegates to the module-level ``numerical_gradient``. The loss
        closure ignores its argument and re-reads ``self.params``, so this
        relies on that function perturbing each parameter array in place.

        Returns:
            Dict with keys ``'W1'``, ``'b1'``, ``'W2'``, ``'b2'``.
        """
        def loss_W(W):
            return self.loss(x, t)

        grads = {}
        for key in ('W1', 'b1', 'W2', 'b2'):
            grads[key] = numerical_gradient(loss_W, self.params[key])
        return grads

    def gradient(self, x, t):
        """Gradients of the loss via backpropagation.

        Args:
            x: Batch of inputs, one row per sample.
            t: Targets with the same shape as the network output
                (the backward pass computes ``y - t``).

        Returns:
            Dict with keys ``'W1'``, ``'b1'``, ``'W2'``, ``'b2'``.
        """
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']
        grads = {}

        batch_num = x.shape[0]

        # forward
        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)

        # backward: gradient of softmax + cross-entropy, averaged over the batch
        dy = (y - t) / batch_num
        grads['W2'] = np.dot(z1.T, dy)
        grads['b2'] = np.sum(dy, axis=0)

        dz1 = np.dot(dy, W2.T)            # gradient w.r.t. hidden activation
        da1 = sigmoid_grad(a1) * dz1      # gradient w.r.t. hidden pre-activation
        grads['W1'] = np.dot(x.T, da1)
        grads['b1'] = np.sum(da1, axis=0)

        return grads

    def train(self, x_train, t_train, x_test, t_test, epochs=10000, batch_size=100, learning_rate=0.1):
        """Train with mini-batch gradient descent.

        Args:
            x_train, t_train: Training inputs and one-hot targets.
            x_test, t_test: Test inputs and one-hot targets.
            epochs: Number of mini-batch update steps (iterations), despite
                the name.
            batch_size: Samples drawn (with replacement) per step.
            learning_rate: Step size of the parameter update.

        Returns:
            Tuple ``(train_loss_list, train_acc_list, test_acc_list)``: the
            loss after every step, and the train/test accuracy recorded once
            per pass over the training data.
        """
        train_size = x_train.shape[0]
        train_loss_list = []
        train_acc_list = []
        test_acc_list = []

        # Integer division: a float period (e.g. 10.5) would make the
        # "i % iter_per_epoch == 0" check below fire far too rarely.
        iter_per_epoch = self._iters_per_epoch(train_size, batch_size)

        for i in range(epochs):
            batch_mask = self._rng.choice(train_size, batch_size)
            x_batch = x_train[batch_mask]
            t_batch = t_train[batch_mask]

            # Compute gradients (the numerical variant is kept for reference)
            # grad = self.numerical_gradient(x_batch, t_batch)
            grad = self.gradient(x_batch, t_batch)

            # Update parameters
            for key in ('W1', 'b1', 'W2', 'b2'):
                self.params[key] -= learning_rate * grad[key]

            loss = self.loss(x_batch, t_batch)
            train_loss_list.append(loss)

            if i % iter_per_epoch == 0:
                train_acc = self.accuracy(x_train, t_train)  # accuracy on all training data
                test_acc = self.accuracy(x_test, t_test)     # accuracy on all test data
                # Keep the history for later visualisation.
                train_acc_list.append(train_acc)
                test_acc_list.append(test_acc)
                print("train acc, test acc | " + str(train_acc) + ", " + str(test_acc))

        return train_loss_list, train_acc_list, test_acc_list
6 主函數
# 開始訓練
import numpy as np
from data_set.mnist import load_mnist
# Load MNIST with normalised inputs and one-hot labels
(x_train, t_train), (x_test, t_test) = load_mnist(
    normalize=True,
    one_hot_label=True,
)

# Build the network with its default settings and train it, keeping the
# loss / accuracy histories for the plots further down.
network = TwoLayerNet()
train_loss_list, train_acc_list, test_acc_list = network.train(
    x_train, t_train, x_test, t_test
)
(10000, 784)
<class 'numpy.ndarray'>
train acc, test acc | 0.09863333333333334, 0.0958
train acc, test acc | 0.7864833333333333, 0.7901
train acc, test acc | 0.8757833333333334, 0.8787
train acc, test acc | 0.8993166666666667, 0.9018
train acc, test acc | 0.9092166666666667, 0.9125
train acc, test acc | 0.9160166666666667, 0.9172
train acc, test acc | 0.92075, 0.9241
train acc, test acc | 0.9245166666666667, 0.9264
train acc, test acc | 0.9291, 0.9308
train acc, test acc | 0.932, 0.9332
train acc, test acc | 0.9350833333333334, 0.9349
train acc, test acc | 0.9381166666666667, 0.9383
train acc, test acc | 0.9407666666666666, 0.9405
train acc, test acc | 0.9432166666666667, 0.9434
train acc, test acc | 0.9443333333333334, 0.9441
train acc, test acc | 0.9464666666666667, 0.9461
train acc, test acc | 0.9483833333333334, 0.9471
7 數據結果處理
import matplotlib.pyplot as plt
%matplotlib inline
# Plot training vs. test accuracy; one point is recorded per epoch.
# (The unused `markers` dict from the original cell has been dropped.)
x = np.arange(len(train_acc_list))
plt.plot(x, train_acc_list, label='train acc')
plt.plot(x, test_acc_list, label='test acc', linestyle='--')
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.ylim(0, 1.0)
plt.legend(loc='lower right')
plt.show()
準確率顯示
import matplotlib.pyplot as plt
%matplotlib inline
# Plot the training loss. One loss value is recorded per mini-batch update,
# so the x-axis counts iterations rather than epochs, and the curve is
# labelled as the loss (it was mislabelled 'train acc').
# (The unused `markers` dict from the original cell has been dropped.)
x = np.arange(len(train_loss_list))
plt.plot(x, train_loss_list, label='train loss')
plt.xlabel("iterations")
plt.ylabel("loss")
plt.ylim(0, 3.0)
plt.legend(loc='lower right')
plt.show()