實戰七:手把手教你用TensorFlow進行驗證碼識別(中)
目錄
- 準備模型開發環境
- 生成驗證碼數據集
- 輸入與輸出數據處理
- 模型結構設計
- 模型損失函數設計
- 模型訓練過程分析
- 模型部署與效果演示
四、模型結構設計
1.圖像分類模型AlexNet
2.圖像分類模型VGG-16
3.驗證碼識別模型結構
4.驗證碼識別模型實現
a.引入第三方包
import glob
import os
import pickle

import numpy as np
import matplotlib.pyplot as plt
import tensorflow.gfile as gfile
from PIL import Image
from keras import backend as K
from keras.models import *
from keras.layers import *
from keras.utils.vis_utils import plot_model
Using TensorFlow backend.
b.變量以及函數定義
定義超參數和字符集
# Candidate character sets; only digits are used for this tutorial's captchas.
NUMBER = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
LOWERCASE = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u',
'v', 'w', 'x', 'y', 'z']
UPPERCASE = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U',
'V', 'W', 'X', 'Y', 'Z']
CAPTCHA_CHARSET = NUMBER # captcha character set
CAPTCHA_LEN = 4 # number of characters per captcha
CAPTCHA_HEIGHT = 60 # captcha image height (pixels)
CAPTCHA_WIDTH = 160 # captcha image width (pixels)
TRAIN_DATA_DIR = '.\\train-data\\' # training-set directory (Windows-style path)
TEST_DATA_DIR = '.\\test-data\\' # test-set directory
BATCH_SIZE = 100
EPOCHS = 10
OPT = 'adam'
LOSS = 'binary_crossentropy'
MODEL_DIR = './model/train_demo/'
MODEL_FORMAT = '.h5'
HISTORY_DIR = './history/train_demo/'
HISTORY_FORMAT = '.history'
filename_str = "{}captcha_{}_{}_bs_{}_epochs_{}{}"
# Network-architecture diagram file (NOTE: 'classfication' misspelling is kept —
# it is part of the runtime filename).
MODEL_VIS_FILE = 'captcha_classfication' + '.png'
# Trained model path, e.g. ./model/train_demo/captcha_adam_binary_crossentropy_bs_100_epochs_10.h5
MODEL_FILE = filename_str.format(MODEL_DIR, OPT, LOSS, str(BATCH_SIZE), str(EPOCHS), MODEL_FORMAT)
# Training-history (pickle) path, built from the same naming template.
HISTORY_FILE = filename_str.format(HISTORY_DIR, OPT, LOSS, str(BATCH_SIZE), str(EPOCHS), HISTORY_FORMAT)
將RGB驗證碼圖像轉爲灰度圖
def rgb2gray(img):
    """Convert RGB captcha image(s) to grayscale with ITU-R 601 luma weights.

    Works on a single image or a whole batch: only the trailing RGB axis is
    reduced. Y' = 0.299 R + 0.587 G + 0.114 B
    https://en.wikipedia.org/wiki/Grayscale#Converting_color_to_grayscale
    """
    weights = np.array([0.299, 0.587, 0.114])
    # Matrix-vector product over the last axis; drops any alpha channel.
    return img[..., :3] @ weights
對驗證碼中每個字符進行one-hot編碼
# 定義one-hot編碼函數
# CAPTCHA_CHARSET = NUMBER # 驗證碼字符集
# CAPTCHA_LEN = 4 # 驗證碼長度
# One-hot encode every character of a captcha string.
# CAPTCHA_CHARSET = NUMBER (captcha character set), CAPTCHA_LEN = 4 (captcha length)
def text2vec(text, length=CAPTCHA_LEN, charset=CAPTCHA_CHARSET):
    """One-hot encode a captcha string into a flat vector.

    Returns a 1-D vector of shape (length * len(charset),); e.g. a 4-digit
    captcha over 10 digits becomes a (40,) vector.

    Raises:
        ValueError: if ``text`` does not have exactly ``length`` characters.
    """
    text_len = len(text)
    if text_len != length:
        # Fixed message formatting: the original read "should be{},but got {}".
        raise ValueError("Error: length of captcha should be {}, but got {}".format(length, text_len))
    vec = np.zeros(length * len(charset))
    for i in range(length):
        # Hot index for character i = its index within the charset + block offset.
        vec[charset.index(text[i]) + i * len(charset)] = 1
    return vec
將驗證碼向量解碼爲對應字符
def vec2text(vector):
    """Decode a one-hot (or probability) captcha vector back into its string."""
    arr = vector if isinstance(vector, np.ndarray) else np.asarray(vector)
    # One row per character position; argmax picks the most confident charset index.
    rows = np.reshape(arr, [CAPTCHA_LEN, -1])
    return ''.join(CAPTCHA_CHARSET[np.argmax(row)] for row in rows)
適配Keras圖像數據格式
def fit_keras_channels(batch, rows=CAPTCHA_HEIGHT, cols=CAPTCHA_WIDTH):
    """Reshape a grayscale batch to match the active Keras image data format.

    Returns the reshaped batch and the per-sample input_shape tuple.
    """
    if K.image_data_format() == 'channels_first':
        # Theano-style layout: (N, channels, rows, cols)
        input_shape = (1, rows, cols)
    else:
        # TensorFlow-style layout: (N, rows, cols, channels)
        input_shape = (rows, cols, 1)
    batch = batch.reshape((batch.shape[0],) + input_shape)
    return batch, input_shape
c.讀取訓練集
# Read the training set: images into X_train, labels (from filenames) into Y_train.
X_train = []
Y_train = []
for filename in glob.glob(TRAIN_DATA_DIR + "*.png"):
    X_train.append(np.array(Image.open(filename)))
    # BUG FIX: str.lstrip/rstrip strip *character sets*, not prefixes/suffixes,
    # so lstrip(TRAIN_DATA_DIR).rstrip(".png") could corrupt labels containing
    # those characters. Take the basename without its extension instead.
    Y_train.append(os.path.splitext(os.path.basename(filename))[0])
d.處理訓練集圖像
# list -> rgb(numpy)
# list of images -> float32 ndarray -> grayscale -> normalized to [0, 1]
X_train = rgb2gray(np.array(X_train, dtype=np.float32)) / 255
# Add the channel axis where the active Keras backend expects it.
X_train, input_shape = fit_keras_channels(X_train)
print(X_train.shape, type(X_train))
print(input_shape)
(3930, 60, 160, 1) <class 'numpy.ndarray'>
(60, 160, 1)
e.處理訓練集標籤(one-hot編碼)
# One-hot encode every label string into a (CAPTCHA_LEN * len(charset),) vector.
Y_train = np.asarray([text2vec(label) for label in Y_train])
print(Y_train.shape, type(Y_train))
print(Y_train[0])
(3930, 40) <class 'numpy.ndarray'>
[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
f.讀取測試集,處理對應圖像和標籤
# Read and preprocess the test set the same way as the training set.
X_test = []
Y_test = []
for filename in glob.glob(TEST_DATA_DIR + "*.png"):
    X_test.append(np.array(Image.open(filename)))
    # BUG FIX: rstrip("*.png") strips any trailing '*', '.', 'p', 'n', 'g'
    # characters (rstrip takes a character set, not a suffix); use the
    # basename without its extension as the label instead.
    Y_test.append(os.path.splitext(os.path.basename(filename))[0])
# list -> numpy(rgb) -> gray -> normalization -> fit keras channels
X_test = np.array(X_test, dtype=np.float32)
X_test = rgb2gray(X_test)
X_test = X_test / 255
X_test, _ = fit_keras_channels(X_test)
# One-hot encode the test labels.
Y_test = list(Y_test)
for i in range(len(Y_test)):
    Y_test[i] = text2vec(Y_test[i])
Y_test = np.asarray(Y_test)
print(X_test.shape, type(X_test))
print(Y_test.shape, type(Y_test))
(961, 60, 160, 1) <class 'numpy.ndarray'>
(961, 40) <class 'numpy.ndarray'>
g.創建驗證碼識別模型
# 輸入層
# Input layer: one grayscale captcha image, shape from fit_keras_channels.
inputs = Input(shape=input_shape, name="inputs")

# Convolution block 1: 32 filters of 3x3, ReLU.
net = Conv2D(32, (3, 3), name="conv1")(inputs)
net = Activation("relu", name="relu1")(net)

# Convolution block 2: 32 filters of 3x3, ReLU, 2x2 max-pooling.
net = Conv2D(32, (3, 3), name="conv2")(net)
net = Activation("relu", name="relu2")(net)
net = MaxPooling2D(pool_size=(2, 2), padding="same", name="pool2")(net)

# Convolution block 3: 64 filters of 3x3, ReLU, 2x2 max-pooling.
net = Conv2D(64, (3, 3), name="conv3")(net)
net = Activation("relu", name="relu3")(net)
net = MaxPooling2D(pool_size=(2, 2), padding="same", name="pool3")(net)

# Flatten the pooled feature maps and apply dropout before the dense heads.
net = Flatten()(net)
net = Dropout(0.25)(net)

# Four 10-way softmax heads, one per captcha character position.
heads = [Dense(10, activation="softmax", name="fc%d" % (i + 1))(net) for i in range(4)]

# Concatenate the four heads so the output matches the (40,) label vectors.
outs = Concatenate()(heads)

# Wire up inputs/outputs and compile with the configured optimizer and loss.
model = Model(inputs=inputs, outputs=outs)
model.compile(optimizer=OPT, loss=LOSS, metrics=["accuracy"])
WARNING:tensorflow:From D:\software\Anaconda\workplace\lib\site-packages\tensorflow\python\framework\op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Colocations handled automatically by placer.
WARNING:tensorflow:From D:\software\Anaconda\workplace\lib\site-packages\keras\backend\tensorflow_backend.py:3445: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
# 查看模型摘要
print(model.summary())
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
inputs (InputLayer) (None, 60, 160, 1) 0
__________________________________________________________________________________________________
conv1 (Conv2D) (None, 58, 158, 32) 320 inputs[0][0]
__________________________________________________________________________________________________
relu1 (Activation) (None, 58, 158, 32) 0 conv1[0][0]
__________________________________________________________________________________________________
conv2 (Conv2D) (None, 56, 156, 32) 9248 relu1[0][0]
__________________________________________________________________________________________________
relu2 (Activation) (None, 56, 156, 32) 0 conv2[0][0]
__________________________________________________________________________________________________
pool2 (MaxPooling2D) (None, 28, 78, 32) 0 relu2[0][0]
__________________________________________________________________________________________________
conv3 (Conv2D) (None, 26, 76, 64) 18496 pool2[0][0]
__________________________________________________________________________________________________
relu3 (Activation) (None, 26, 76, 64) 0 conv3[0][0]
__________________________________________________________________________________________________
pool3 (MaxPooling2D) (None, 13, 38, 64) 0 relu3[0][0]
__________________________________________________________________________________________________
flatten_1 (Flatten) (None, 31616) 0 pool3[0][0]
__________________________________________________________________________________________________
dropout_1 (Dropout) (None, 31616) 0 flatten_1[0][0]
__________________________________________________________________________________________________
fc1 (Dense) (None, 10) 316170 dropout_1[0][0]
__________________________________________________________________________________________________
fc2 (Dense) (None, 10) 316170 dropout_1[0][0]
__________________________________________________________________________________________________
fc3 (Dense) (None, 10) 316170 dropout_1[0][0]
__________________________________________________________________________________________________
fc4 (Dense) (None, 10) 316170 dropout_1[0][0]
__________________________________________________________________________________________________
concatenate_1 (Concatenate) (None, 40) 0 fc1[0][0]
fc2[0][0]
fc3[0][0]
fc4[0][0]
==================================================================================================
Total params: 1,292,744
Trainable params: 1,292,744
Non-trainable params: 0
__________________________________________________________________________________________________
None
h.訓練模型
# 模型可視化
# Visualize the network architecture to a PNG file.
plot_model(model,to_file=MODEL_VIS_FILE,show_shapes=True)
# Train for EPOCHS epochs; verbose=2 prints one summary line per epoch,
# using the held-out test set as validation data.
history = model.fit(X_train,
                    Y_train,
                    batch_size=BATCH_SIZE,
                    epochs=EPOCHS,
                    verbose=2,
                    validation_data=(X_test,Y_test))
WARNING:tensorflow:From D:\software\Anaconda\workplace\lib\site-packages\tensorflow\python\ops\math_ops.py:3066: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
Train on 3930 samples, validate on 961 samples
Epoch 1/10
- 57s - loss: 0.3288 - acc: 0.9000 - val_loss: 0.3258 - val_acc: 0.9000
Epoch 2/10
- 50s - loss: 0.3254 - acc: 0.9000 - val_loss: 0.3255 - val_acc: 0.9000
Epoch 3/10
- 58s - loss: 0.3251 - acc: 0.9000 - val_loss: 0.3251 - val_acc: 0.9000
Epoch 4/10
- 53s - loss: 0.3249 - acc: 0.9000 - val_loss: 0.3245 - val_acc: 0.9000
Epoch 5/10
- 54s - loss: 0.3192 - acc: 0.9000 - val_loss: 0.3083 - val_acc: 0.9002
Epoch 6/10
- 54s - loss: 0.2752 - acc: 0.9035 - val_loss: 0.2717 - val_acc: 0.9040
Epoch 7/10
- 52s - loss: 0.2237 - acc: 0.9170 - val_loss: 0.2407 - val_acc: 0.9121
Epoch 8/10
- 53s - loss: 0.1906 - acc: 0.9287 - val_loss: 0.2218 - val_acc: 0.9188
Epoch 9/10
- 53s - loss: 0.1613 - acc: 0.9400 - val_loss: 0.2158 - val_acc: 0.9231
Epoch 10/10
- 52s - loss: 0.1345 - acc: 0.9510 - val_loss: 0.2262 - val_acc: 0.9218
i.預測樣例
# Ground-truth label of the 6th test sample, decoded from its one-hot vector.
print(vec2text(Y_test[5]))
0044
# Predicted label: reshape the sample into a single-image batch (1, 60, 160, 1).
yy = model.predict(X_test[5].reshape(1,60,160,1))
print(vec2text(yy))
0144
j.保存模型
# Save the trained model as HDF5, creating the target directory if needed.
if not gfile.Exists(MODEL_DIR):
    gfile.MakeDirs(MODEL_DIR)
model.save(MODEL_FILE)
print("saved trained model at %s" %MODEL_FILE)
saved trained model at ./model/train_demo/captcha_adam_binary_crossentropy_bs_100_epochs_10.h5
k.保存訓練過程記錄
history.history["acc"]
[0.8999997987091997,
0.8999997987091997,
0.8999997987091997,
0.8999997987091997,
0.8999997987091997,
0.903492193943975,
0.9169528651176821,
0.9287086367303785,
0.9400127452748422,
0.9510496704632999]
history.history.keys()
dict_keys(['val_loss', 'val_acc', 'loss', 'acc'])
# Persist the training history with pickle, creating the directory if needed.
# Idiom fix: use `not ...` instead of comparing against False.
if not gfile.Exists(HISTORY_DIR):
    gfile.MakeDirs(HISTORY_DIR)
with open(HISTORY_FILE,"wb") as f:
    pickle.dump(history.history,f)
print(HISTORY_FILE)
./history/train_demo/captcha_adam_binary_crossentropy_bs_100_epochs_10.history
五、模型損失函數設計
1.交叉熵(Cross-Entropy,CE)
2.Categorical CE Loss(Softmax Loss)
3.Binary CE Loss(Sigmoid CE LOSS)
4.不同損失函數對比
a.引入第三方包
import glob
import pickle
import numpy as np
import matplotlib.pyplot as plt
b.定義過程可視化方法
def plot_training(history=None, metric='acc', title='Model Accuracy', loc='lower right'):
    """Plot one metric curve per training run on a shared figure.

    Args:
        history: dict mapping history file path -> Keras history dict.
        metric: key to plot from each history dict (e.g. 'acc', 'val_loss').
        title: figure title.
        loc: legend location.
    """
    model_list = []
    fig = plt.figure(figsize=(10, 8))
    for key, val in history.items():
        # BUG FIX: rstrip('.history') strips a trailing *character set*
        # ('.', 'h', 'i', 's', 't', 'o', 'r', 'y') and can eat legitimate
        # trailing characters of the run name; remove the exact suffix instead.
        name = key.replace(HISTORY_DIR, '')
        if name.endswith('.history'):
            name = name[:-len('.history')]
        model_list.append(name)
        plt.plot(val[metric])
    plt.title(title)
    plt.ylabel(metric)
    plt.xlabel('epoch')
    plt.legend(model_list, loc=loc)
    plt.show()
c.加載預訓練模型
# Load every pre-trained history file for the loss-function comparison.
HISTORY_DIR = './pre-trained/history/loss/'
history = {}
for filename in glob.glob(HISTORY_DIR + '*.history'):
    with open(filename, 'rb') as f:
        history[filename] = pickle.load(f)
for key, val in history.items():
    # BUG FIX: rstrip('.history') strips a trailing character set, not the
    # suffix; remove the exact '.history' suffix from the run name instead.
    name = key.replace(HISTORY_DIR, '')
    if name.endswith('.history'):
        name = name[:-len('.history')]
    print(name, val.keys())
./pre-trained/history/loss\captcha_rmsprop_binary_crossentropy_bs_100_epochs_100 dict_keys(['val_loss', 'val_acc', 'loss', 'acc'])
./pre-trained/history/loss\captcha_rmsprop_categorical_crossentropy_bs_100_epochs_100 dict_keys(['val_loss', 'val_acc', 'loss', 'acc'])
./pre-trained/history/loss\captcha_rmsprop_mse_bs_100_epochs_100 dict_keys(['val_loss', 'val_acc', 'loss', 'acc'])
./pre-trained/history/loss\captcha_rmsprop_poisson_bs_100_epochs_100 dict_keys(['val_loss', 'val_acc', 'loss', 'acc'])
d.準確率變化(訓練集)
# Optionally drop the categorical-crossentropy run before plotting:
# del history[HISTORY_DIR+'captcha_rmsprop_categorical_crossentropy_bs_100_epochs_100'+'.history']
plot_training(history)
e.損失值變化(訓練集)
plot_training(history, metric='loss', title='Model Loss', loc='upper right')
f.準確率變化(測試集)
plot_training(history, metric='val_acc', title='Model Accuracy (val)')
g.損失值變化(測試集)
plot_training(history, metric='val_loss', title='Model Loss (val)', loc='upper right')
六、模型訓練過程分析
1.優化器介紹:SGD(Stochastic Gradient Descent)
2.優化器介紹:SGD-M(Momentum)
3.優化器介紹:Adagrad-RMSprop-Adam
4.優化器對比
a.引入第三方包
import glob
import pickle
import numpy as np
import matplotlib.pyplot as plt
b.加載訓練過程記錄
# Load one recorded training history (adam + binary CE, 100 epochs) from disk.
history_file = './pre-trained/history/optimizer/binary_ce/captcha_adam_binary_crossentropy_bs_100_epochs_100.history'
with open(history_file, 'rb') as fp:
    history = pickle.load(fp)
c.訓練過程可視化
# Figure with two stacked subplots: accuracy (top) and loss (bottom).
fig = plt.figure()
plt.subplot(2,1,1)
# Training vs. validation accuracy per epoch.
plt.plot(history['acc'])
plt.plot(history['val_acc'])
plt.title('Model Accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='lower right')
plt.subplot(2,1,2)
# Training vs. validation loss per epoch.
plt.plot(history['loss'])
plt.plot(history['val_loss'])
plt.title('Model Loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper right')
plt.tight_layout()
plt.show()
d.定義過程可視化方法
def plot_training(history=None, metric='acc', title='Model Accuracy', loc='lower right'):
    """Plot one metric curve per training run on a shared figure.

    Args:
        history: dict mapping history file path -> Keras history dict.
        metric: key to plot from each history dict (e.g. 'acc', 'val_loss').
        title: figure title.
        loc: legend location.
    """
    model_list = []
    fig = plt.figure(figsize=(10, 8))
    for key, val in history.items():
        # BUG FIX: rstrip('.history') strips a trailing *character set*
        # ('.', 'h', 'i', 's', 't', 'o', 'r', 'y') and can eat legitimate
        # trailing characters of the run name; remove the exact suffix instead.
        name = key.replace(HISTORY_DIR, '')
        if name.endswith('.history'):
            name = name[:-len('.history')]
        model_list.append(name)
        plt.plot(val[metric])
    plt.title(title)
    plt.ylabel(metric)
    plt.xlabel('epoch')
    plt.legend(model_list, loc=loc)
    plt.show()
e.加載預訓練模型記錄
# Load every pre-trained history file for the optimizer comparison.
HISTORY_DIR = '.\\pre-trained\\history\\optimizer\\binary_ce\\'
history = {}
for filename in glob.glob(HISTORY_DIR + '*.history'):
    with open(filename, 'rb') as f:
        history[filename] = pickle.load(f)
for key, val in history.items():
    # BUG FIX: rstrip('.history') strips a trailing character set, not the
    # suffix; remove the exact '.history' suffix from the run name instead.
    name = key.replace(HISTORY_DIR, '')
    if name.endswith('.history'):
        name = name[:-len('.history')]
    print(name, val.keys())
captcha_adadelta_binary_crossentropy_bs_100_epochs_100 dict_keys(['val_loss', 'val_acc', 'loss', 'acc'])
captcha_adagrad_binary_crossentropy_bs_100_epochs_100 dict_keys(['val_loss', 'val_acc', 'loss', 'acc'])
captcha_adam_binary_crossentropy_bs_100_epochs_100 dict_keys(['val_loss', 'val_acc', 'loss', 'acc'])
captcha_rmsprop_binary_crossentropy_bs_100_epochs_100 dict_keys(['val_loss', 'val_acc', 'loss', 'acc'])
f.準確率變化(訓練集)
plot_training(history)
g.損失值變化(訓練集)
plot_training(history, metric='loss', title='Model Loss', loc='upper right')
h.準確率變化(測試集)
plot_training(history, metric='val_acc', title='Model Accuracy (val)')
i.損失值變化(測試集)
plot_training(history, metric='val_loss', title='Model Loss (val)', loc='upper right')