一、前言
這一篇續接前一篇《yolo v2之車牌檢測後續識別字符(一)》,主要是生成模型文件、配置文件以及訓練、測試模型。
二、python接口生成配置文件、模型文件
車牌圖片端到端識別的模型文件參考自這裏,模型圖如下所示:
本來想使用caffe的python接口生成prototxt,結果發現很麻煩,容易出錯,直接在可視化工具netscope上對已有prototxt做修改更方便,寫模型文件時,注意輸入的圖片、卷積核大小、pad大小、stride大小、輸出圖片大小的關係,無論卷積層還是池化層,都有
輸入:n, c_i, h_i, w_i
輸出:n, c_o, h_o, w_o
滿足(除不盡時,Caffe 的卷積層向下取整,池化層向上取整):
h_o = ( h_i + 2*pad_h - kernel_h ) / stride_h + 1
w_o = ( w_i + 2*pad_w - kernel_w ) / stride_w + 1
#lpr_train_val.prototxt
# Net name, followed by the data layer used in the TRAIN phase.
name: "Lpr"
layer {
  name: "lpr"
  type: "Data"
  top: "data"
  top: "label"
  # This layer only exists when the net is instantiated for training.
  include {
    phase: TRAIN
  }
  transform_param {
    # 0.00390625 = 1/256. Caffe subtracts the mean first and then multiplies
    # by the scale, so any inference code has to apply the same two steps.
    scale: 0.00390625
    mean_file: "/home/jyang/caffe/LPR/Mean/mean.binaryproto"
  }
  data_param {
    source: "/home/jyang/caffe/LPR/Build_lmdb/train_lmdb"
    batch_size: 32
    backend: LMDB
  }
}
# Data layer used in the TEST phase: same preprocessing as the training
# layer, but it reads the validation LMDB.
layer {
  name: "lpr"
  type: "Data"
  top: "data"
  top: "label"
  include {
    phase: TEST
  }
  transform_param {
    # 0.00390625 = 1/256, applied after mean subtraction.
    scale: 0.00390625
    mean_file: "/home/jyang/caffe/LPR/Mean/mean.binaryproto"
  }
  data_param {
    source: "/home/jyang/caffe/LPR/Build_lmdb/val_lmdb"
    # 32 images per forward pass; the solver's test_iter is derived from it.
    batch_size: 32
    backend: LMDB
  }
}
# Split the label blob along axis 1 into seven blobs, one per plate character.
# Slice points 1..6 cut after every single column, so each top is one column
# wide. Assumes the label blob is N x 7 -- TODO confirm against the LMDB
# builder from the previous article.
layer {
  name: "slices"
  type: "Slice"
  bottom: "label"
  top: "label_1"
  top: "label_2"
  top: "label_3"
  top: "label_4"
  top: "label_5"
  top: "label_6"
  top: "label_7"
  slice_param {
    axis: 1
    slice_point: 1
    slice_point: 2
    slice_point: 3
    slice_point: 4
    slice_point: 5
    slice_point: 6
  }
}
# conv1: 32 filters, 3x3, stride 1, no pad specified; followed by an in-place
# ReLU (bottom and top are the same blob).
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  # First param block = weights: base learning rate, weight decay enabled.
  param {
    lr_mult: 1
    decay_mult: 1
  }
  # Second param block = bias: twice the base learning rate, no weight decay.
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 32
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "conv1"
  top: "conv1"
}
# conv2: 32 filters, 3x3, stride 1, no pad specified; followed by an in-place
# ReLU.
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "conv1"
  top: "conv2"
  # Weights: base learning rate, weight decay enabled.
  param {
    lr_mult: 1
    decay_mult: 1
  }
  # Bias: twice the base learning rate, no weight decay.
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 32
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu2"
  type: "ReLU"
  bottom: "conv2"
  top: "conv2"
}
# pool2: 2x2 max pooling with stride 2, halving the spatial size of conv2.
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
# conv3: 64 filters, 3x3, stride 1, no pad specified; followed by an in-place
# ReLU.
layer {
  name: "conv3"
  type: "Convolution"
  bottom: "pool2"
  top: "conv3"
  # Weights: base learning rate.
  # NOTE(review): decay_mult is 0 here, whereas conv1/conv2 use 1 -- confirm
  # that disabling weight decay from this layer on is intentional.
  param {
    lr_mult: 1
    decay_mult: 0
  }
  # Bias: twice the base learning rate, no weight decay.
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 64
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu3"
  type: "ReLU"
  bottom: "conv3"
  top: "conv3"
}
# conv4: 64 filters, 3x3, stride 1, no pad specified; followed by an in-place
# ReLU.
layer {
  name: "conv4"
  type: "Convolution"
  bottom: "conv3"
  top: "conv4"
  # Weights: base learning rate, no weight decay.
  param {
    lr_mult: 1
    decay_mult: 0
  }
  # Bias: twice the base learning rate, no weight decay.
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 64
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu4"
  type: "ReLU"
  bottom: "conv4"
  top: "conv4"
}
# pool4: 2x2 max pooling with stride 2, halving the spatial size of conv4.
layer {
  name: "pool4"
  type: "Pooling"
  bottom: "conv4"
  top: "pool4"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
# conv5: 128 filters, 3x3, stride 1, no pad specified; followed by an in-place
# ReLU.
layer {
  name: "conv5"
  type: "Convolution"
  bottom: "pool4"
  top: "conv5"
  # Weights: base learning rate, no weight decay.
  param {
    lr_mult: 1
    decay_mult: 0
  }
  # Bias: twice the base learning rate, no weight decay.
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 128
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu5"
  type: "ReLU"
  bottom: "conv5"
  top: "conv5"
}
# conv6: 128 filters, 3x3, stride 1, no pad specified; followed by an in-place
# ReLU.
layer {
  name: "conv6"
  type: "Convolution"
  bottom: "conv5"
  top: "conv6"
  # Weights: base learning rate, no weight decay.
  param {
    lr_mult: 1
    decay_mult: 0
  }
  # Bias: twice the base learning rate, no weight decay.
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 128
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu6"
  type: "ReLU"
  bottom: "conv6"
  top: "conv6"
}
# pool6: overlapping max pooling (3x3 window, stride 2), unlike the 2x2/2
# windows used by pool2 and pool4.
layer {
  name: "pool6"
  type: "Pooling"
  bottom: "conv6"
  top: "pool6"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
# Flatten pool6 from axis 1 onwards into one feature vector per image, then
# apply dropout in place on that vector (half of the activations are dropped
# during training).
layer {
  name: "flat6"
  type: "Flatten"
  bottom: "pool6"
  top: "flat6"
  flatten_param {
    axis: 1
  }
}
layer {
  name: "drop6"
  type: "Dropout"
  bottom: "flat6"
  top: "flat6"
  dropout_param {
    dropout_ratio: 0.5
  }
}
# Seven parallel classification heads, one per plate character position.
# Every head reads the shared "flat6" features and outputs 65 scores, one per
# entry of the character table used by the prediction script.
# In each head the first param block is the weights (base learning rate, no
# weight decay) and the second is the bias (2x learning rate, no weight decay).

# Head for character 1.
layer {
  name: "fc7_1"
  type: "InnerProduct"
  bottom: "flat6"
  top: "fc7_1"
  param {
    lr_mult: 1
    decay_mult: 0
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 65
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
# Head for character 2.
layer {
  name: "fc7_2"
  type: "InnerProduct"
  bottom: "flat6"
  top: "fc7_2"
  param {
    lr_mult: 1
    decay_mult: 0
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 65
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
# Head for character 3.
layer {
  name: "fc7_3"
  type: "InnerProduct"
  bottom: "flat6"
  top: "fc7_3"
  param {
    lr_mult: 1
    decay_mult: 0
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 65
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
# Head for character 4.
layer {
  name: "fc7_4"
  type: "InnerProduct"
  bottom: "flat6"
  top: "fc7_4"
  param {
    lr_mult: 1
    decay_mult: 0
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 65
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
# Head for character 5.
layer {
  name: "fc7_5"
  type: "InnerProduct"
  bottom: "flat6"
  top: "fc7_5"
  param {
    lr_mult: 1
    decay_mult: 0
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 65
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
# Head for character 6.
layer {
  name: "fc7_6"
  type: "InnerProduct"
  bottom: "flat6"
  top: "fc7_6"
  param {
    lr_mult: 1
    decay_mult: 0
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 65
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
# Head for character 7.
layer {
  name: "fc7_7"
  type: "InnerProduct"
  bottom: "flat6"
  top: "fc7_7"
  param {
    lr_mult: 1
    decay_mult: 0
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 65
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
# Per-character accuracy, evaluated only in the TEST phase. Each layer
# compares the scores of one head (fc7_k) with the matching label slice
# (label_k), so the solver log reports seven separate accuracies.
layer {
  name: "accuracy_1"
  type: "Accuracy"
  bottom: "fc7_1"
  bottom: "label_1"
  top: "accuracy_1"
  include {
    phase: TEST
  }
}
layer {
  name: "accuracy_2"
  type: "Accuracy"
  bottom: "fc7_2"
  bottom: "label_2"
  top: "accuracy_2"
  include {
    phase: TEST
  }
}
layer {
  name: "accuracy_3"
  type: "Accuracy"
  bottom: "fc7_3"
  bottom: "label_3"
  top: "accuracy_3"
  include {
    phase: TEST
  }
}
layer {
  name: "accuracy_4"
  type: "Accuracy"
  bottom: "fc7_4"
  bottom: "label_4"
  top: "accuracy_4"
  include {
    phase: TEST
  }
}
layer {
  name: "accuracy_5"
  type: "Accuracy"
  bottom: "fc7_5"
  bottom: "label_5"
  top: "accuracy_5"
  include {
    phase: TEST
  }
}
layer {
  name: "accuracy_6"
  type: "Accuracy"
  bottom: "fc7_6"
  bottom: "label_6"
  top: "accuracy_6"
  include {
    phase: TEST
  }
}
layer {
  name: "accuracy_7"
  type: "Accuracy"
  bottom: "fc7_7"
  bottom: "label_7"
  top: "accuracy_7"
  include {
    phase: TEST
  }
}
# One softmax cross-entropy loss per character head. Every loss is weighted
# by 1/7 so that the seven heads contribute equally to the total objective.
layer {
  name: "loss_1"
  type: "SoftmaxWithLoss"
  bottom: "fc7_1"
  bottom: "label_1"
  top: "loss_1"
  # loss weight
  loss_weight: 0.142857 # 1.0/7=0.142857
}
layer {
  name: "loss_2"
  type: "SoftmaxWithLoss"
  bottom: "fc7_2"
  bottom: "label_2"
  top: "loss_2"
  # loss weight
  loss_weight: 0.142857
}
layer {
  name: "loss_3"
  type: "SoftmaxWithLoss"
  bottom: "fc7_3"
  bottom: "label_3"
  top: "loss_3"
  # loss weight
  loss_weight: 0.142857
}
layer {
  name: "loss_4"
  type: "SoftmaxWithLoss"
  bottom: "fc7_4"
  bottom: "label_4"
  top: "loss_4"
  # loss weight
  loss_weight: 0.142857
}
layer {
  name: "loss_5"
  type: "SoftmaxWithLoss"
  bottom: "fc7_5"
  bottom: "label_5"
  top: "loss_5"
  # loss weight
  loss_weight: 0.142857
}
layer {
  name: "loss_6"
  type: "SoftmaxWithLoss"
  bottom: "fc7_6"
  bottom: "label_6"
  top: "loss_6"
  # loss weight
  loss_weight: 0.142857
}
layer {
  name: "loss_7"
  type: "SoftmaxWithLoss"
  bottom: "fc7_7"
  bottom: "label_7"
  top: "loss_7"
  # loss weight
  loss_weight: 0.142857
}
deploy文件如下:
#lpr_deploy.prototxt
# Deploy (inference) net: the LMDB data layers are replaced by a fixed-shape
# Input layer.
name: "Lpr"
layer {
  name: "data"
  type: "Input"
  top: "data"
  input_param {
    # N x C x H x W: one 3-channel image, 72 pixels high and 272 pixels wide.
    shape {
      dim: 1
      dim: 3
      dim: 72
      dim: 272
    }
  }
}
# conv1 + in-place ReLU: 3x3, stride 1, no pad specified.
# With the 1x3x72x272 input declared by the "data" layer: 3x72x272 -> 32x70x270.
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  # lr_mult / decay_mult and the fillers are training settings; they are kept
  # so this definition mirrors lpr_train_val.prototxt.
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 32
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "conv1"
  top: "conv1"
}
# conv2 + in-place ReLU: 3x3, stride 1, no pad specified.
# With the 72x272 input declared by the "data" layer: 32x70x270 -> 32x68x268.
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "conv1"
  top: "conv2"
  # Training-only multipliers, kept to mirror lpr_train_val.prototxt.
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 32
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu2"
  type: "ReLU"
  bottom: "conv2"
  top: "conv2"
}
# pool2: 2x2 max pooling, stride 2.
# With the 72x272 input declared by the "data" layer: 32x68x268 -> 32x34x134.
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
# conv3 + in-place ReLU: 3x3, stride 1, no pad specified.
# With the 72x272 input declared by the "data" layer: 32x34x134 -> 64x32x132.
layer {
  name: "conv3"
  type: "Convolution"
  bottom: "pool2"
  top: "conv3"
  # Training-only multipliers, kept to mirror lpr_train_val.prototxt.
  param {
    lr_mult: 1
    decay_mult: 0
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 64
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu3"
  type: "ReLU"
  bottom: "conv3"
  top: "conv3"
}
# conv4 + in-place ReLU: 3x3, stride 1, no pad specified.
# With the 72x272 input declared by the "data" layer: 64x32x132 -> 64x30x130.
layer {
  name: "conv4"
  type: "Convolution"
  bottom: "conv3"
  top: "conv4"
  # Training-only multipliers, kept to mirror lpr_train_val.prototxt.
  param {
    lr_mult: 1
    decay_mult: 0
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 64
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu4"
  type: "ReLU"
  bottom: "conv4"
  top: "conv4"
}
# pool4: 2x2 max pooling, stride 2.
# With the 72x272 input declared by the "data" layer: 64x30x130 -> 64x15x65.
layer {
  name: "pool4"
  type: "Pooling"
  bottom: "conv4"
  top: "pool4"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
# conv5 + in-place ReLU: 3x3, stride 1, no pad specified.
# With the 72x272 input declared by the "data" layer: 64x15x65 -> 128x13x63.
layer {
  name: "conv5"
  type: "Convolution"
  bottom: "pool4"
  top: "conv5"
  # Training-only multipliers, kept to mirror lpr_train_val.prototxt.
  param {
    lr_mult: 1
    decay_mult: 0
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 128
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu5"
  type: "ReLU"
  bottom: "conv5"
  top: "conv5"
}
# conv6 + in-place ReLU: 3x3, stride 1, no pad specified.
# With the 72x272 input declared by the "data" layer: 128x13x63 -> 128x11x61.
layer {
  name: "conv6"
  type: "Convolution"
  bottom: "conv5"
  top: "conv6"
  # Training-only multipliers, kept to mirror lpr_train_val.prototxt.
  param {
    lr_mult: 1
    decay_mult: 0
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 128
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu6"
  type: "ReLU"
  bottom: "conv6"
  top: "conv6"
}
# pool6: overlapping 3x3 max pooling, stride 2.
# With the 72x272 input declared by the "data" layer: 128x11x61 -> 128x5x30.
layer {
  name: "pool6"
  type: "Pooling"
  bottom: "conv6"
  top: "pool6"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
# Flatten pool6 from axis 1 onwards; with the 72x272 input declared by the
# "data" layer this gives 128*5*30 = 19200 features per image.
layer {
  name: "flat6"
  type: "Flatten"
  bottom: "pool6"
  top: "flat6"
  flatten_param {
    axis: 1
  }
}
# Dropout is retained from the training net. Caffe's Dropout layer passes its
# input through unchanged in the TEST phase, which is the phase the prediction
# script loads this net in.
layer {
  name: "drop6"
  type: "Dropout"
  bottom: "flat6"
  top: "flat6"
  dropout_param {
    dropout_ratio: 0.5
  }
}
# Seven parallel classification heads, one per plate character position.
# Every head reads the shared "flat6" features and outputs 65 scores.
# The layer names match lpr_train_val.prototxt so the trained weights in the
# caffemodel are copied in by name; the param multipliers and fillers are
# training settings kept only to mirror that file.

# Head for character 1.
layer {
  name: "fc7_1"
  type: "InnerProduct"
  bottom: "flat6"
  top: "fc7_1"
  param {
    lr_mult: 1
    decay_mult: 0
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 65
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
# Head for character 2.
layer {
  name: "fc7_2"
  type: "InnerProduct"
  bottom: "flat6"
  top: "fc7_2"
  param {
    lr_mult: 1
    decay_mult: 0
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 65
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
# Head for character 3.
layer {
  name: "fc7_3"
  type: "InnerProduct"
  bottom: "flat6"
  top: "fc7_3"
  param {
    lr_mult: 1
    decay_mult: 0
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 65
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
# Head for character 4.
layer {
  name: "fc7_4"
  type: "InnerProduct"
  bottom: "flat6"
  top: "fc7_4"
  param {
    lr_mult: 1
    decay_mult: 0
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 65
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
# Head for character 5.
layer {
  name: "fc7_5"
  type: "InnerProduct"
  bottom: "flat6"
  top: "fc7_5"
  param {
    lr_mult: 1
    decay_mult: 0
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 65
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
# Head for character 6.
layer {
  name: "fc7_6"
  type: "InnerProduct"
  bottom: "flat6"
  top: "fc7_6"
  param {
    lr_mult: 1
    decay_mult: 0
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 65
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
# Head for character 7.
layer {
  name: "fc7_7"
  type: "InnerProduct"
  bottom: "flat6"
  top: "fc7_7"
  param {
    lr_mult: 1
    decay_mult: 0
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 65
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
# Inference outputs: a plain Softmax per head replaces the training net's
# SoftmaxWithLoss / Accuracy layers. prob_k holds the 65 class probabilities
# for character position k and is what the prediction script reads.
layer {
  name: "prob_1"
  type: "Softmax"
  bottom: "fc7_1"
  top: "prob_1"
}
layer {
  name: "prob_2"
  type: "Softmax"
  bottom: "fc7_2"
  top: "prob_2"
}
layer {
  name: "prob_3"
  type: "Softmax"
  bottom: "fc7_3"
  top: "prob_3"
}
layer {
  name: "prob_4"
  type: "Softmax"
  bottom: "fc7_4"
  top: "prob_4"
}
layer {
  name: "prob_5"
  type: "Softmax"
  bottom: "fc7_5"
  top: "prob_5"
}
layer {
  name: "prob_6"
  type: "Softmax"
  bottom: "fc7_6"
  top: "prob_6"
}
layer {
  name: "prob_7"
  type: "Softmax"
  bottom: "fc7_7"
  top: "prob_7"
}
solver文件如下:
#lpr_solver.prototxt
# Solver settings for training the LPR net with SGD on the GPU.
net: "/home/jyang/caffe/LPR/Proto/lpr_train_val.prototxt"
test_iter: 338 # 10815 test images / 32 (batch_size), rounded to 338
test_interval: 2236 # 71547 training images / 32 (batch_size), rounded to 2236, i.e. run one test pass every 2236 iterations
base_lr: 0.01
display: 100
max_iter: 111800 # 50 epochs: 50*2236=111800, so at most 111800 iterations
lr_policy: "step"
gamma: 0.1
# NOTE(review): with the "step" policy the learning rate is multiplied by
# gamma every stepsize iterations, i.e. 13 times within max_iter, so it is
# already 1e-7 by iteration 40000 -- confirm this schedule is intended.
stepsize: 8000
momentum: 0.9
weight_decay: 0.0005
snapshot: 20000 # save a caffemodel every 20000 iterations
snapshot_prefix: "/home/jyang/caffe/LPR/lpr"
solver_mode: GPU
snapshot_format: BINARYPROTO
三、訓練模型
這裏就不畫出loss曲線了,在LPR文件夾下創建lpr_train.py。
#lpr_train.py
#!/usr/bin/env python
#coding=utf-8
import caffe
def main():
    """Train the LPR network on GPU 0 as configured by the solver file."""
    solver_path = '/home/jyang/caffe/LPR/Proto/lpr_solver.prototxt'
    caffe.set_device(0)  # select GPU-0
    caffe.set_mode_gpu()
    # The solver file references the net definition and all hyper-parameters.
    sgd = caffe.SGDSolver(solver_path)
    sgd.solve()


if __name__ == '__main__':
    main()
四、模型訓練結果
執行 lpr_train.py 文件即開始訓練,在驗證集上的準確率如下:
可看到第一個字符的識別率較低,只有85%左右,其餘的均在93%以上。
五、使用訓練得的模型做預測:
由於這裏用的是python 接口,故先將之前的均值文件 mean.binaryproto 轉爲 mean.npy ,在Mean文件夾下新建 binToNpy.py ,使用以下代碼轉換
import numpy as np
import caffe
import sys
# Convert Caffe's binary mean file (mean.binaryproto) into mean.npy so the
# Python prediction script can load it with np.load.
blob = caffe.proto.caffe_pb2.BlobProto()
# Read through a context manager so the file handle is always closed.
with open('mean.binaryproto', 'rb') as mean_fp:
    data = mean_fp.read()
blob.ParseFromString(data)
arr = np.array(caffe.io.blobproto_to_array(blob))
# Keep only the first entry of the leading axis; presumably this leaves a
# (C, H, W) array, which is what predict.py averages over -- TODO confirm.
out = arr[0]
np.save('mean.npy', out)
這樣deploy文件、均值文件、 caffemodel文件準備好了,在LPR下創建 predict.py ,載入一張圖片作預測
#!/usr/bin/env python
#coding=utf-8
import cv2
import numpy as np
import sys,os
import time
import caffe
# Locations of the deploy net, trained weights, mean file and test image,
# all relative to the Caffe checkout.
caffe_root = '/home/jyang/caffe/'
net_file = caffe_root + 'LPR/Proto/lpr_deploy.prototxt'
caffe_model = caffe_root + 'LPR/lpr_iter_40000.caffemodel'
mean_file = caffe_root + 'LPR/Mean/mean.npy'
img_path = caffe_root + 'LPR/001.png'  # image path

# Class index -> character. Indices 0-30 are province abbreviations, 31-40 the
# digits, 41-64 the letters (I and O are not part of the table).
labels = dict(enumerate(
    ["京", "滬", "津", "渝", "冀", "晉", "蒙", "遼", "吉", "黑", "蘇",
     "浙", "皖", "閩", "贛", "魯", "豫", "鄂", "湘", "粵", "桂", "瓊",
     "川", "貴", "雲", "藏", "陝", "甘", "青", "寧", "新"]
    + list("0123456789")
    + list("ABCDEFGHJKLMNPQRSTUVWXYZ")
))
if __name__ == '__main__':
    # Load the deploy net with the trained weights in inference (TEST) mode.
    net = caffe.Net(net_file, caffe_model, caffe.TEST)

    transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
    # The image is read as H*W*C (0,1,2) but the net needs C*H*W (2,0,1).
    transformer.set_transpose('data', (2, 0, 1))
    # Reduce the saved mean to one value per channel.
    transformer.set_mean('data', np.load(mean_file).mean(1).mean(1))
    # Rescale the data from [0-1] to [0-255].
    transformer.set_raw_scale('data', 255)
    # caffe.io.load_image returns RGB; swap to BGR (2,1,0) for the net.
    # Assumes the training LMDB holds BGR images -- TODO confirm.
    transformer.set_channel_swap('data', (2, 1, 0))
    # Training multiplies the mean-subtracted pixels by 0.00390625
    # (transform_param.scale in lpr_train_val.prototxt); apply the same scale
    # here so the net sees inputs in the range it was trained on.
    transformer.set_input_scale('data', 0.00390625)

    start = time.time()
    img = caffe.io.load_image(img_path)
    # Feed the untouched RGB image: the transformer already performs the
    # RGB->BGR swap, so flipping the channels beforehand would undo it.
    net.blobs['data'].data[...] = transformer.preprocess('data', img)
    net.forward()

    # Take the most probable class of each of the seven character heads.
    plate = []
    for blob_name in ('prob_1', 'prob_2', 'prob_3', 'prob_4',
                      'prob_5', 'prob_6', 'prob_7'):
        best = int(net.blobs[blob_name].data[0].flatten().argmax())
        plate.append(labels[best])
    # sys.stdout.write behaves the same under Python 2 and Python 3.
    sys.stdout.write(' '.join(plate))
    print("\nDone in %.2f s." % (time.time() - start))

    # OpenCV expects BGR, so reverse the channels for display only.
    cv2.imshow('demo', img[..., ::-1])
    cv2.waitKey(0)
預測結果
結語
實際測試圖片,發現完全正確識別的準確率很低。雖然訓練得到的模型在驗證集上的識別準確率很高,但是訓練集和驗證集都是經過樣本增強得到的(3922張擴充至80000多張),擴充的樣本和真實樣本還是存在差距,且即使擴充再多,樣本信息還是有限的,導致過擬合了。如果能獲得幾萬張真實的車牌圖片,所訓練出的模型實用性將會更高。