Day03 使用paddle進行車牌識別

繼續我們的百度paddle學習,這次的項目跟上次其實差別不大,不過這次的數據圖片分辨率比較小,而且類別更多,我們接下來學習一下微調AlexNet,使得它更加適合我們的數據集訓練

還是那句話深度學習不外乎四個步驟:

1. 數據標籤處理
2. 構建網絡模型
3. 規劃網絡超參
4. 訓練評估模型

首先導入需要的庫

import numpy as np
import paddle as paddle
import paddle.fluid as fluid
from PIL import Image
import cv2
import matplotlib.pyplot as plt
import os
from multiprocessing import cpu_count
from paddle.fluid.dygraph import Pool2D,Conv2D
from paddle.fluid.dygraph import Linear

一、數據標籤處理

paddle爲大家準備的車牌數據集有65個類別,每個類別有500餘張黑白圖片,分辨率爲20x20。數據集鏈接

首先是數據集處理

# Build the train/test list files for the licence-plate character dataset.
# Each line of the list files is "<image path>\t<integer class label>".
# Improvements: the two list files are opened once for the whole loop instead
# of being re-opened in append mode on every folder, the triple os.path.join
# nesting is collapsed, and the skip-folders are a set.
data_path = 'dataset'  # path where the dataset archive was extracted
character_folders = os.listdir(data_path)
label = 0
LABEL_temp = {}
if(os.path.exists('./train_data.list')):
    os.remove('./train_data.list')
if(os.path.exists('./test_data.list')):
    os.remove('./test_data.list')
# Entries in the dataset directory that are not character-class folders.
_SKIP_FOLDERS = {'.DS_Store', '.ipynb_checkpoints', 'data23617'}
with open('./train_data.list', 'a') as f_train, open('./test_data.list', 'a') as f_test:
    for character_folder in character_folders:
        if character_folder in _SKIP_FOLDERS:
            continue
        print(character_folder + " " + str(label))
        LABEL_temp[str(label)] = character_folder  # remember label -> folder-name mapping
        character_imgs = os.listdir(os.path.join(data_path, character_folder))
        for i, img_name in enumerate(character_imgs):
            line = os.path.join(data_path, character_folder, img_name) + "\t" + str(label) + '\n'
            # every 10th image goes to the test set, the rest to the train set
            if i % 10 == 0:
                f_test.write(line)
            else:
                f_train.write(line)
        label = label + 1
print('圖像列表已生成')

將圖片路徑及文件夾標籤寫到list文件方便讀取,接下來使用paddle的reader模塊製作訓練集和測試集

# 用上一步生成的圖像列表定義車牌字符訓練集和測試集的reader
def data_mapper(sample):
    """Decode one (image path, label) pair into (flat pixel vector, label).

    The image is loaded as grayscale and returned as a float32 vector
    scaled to [0, 1].
    """
    path, lbl = sample
    pixels = paddle.dataset.image.load_image(file=path, is_color=False)
    pixels = pixels.flatten().astype('float32') / 255.0
    return pixels, lbl
def data_reader(data_list_path):
    """Build a paddle reader over the list file at *data_list_path*.

    Each line of the list file is "<image path>\t<label>"; samples are
    decoded in parallel (one worker per CPU) by data_mapper via
    xmap_readers with a 1024-sample buffer.
    """
    def reader():
        with open(data_list_path, 'r') as list_file:
            for line in list_file.readlines():
                path, lbl = line.split('\t')
                yield path, int(lbl)
    return paddle.reader.xmap_readers(data_mapper, reader, cpu_count(), 1024)


# Batched training reader: shuffle within a 4096-sample buffer, batches of 128.
train_reader = paddle.batch(reader=paddle.reader.shuffle(reader=data_reader('./train_data.list'), buf_size=4096), batch_size=128)
# Batched evaluation reader (no shuffling needed for testing).
test_reader = paddle.batch(reader=data_reader('./test_data.list'), batch_size=128)

由於數據集較手勢識別更加龐大,這裏我把buf_size設爲4096,batch_size設置爲128

構建神經網絡

這裏我們以典型的AlexNet構建我們的神經網絡結構,並進行微調
構建代碼如下:


#定義CNN網絡
class AlexNet(fluid.dygraph.Layer):
    """AlexNet-style CNN adapted to 20x20 grayscale plate-character images.

    Compared with canonical AlexNet, conv1 uses filter_size=3, stride=2,
    padding=2 so the small input keeps a usable spatial size:
    (20 + 2*2 - 3) // 2 + 1 = 11.  The conv/pool stack ends at
    256 x 4 x 4 = 4096 features, which feeds the fully-connected head.
    """

    def __init__(self, name_scope, num_classes=65):
        super(AlexNet, self).__init__(name_scope)
        name_scope = self.full_name()
        # Feature extractor; spatial sizes: 20 -> 11 -> 5 -> 7 -> 3 -> 5 -> 7 -> 9 -> 4
        self.conv1 = Conv2D(num_channels=1, num_filters=96, filter_size=3, stride=2, padding=2, act='relu')
        self.pool1 = Pool2D(pool_size=2, pool_stride=2, pool_type='max')
        self.conv2 = Conv2D(num_channels=96, num_filters=256, filter_size=3, stride=1, padding=2, act='relu')
        self.pool2 = Pool2D(pool_size=2, pool_stride=2, pool_type='max')
        self.conv3 = Conv2D(num_channels=256, num_filters=384, filter_size=3, stride=1, padding=2, act='relu')
        self.conv4 = Conv2D(num_channels=384, num_filters=384, filter_size=3, stride=1, padding=2, act='relu')
        self.conv5 = Conv2D(num_channels=384, num_filters=256, filter_size=3, stride=1, padding=2, act='relu')
        self.pool5 = Pool2D(pool_size=2, pool_stride=2, pool_type='max')

        # Classifier head: 4096 -> 4096 -> 4096 -> num_classes (raw logits out)
        self.fc1 = Linear(input_dim=4096, output_dim=4096, act='relu')
        self.drop_ratio1 = 0.5
        self.fc2 = Linear(input_dim=4096, output_dim=4096, act='relu')
        self.drop_ratio2 = 0.5
        self.fc3 = Linear(input_dim=4096, output_dim=num_classes)

    def forward(self, x):
        x = self.conv1(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.pool2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = self.pool5(x)
        x = fluid.layers.reshape(x, [x.shape[0], -1])
        x = self.fc1(x)
        # BUG FIX: fluid.layers.dropout defaults to is_test=False, so the
        # original kept randomly masking activations even after model.eval(),
        # making inference nondeterministic.  Tie it to the Layer's training
        # flag so dropout is only active during training.
        x = fluid.layers.dropout(x, self.drop_ratio1, is_test=not self.training)
        x = self.fc2(x)
        x = fluid.layers.dropout(x, self.drop_ratio2, is_test=not self.training)
        x = self.fc3(x)
        return x

規劃網絡超參和訓練評估模型

這裏我們的優化器是

MomentumOptimizer

,損失函數是

softmax_with_cross_entropy

訓練了100輪次

# Training loop: Momentum SGD optimizer, softmax cross-entropy loss, 100 epochs.
with fluid.dygraph.guard():
    #model=MyLeNet() # (model from the previous project, kept for reference)
    model = AlexNet('AlexNet')
    model.train() # switch the model to training mode
    opt = fluid.optimizer.MomentumOptimizer(learning_rate=0.01,momentum=0.9, parameter_list=model.parameters())
    #opt=fluid.optimizer.AdamOptimizer(learning_rate=0.001, parameter_list=model.parameters())# alternative optimizer (Adam, lr=0.001)
    epochs_num= 100# number of training epochs
    
    for pass_num in range(epochs_num):
        
        for batch_id,data in enumerate(train_reader()):
            # each sample is a flat 400-vector; reshape to NCHW (N, 1, 20, 20)
            images=np.array([x[0].reshape(1,20,20) for x in data],np.float32)
            labels = np.array([x[1] for x in data]).astype('int64')
            labels = labels[:, np.newaxis]  # softmax_with_cross_entropy expects labels of shape (N, 1)
            image=fluid.dygraph.to_variable(images)
            label=fluid.dygraph.to_variable(labels)
            
            predict=model(image)# forward pass; raw logits
            
            loss=fluid.layers.softmax_with_cross_entropy(predict,label)
            avg_loss=fluid.layers.mean(loss)# scalar batch loss
            
            acc=fluid.layers.accuracy(predict,label)# batch accuracy


            if batch_id!=0 and batch_id%100==0:
                print("train_pass:{},batch_id:{},train_loss:{},train_acc:{}".format(pass_num,batch_id,avg_loss.numpy(),acc.numpy()))
            
        
            # backprop, parameter update, then reset gradients for the next batch
            avg_loss.backward()
            opt.minimize(avg_loss)
            model.clear_gradients()            
            
    fluid.save_dygraph(model.state_dict(),'AlexNet')# save the trained parameters


# Model evaluation: run the trained network over the test set and report the
# mean of the per-batch accuracies.
with fluid.dygraph.guard():
    accs = []
    model = AlexNet('AlexNet')
    model_dict, _ = fluid.load_dygraph('AlexNet')
    model.load_dict(model_dict)  # load the trained parameters
    model.eval()  # evaluation mode
    for batch_id, data in enumerate(test_reader()):
        # reshape each flat sample to NCHW (N, 1, 20, 20)
        images = np.array([x[0].reshape(1, 20, 20) for x in data], np.float32)
        labels = np.array([x[1] for x in data]).astype('int64')
        labels = labels[:, np.newaxis]

        image = fluid.dygraph.to_variable(images)
        label = fluid.dygraph.to_variable(labels)

        predict = model(image)
        acc = fluid.layers.accuracy(predict, label)
        accs.append(acc.numpy()[0])
    # FIX: compute the mean once after the loop; the original recomputed it
    # on every batch for no benefit.
    avg_acc = np.mean(accs)
    print(avg_acc)

到此整個訓練過程就結束了~
我訓練了100個epoch的結果是97%,大家可以嘗試着更高的精度,比如換不同的優化器,多訓練幾個epoch,增大圖像分辨率等等

接下來是對車牌圖片的分割和標籤的對應,我就不解釋了直接貼代碼

# Segment the plate photo into individual characters using a vertical
# projection histogram, then save each character as a 20x20 image.
license_plate = cv2.imread('./車牌.png')
# FIX: cv2.imread returns BGR channel order, so BGR2GRAY is the correct
# conversion (the original used COLOR_RGB2GRAY, swapping the R/B weights).
gray_plate = cv2.cvtColor(license_plate, cv2.COLOR_BGR2GRAY)
ret, binary_plate = cv2.threshold(gray_plate, 175, 255, cv2.THRESH_BINARY)
# result[col] = count of white pixels in that column; 0 marks a gap between characters
result = []
for col in range(binary_plate.shape[1]):
    result.append(0)
    for row in range(binary_plate.shape[0]):
        result[col] = result[col] + binary_plate[row][col]/255
# Walk the projection: every maximal run of non-zero columns is one character,
# recorded as character_dict[num] = [first column, last column].
character_dict = {}
num = 0
i = 0
while i < len(result):
    if result[i] == 0:
        i += 1
    else:
        index = i + 1
        # FIX: bounds guard — the original could index past the end of
        # `result` when the last character touches the right image edge.
        while index < len(result) and result[index] != 0:
            index += 1
        character_dict[num] = [i, index - 1]
        num += 1
        i = index

# Save 7 characters; index 2 is the separator dot on Chinese plates — skip it.
# Each crop is centred in a 170-column canvas before resizing to 20x20.
for i in range(8):
    if i == 2:
        continue
    padding = (170 - (character_dict[i][1] - character_dict[i][0])) / 2
    ndarray = np.pad(binary_plate[:, character_dict[i][0]:character_dict[i][1]], ((0, 0), (int(padding), int(padding))), 'constant', constant_values=(0, 0))
    ndarray = cv2.resize(ndarray, (20, 20))
    cv2.imwrite('./' + str(i) + '.png', ndarray)
    
def load_image(path):
    """Load a grayscale image and return it as float32 in [0, 1] with a
    leading channel axis, ready to be batched for the network."""
    pixels = paddle.dataset.image.load_image(file=path, is_color=False)
    pixels = pixels.astype('float32')
    return pixels[np.newaxis, ] / 255.0


# Convert pinyin folder-name labels into display characters.
print('Label:',LABEL_temp)
# Latin letters and digits map to themselves.
match = {ch: ch for ch in 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'}
# Province abbreviations: pinyin folder name -> Chinese character.
match.update({
    'yun': '雲', 'cuan': '川', 'hei': '黑', 'zhe': '浙', 'ning': '寧', 'jin': '津',
    'gan': '贛', 'hu': '滬', 'liao': '遼', 'jl': '吉', 'qing': '青', 'zang': '藏',
    'e1': '鄂', 'meng': '蒙', 'gan1': '甘', 'qiong': '瓊', 'shan': '陝', 'min': '閩',
    'su': '蘇', 'xin': '新', 'wan': '皖', 'jing': '京', 'xiang': '湘', 'gui': '貴',
    'yu1': '渝', 'yu': '豫', 'ji': '冀', 'yue': '粵', 'gui1': '桂', 'sx': '晉', 'lu': '魯',
})
# Final label table: class index (as a string) -> display character.
LABEL = {str(idx): match[name] for idx, name in enumerate(LABEL_temp.values())}
print(LABEL)



# Inference: load the trained model and classify each segmented character image.
with fluid.dygraph.guard():
    #model=MyLeNet()# (model from the previous project, kept for reference)
    model = AlexNet('AlexNet')
    model_dict,_=fluid.load_dygraph('AlexNet')
    model.load_dict(model_dict)# load the trained parameters
    model.eval()# evaluation mode
    lab=[]
    for i in range(8):
        # index 2 is the separator dot on the plate — no character image was saved for it
        if i==2:
            continue
        infer_imgs = []
        infer_imgs.append(load_image('./' + str(i) + '.png'))
        infer_imgs = np.array(infer_imgs)
        infer_imgs = fluid.dygraph.to_variable(infer_imgs)
        result=model(infer_imgs)
        lab.append(np.argmax(result.numpy()))# index of the highest-scoring class
# print(lab)


# NOTE(review): `display` is the IPython/Jupyter notebook builtin — this cell
# assumes a notebook environment; in a plain script it would raise NameError.
display(Image.open('./車牌.png'))
# Print the recognized characters on one line using the LABEL lookup table.
print('\n車牌識別結果爲:',end='')
for i in range(len(lab)):
    print(LABEL[str(lab[i])],end='')

效果圖如下:
效果圖

心得

這次訓練在有上次手勢識別的基礎上學習不難,主要還是自己調參遇到的困難居多,比如明明訓練測試效果都不錯,偏偏圖片預測就不準。。。。。。
總之python對於模塊的封裝效果還是不錯的,整個訓練過程相對之前也就稍稍微調了一下網絡模型,整體代碼差異不大。
繼續加油 fight!fight!fight!

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章