百度飛槳(4)—— 車牌識別

前言

本節學習車牌識別,對LeNet-5網絡稍作修改。
視頻教程:百度飛槳

存在的問題

本地用PyCharm讀取文件的順序和AI Studio的讀取順序不同,導致本地加載的標籤匹配碼總是錯位,無奈只能把匹配關係靜態地保存下來。但是這樣適配到別的機器上時,就要修改test_data.list和train_data.list文件裏面的路徑,可以使用PyCharm的Replace All功能批量替換。

網絡搭建

參考上一節的Lenet-5網絡分析:百度飛槳(3)—— 手勢識別
本節使用的數據是20 × 20大小的單通道圖片,所以輸入層相對於上一節代碼需要做變動。
網絡代碼:

# Network definition
class MyLeNet(fluid.dygraph.Layer):
    """LeNet-5 variant for 20 x 20 single-channel character images.

    The forward pass prints the shape produced by every stage so the layer
    arithmetic can be followed on the console.
    """

    def __init__(self):
        super(MyLeNet, self).__init__()
        # Convolution / pooling stack; Conv2D arguments are
        # (input channels, output channels, kernel size, stride).
        self.c1 = Conv2D(1, 6, 5, 1)
        self.s2 = Pool2D(pool_size=2, pool_type='max', pool_stride=1)
        self.c3 = Conv2D(6, 16, 5, 1)
        self.s4 = Pool2D(pool_size=2, pool_type='max', pool_stride=1)
        # The kernel of C5 spans the whole S4 map so its output is 1 x 1.
        self.c5 = Conv2D(16, 120, 10, 1)
        # Classifier head ending in 65 softmax outputs.
        self.f6 = Linear(120, 84, act='relu')
        self.f7 = Linear(84, 65, act='softmax')


    def forward(self, input):
        """Run the network on ``input`` and return the 65-way softmax output."""
        print("input shape : " + str(input.shape))
        stages = (
            ("C1 : ", self.c1),
            ("S2 : ", self.s2),
            ("C3 : ", self.c3),
            ("S4 : ", self.s4),
            ("C5 : ", self.c5),
        )
        feature = input
        for tag, layer in stages:
            feature = layer(feature)
            print(tag + str(feature.shape))
        # Collapse the C5 output to one 120-value row per sample.
        flat = fluid.layers.reshape(feature, shape=[-1, 120])
        print(flat.shape)
        hidden = self.f6(flat)
        return self.f7(hidden)

這裏打印查看下每一層的形狀:

input shape : [128, 1, 20, 20]
C1 : [128, 6, 16, 16]
S2 : [128, 6, 15, 15]
C3 : [128, 16, 11, 11]
S4 : [128, 16, 10, 10]
C5 : [128, 120, 1, 1]
[128, 120]

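對於C5層,輸出的圖片尺寸必須是1 × 1,不然經過reshape變成[-1, 120]之後,第一維(批大小)就不再是128,計算損失時就會報錯。self.c5 = Conv2D(16, 120, 10, 1)中的卷積核大小10,就是由下式求解出來的x的值(步長爲1、無填充時,輸出尺寸 = 輸入尺寸 - 卷積核大小 + 1):

10 - x + 1 = 1

式子左邊的10是S4層輸出形狀10 × 10中的10,等號右邊的1是輸出尺寸1 × 1的1,左邊的1是公式中的固定值,解得x = 10,所以就知道如何配置C5層的參數了。如果C5層輸出尺寸不爲1就會報錯如下(例如輸出爲2 × 2時,reshape之後第一維變成128 × 4 = 512,與標籤的128對不上):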

----------------------
Error Message Summary:
----------------------
InvalidArgumentError: Input(X) and Input(Label) shall have the same shape except the last dimension. But received: the shape of Input(X) is [512, 65], the shape of Input(Label) is [128, 1].
  [Hint: Expected framework::slice_ddim(x_dims, 0, rank - 1) == framework::slice_ddim(label_dims, 0, rank - 1), but received framework::slice_ddim(x_dims, 0, rank - 1):512 != framework::slice_ddim(label_dims, 0, rank - 1):128.] at (/paddle/paddle/fluid/operators/cross_entropy_op.cc:49)

訓練結果

在這裏插入圖片描述

識別結果

在這裏插入圖片描述

完整代碼

訓練文件的代碼carIDTrain.py:

#導入需要的包
# -*- coding: utf-8 -*-
import numpy as np
import paddle as paddle
import paddle.fluid as fluid
from PIL import Image
import cv2
import matplotlib.pyplot as plt
import os
from multiprocessing import cpu_count
from paddle.fluid.dygraph import Pool2D,Conv2D
from paddle.fluid.dygraph import Linear


def makePictureList(data_path, sort_entries=False):
    """Write the train/test image list files for the character dataset.

    Every sub-directory of ``data_path`` is treated as one character class
    and receives the next integer label, starting at 0. Within a class,
    images at positions 0, 10, 20, ... go to ``./test_data.list`` and all
    others to ``./train_data.list``. Each line is the image path, a TAB and
    the label. Both list files are rewritten from scratch on every call.

    Args:
        data_path: Directory containing one sub-directory per character.
        sort_entries: When True, class folders and image names are processed
            in sorted order so that labels and the train/test split are
            reproducible across machines. The default (False) keeps the
            order returned by ``os.listdir``, which is not guaranteed to be
            the same everywhere.

    Returns:
        dict mapping ``str(label)`` to the class folder name.
    """
    # Entries that are never treated as character classes.
    skipped = ('.DS_Store', '.ipynb_checkpoints', 'data23617')

    character_folders = os.listdir(data_path)
    if sort_entries:
        character_folders = sorted(character_folders)

    label = 0
    LABEL_temp = {}
    # Open both lists once in write mode; this replaces any previous lists.
    with open('./train_data.list', 'w') as f_train, \
            open('./test_data.list', 'w') as f_test:
        for character_folder in character_folders:
            folder_path = os.path.join(data_path, character_folder)
            # Stray files next to the class folders would make the inner
            # os.listdir fail, so anything that is not a directory is skipped.
            if character_folder in skipped or not os.path.isdir(folder_path):
                continue
            print(character_folder + " " + str(label))
            LABEL_temp[str(label)] = character_folder  # remember label -> character
            character_imgs = os.listdir(folder_path)
            if sort_entries:
                character_imgs = sorted(character_imgs)
            for i, img_name in enumerate(character_imgs):
                # Every tenth image is held out for testing.
                target = f_test if i % 10 == 0 else f_train
                target.write(os.path.join(folder_path, img_name) + "\t" + str(label) + '\n')
            label = label + 1
    print('圖像列表已生成')
    return LABEL_temp


# Mapper used by the train/test readers built from the generated image lists
def data_mapper(sample):
    """Load one listed image and turn it into a flat float32 vector.

    Args:
        sample: ``(image_path, label)`` pair.

    Returns:
        ``(pixels, label)`` where ``pixels`` is the flattened grayscale image
        divided by 255. If the conversion fails, the path is printed and
        whatever the loader returned is passed through unchanged.
    """
    path, label = sample
    img = paddle.dataset.image.load_image(file=path, is_color=False)
    try:
        img = img.flatten().astype('float32') / 255.0
    except Exception:
        # Deliberately best effort: report the offending file and carry on.
        # `Exception` (rather than a bare except) leaves SystemExit and
        # KeyboardInterrupt alone.
        print(path)
    return img, label


def data_reader(data_list_path):
    """Build a reader over an image list file.

    Args:
        data_list_path: Path of a list file whose lines are an image path,
            a TAB and an integer label.

    Returns:
        The reader produced by ``paddle.reader.xmap_readers``, which applies
        ``data_mapper`` to every ``(image_path, label)`` pair.
    """
    def reader():
        with open(data_list_path, 'r') as f:
            # Stream the file instead of loading every line up front.
            for line in f:
                if not line.strip():
                    # A blank line (e.g. left by hand-editing) has no TAB
                    # and would break the unpacking below.
                    continue
                img, label = line.split('\t')
                yield img, int(label)
    return paddle.reader.xmap_readers(data_mapper, reader, cpu_count(), 1024)


# Network definition
class MyLeNet(fluid.dygraph.Layer):
    """LeNet-5 variant for 20 x 20 single-channel character images.

    Args:
        verbose: When True, print the shape produced by every stage on each
            forward pass (useful while adjusting layer sizes). Off by
            default so training and inference do not print several lines
            per batch.
    """

    def __init__(self, verbose=False):
        super(MyLeNet, self).__init__()
        self.verbose = verbose
        # Convolution / pooling stack; Conv2D arguments are
        # (input channels, output channels, kernel size, stride).
        self.c1 = Conv2D(1, 6, 5, 1)
        self.s2 = Pool2D(pool_size=2, pool_type='max', pool_stride=1)
        self.c3 = Conv2D(6, 16, 5, 1)
        self.s4 = Pool2D(pool_size=2, pool_type='max', pool_stride=1)
        # The kernel of C5 must span the whole S4 map so that its output is
        # 1 x 1; otherwise the reshape in forward() changes the batch size.
        self.c5 = Conv2D(16, 120, 10, 1)
        # Classifier head ending in 65 softmax outputs.
        self.f6 = Linear(120, 84, act='relu')
        self.f7 = Linear(84, 65, act='softmax')

    def _trace(self, text):
        """Print ``text`` only when shape tracing is enabled."""
        if self.verbose:
            print(text)

    def forward(self, input):
        """Run the network on ``input`` and return the 65-way softmax output."""
        self._trace("input shape : " + str(input.shape))
        x = self.c1(input)
        self._trace("C1 : " + str(x.shape))
        x = self.s2(x)
        self._trace("S2 : " + str(x.shape))
        x = self.c3(x)
        self._trace("C3 : " + str(x.shape))
        x = self.s4(x)
        self._trace("S4 : " + str(x.shape))
        x = self.c5(x)
        self._trace("C5 : " + str(x.shape))
        # Collapse the C5 output to one 120-value row per sample.
        x = fluid.layers.reshape(x, shape=[-1, 120])
        self._trace(str(x.shape))
        x = self.f6(x)
        y = self.f7(x)
        return y


def load_image(path):
    """Load a grayscale image as a float32 array scaled by 1/255.

    Args:
        path: Image file to read.

    Returns:
        The image with one extra leading axis added, as float32 divided by
        255.

    Raises:
        IOError: If the loader returned no image for ``path``.
    """
    img = paddle.dataset.image.load_image(file=path, is_color=False)
    if img is None:
        # NOTE(review): the paddle loader appears to return None (cv2.imread
        # behaviour) for a missing or undecodable file; fail with the path
        # instead of an AttributeError on None further down.
        raise IOError("Cannot read image: " + str(path))
    img = img.astype('float32')
    img = img[np.newaxis,] / 255.0
    return img


if __name__ == '__main__':
    # Optional first step: (re)generate ./train_data.list and ./test_data.list.
    #data_path = "/home/xmy/PycharmProjects/test/paddle/data/characterData"
    #LABEL_temp = makePictureList(data_path)

    # Shuffled batches for training
    train_reader = paddle.batch(reader=paddle.reader.shuffle(reader=data_reader('./train_data.list'), buf_size=512),
                                batch_size=128)
    # Batches for evaluation
    test_reader = paddle.batch(reader=data_reader('./test_data.list'), batch_size=128)

    with fluid.dygraph.guard():
        model = MyLeNet()  # instantiate the network
        model.train()  # training mode
        # Plain SGD with a learning rate of 0.01.
        opt = fluid.optimizer.SGDOptimizer(learning_rate=0.01,
                                           parameter_list=model.parameters())
        epochs_num = 40  # number of passes over the training set (tune here)

        for pass_num in range(epochs_num):

            for batch_id, data in enumerate(train_reader()):
                # The mapper yields flat vectors; restore the 1 x 20 x 20 layout.
                images = np.array([x[0].reshape(1, 20, 20) for x in data], np.float32)
                labels = np.array([x[1] for x in data]).astype('int64')
                labels = labels[:, np.newaxis]  # one label column per sample
                image = fluid.dygraph.to_variable(images)
                label = fluid.dygraph.to_variable(labels)

                predict = model(image)  # forward pass

                loss = fluid.layers.cross_entropy(predict, label)
                avg_loss = fluid.layers.mean(loss)  # mean loss of the batch

                acc = fluid.layers.accuracy(predict, label)  # batch accuracy

                if batch_id != 0 and batch_id % 50 == 0:
                    print("train_pass:{},batch_id:{},train_loss:{},train_acc:{}".format(pass_num, batch_id,
                                                                                        avg_loss.numpy(), acc.numpy()))

                avg_loss.backward()
                opt.minimize(avg_loss)
                model.clear_gradients()

        fluid.save_dygraph(model.state_dict(), 'MyLeNet')  # save the trained parameters

    # Evaluate the saved model on the test list
    with fluid.dygraph.guard():
        accs = []
        model = MyLeNet()  # fresh instance
        model_dict, _ = fluid.load_dygraph('MyLeNet')
        model.load_dict(model_dict)  # load the saved parameters
        model.eval()  # evaluation mode
        for data in test_reader():
            images = np.array([x[0].reshape(1, 20, 20) for x in data], np.float32)
            labels = np.array([x[1] for x in data]).astype('int64')
            labels = labels[:, np.newaxis]

            image = fluid.dygraph.to_variable(images)
            label = fluid.dygraph.to_variable(labels)

            predict = model(image)  # prediction
            acc = fluid.layers.accuracy(predict, label)
            accs.append(acc.numpy()[0])

        # Average the per-batch accuracies once, after the loop; an empty
        # test list would otherwise leave nothing to print.
        if accs:
            avg_acc = np.mean(accs)
            print(avg_acc)
        else:
            print("test_data.list produced no batches; accuracy not computed")

識別的代碼carIDRecognize.py:

#導入需要的包

import numpy as np
import paddle as paddle
import paddle.fluid as fluid
from PIL import Image
import cv2
import matplotlib.pyplot as plt
import os
from carIDTrain import MyLeNet
from multiprocessing import cpu_count
from paddle.fluid.dygraph import Pool2D,Conv2D
from paddle.fluid.dygraph import Linear

def load_image(path):
    """Load a grayscale image as a float32 array scaled by 1/255.

    Args:
        path: Image file to read.

    Returns:
        The image with one extra leading axis added, as float32 divided by
        255.

    Raises:
        IOError: If the loader returned no image for ``path``.
    """
    img = paddle.dataset.image.load_image(file=path, is_color=False)
    if img is None:
        # NOTE(review): the paddle loader appears to return None (cv2.imread
        # behaviour) for a missing or undecodable file; fail with the path
        # instead of an AttributeError on None further down.
        raise IOError("Cannot read image: " + str(path))
    img = img.astype('float32')
    img = img[np.newaxis,] / 255.0
    return img

# Preprocess the plate image: split it into individual characters and save each one
license_plate = cv2.imread('車牌.png')
if license_plate is None:
    raise IOError("Cannot read plate image: 車牌.png")
# NOTE(review): cv2.imread yields BGR data, so COLOR_BGR2GRAY would be the
# matching conversion; left unchanged because the threshold of 175 below was
# chosen with this conversion in place.
gray_plate = cv2.cvtColor(license_plate, cv2.COLOR_RGB2GRAY)
ret, binary_plate = cv2.threshold(gray_plate, 175, 255, cv2.THRESH_BINARY)

# Number of white pixels in every column of the binarised plate.
result = (binary_plate / 255).sum(axis=0).tolist()

# Each run of consecutive non-empty columns is one segment: num -> [first, last].
character_dict = {}
num = 0
i = 0
while i < len(result):
    if result[i] == 0:
        i += 1
    else:
        index = i + 1
        # Stop at the image border too; a segment touching the right edge
        # would otherwise index past the end of `result`.
        while index < len(result) and result[index] != 0:
            index += 1
        character_dict[num] = [i, index - 1]
        num += 1
        i = index

if len(character_dict) < 8:
    raise ValueError("Expected at least 8 segments in the plate image, found " + str(len(character_dict)))

for i in range(8):
    # Segment 2 is skipped -- presumably the separator dot between the
    # region code and the serial number.
    if i == 2:
        continue
    # Pad the character left and right towards a width of 170 columns
    # before shrinking it to the 20 x 20 network input.
    padding = (170 - (character_dict[i][1] - character_dict[i][0])) / 2
    pad_cols = max(int(padding), 0)  # np.pad rejects negative widths
    ndarray = np.pad(binary_plate[:, character_dict[i][0]:character_dict[i][1]], ((0, 0), (pad_cols, pad_cols)),
                     'constant', constant_values=(0, 0))
    ndarray = cv2.resize(ndarray, (20, 20))
    cv2.imwrite('./' + str(i) + '.png', ndarray)


# Lookup table from the predicted class index (as a string) to the plate character
LABEL ={'0': 'A', '1': '新', '2': '陝', '3': 'B', '4': '津', '5': 'W', '6': 'E', '7': 'P', '8': '8', '9': 'D', '10': 'M', '11': '貴', '12': '1', '13': 'Z', '14': '吉', '15': 'F', '16': '遼', '17': 'H', '18': '蒙', '19': '豫', '20': '皖', '21': 'L', '22': '冀', '23': '瓊', '24': '黑', '25': 'X', '26': '浙', '27': '5', '28': '6', '29': 'R', '30': '滬', '31': '鄂', '32': '藏', '33': 'C', '34': 'T', '35': '贛', '36': 'J', '37': 'Q', '38': 'G', '39': '川', '40': 'U', '41': '魯', '42': '2', '43': 'K', '44': '渝', '45': '蘇', '46': '7', '47': '雲', '48': '晉', '49': '寧', '50': 'S', '51': '閩', '52': '9', '53': 'N', '54': '0', '55': '粵', '56': '桂', '57': '4', '58': 'V', '59': 'Y', '60': '青', '61': '京', '62': '湘', '63': '甘', '64': '3'}

# Run inference in dygraph mode
with fluid.dygraph.guard():
    model=MyLeNet()  # instantiate the network
    # NOTE: machine-specific absolute path; adjust it to where the trained
    # parameters were saved.
    model_dict,_=fluid.load_dygraph('/home/xmy/PycharmProjects/test/paddle/proj2_carIDRecongnize/MyLeNet')
    model.load_dict(model_dict)  # load the trained parameters
    model.eval()  # evaluation mode
    lab=[]
    for i in range(8):
        # Character image 2 is never written by the segmentation step.
        if i==2:
            continue
        # Wrap the single image in a batch of one.
        infer_imgs = np.array([load_image('./' + str(i) + '.png')])
        infer_imgs = fluid.dygraph.to_variable(infer_imgs)
        result=model(infer_imgs)
        lab.append(np.argmax(result.numpy()))

# Report the result before opening the (blocking) preview window.
print("車牌原來結果:魯A686EJ")
print('車牌識別結果:' + ''.join(LABEL[str(index)] for index in lab))

cv2.imshow("carID",cv2.imread("車牌.png"))
# imshow only draws once the GUI event loop runs; wait for a key press so
# the window is actually displayed, then close it.
cv2.waitKey(0)
cv2.destroyAllWindows()

工程下載

工程下載地址

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章