[TensorFlow] License Plate Recognition with a Convolutional Neural Network

Introduction:

Over the past few weeks I have been dipping into deep learning, convolutional neural network models in particular. Google recently published a good paper on multi-digit number recognition from Street View imagery. It describes a single end-to-end neural network system for extracting street numbers from Street View photos, and the authors go on to explain how the same network architecture can be used to crack Google's own CAPTCHA system.
To get hands-on experience implementing a neural network, I decided to design a system that solves a similar problem: automatic recognition of Chinese license plates. There are three reasons for building such a system:

  • I should be able to follow Google's paper and build the same or a similar architecture: the architecture Google presents works very well on CAPTCHA recognition, so it stands to reason that it would perform well on license plates too. Having a proven architecture greatly simplifies the process of learning about CNNs.
  • I can easily generate the training data (license plate images). A big problem in training neural networks is the need for large amounts of labeled samples; training a network well usually takes hundreds of thousands of labeled images.
  • Curiosity. Traditional automatic license plate recognition systems rely on hand-written algorithms for plate localization, normalization, segmentation, character recognition, and so on, and implementing them can run to thousands of lines of code. What interested me was how good a system could be built with relatively little code and minimal domain knowledge.

The project requires Python, TensorFlow, OpenCV, and NumPy. The source code is here. (For reference, a typical environment is pip install "tensorflow<2" opencv-python numpy pillow matplotlib; the code targets the TensorFlow 1.x API.)

First, the project structure and sample output:

Synthesizing images

genplate.py

To train any neural network, a set of training data with the correct outputs must be provided.


The text and plate background follow the standard format of domestic (mainland Chinese) plates, but the text color is required to be darker than the plate color. This simulates the lighting variation of real scenes. Noise is added at the end; this both accounts for real sensor noise and keeps the network from depending too much on sharp outline edges, since real inputs may be out of focus. The font directory holds the font files, and the image directory holds the plate backgrounds and noise images.


Plate transformation uses an affine transform based on random roll, pitch, yaw, translation, and scaling. The allowed range of each parameter covers the set of situations in which a plate might be seen. For example, yaw is allowed to vary much more than roll (you are more likely to see a car turning a corner than one tipped onto its side).


from PIL import Image
from PIL import ImageDraw
from PIL import ImageFont
import cv2
import numpy as np
import os
from math import cos, sin

# Generate license plates
# font = ImageFont.truetype("Arial-Bold.ttf",14)

index = {"京": 0, "滬": 1, "津": 2, "渝": 3, "冀": 4, "晉": 5, "蒙": 6, "遼": 7, "吉": 8, "黑": 9, "蘇": 10, "浙": 11, "皖": 12,
         "閩": 13, "贛": 14, "魯": 15, "豫": 16, "鄂": 17, "湘": 18, "粵": 19, "桂": 20, "瓊": 21, "川": 22, "貴": 23, "雲": 24,
         "藏": 25, "陝": 26, "甘": 27, "青": 28, "寧": 29, "新": 30, "0": 31, "1": 32, "2": 33, "3": 34, "4": 35, "5": 36,
         "6": 37, "7": 38, "8": 39, "9": 40, "A": 41, "B": 42, "C": 43, "D": 44, "E": 45, "F": 46, "G": 47, "H": 48,
         "J": 49, "K": 50, "L": 51, "M": 52, "N": 53, "P": 54, "Q": 55, "R": 56, "S": 57, "T": 58, "U": 59, "V": 60,
         "W": 61, "X": 62, "Y": 63, "Z": 64};

chars = ["京", "滬", "津", "渝", "冀", "晉", "蒙", "遼", "吉", "黑", "蘇", "浙", "皖", "閩", "贛", "魯", "豫", "鄂", "湘", "粵", "桂",
             "瓊", "川", "貴", "雲", "藏", "陝", "甘", "青", "寧", "新", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A",
             "B", "C", "D", "E", "F", "G", "H", "J", "K", "L", "M", "N", "P", "Q", "R", "S", "T", "U", "V", "W", "X",
             "Y", "Z"
             ];

def AddSmudginess(img, Smu):
    """Blend a random 50x50 patch of the smudge texture into the plate image."""
    rows = r(Smu.shape[0] - 50)
    cols = r(Smu.shape[1] - 50)
    adder = Smu[rows:rows + 50, cols:cols + 50]
    adder = cv2.resize(adder, (50, 50))
    #   adder = cv2.bitwise_not(adder)
    img = cv2.resize(img, (50, 50))
    img = cv2.bitwise_not(img)
    img = cv2.bitwise_and(adder, img)
    img = cv2.bitwise_not(img)
    return img
def rot(img, angel, shape, max_angel):
    """ Apply a slight shear distortion to the image.
        img        input image
        angel      angle used for this sample
        shape      shape of the input image
        max_angel  maximum allowed angle (determines the output canvas width)
    """
    size_o = [shape[1], shape[0]]
    size = (shape[1] + int(shape[0] * cos((float(max_angel) / 180) * 3.14)), shape[0])
    interval = abs(int(sin((float(angel) / 180) * 3.14) * shape[0]))
    pts1 = np.float32([[0,0],[0,size_o[1]],[size_o[0],0],[size_o[0],size_o[1]]])
    if(angel>0):
        pts2 = np.float32([[interval,0],[0,size[1]  ],[size[0],0  ],[size[0]-interval,size_o[1]]])
    else:
        pts2 = np.float32([[0,0],[interval,size[1]  ],[size[0]-interval,0  ],[size[0],size_o[1]]])
    M  = cv2.getPerspectiveTransform(pts1,pts2);
    dst = cv2.warpPerspective(img,M,size);
    return dst;
def rotRandrom(img, factor, size):
    """Apply a random perspective distortion, jittering each corner by up to factor pixels."""
    shape = size
    pts1 = np.float32([[0, 0], [0, shape[0]], [shape[1], 0], [shape[1], shape[0]]])
    pts2 = np.float32([[r(factor), r(factor)], [r(factor), shape[0] - r(factor)], [shape[1] - r(factor), r(factor)],
                       [shape[1] - r(factor), shape[0] - r(factor)]])
    M = cv2.getPerspectiveTransform(pts1, pts2)
    dst = cv2.warpPerspective(img, M, size)
    return dst
def tfactor(img):
    """Randomly perturb hue, saturation and value to simulate lighting changes."""
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    hsv[:, :, 0] = hsv[:, :, 0] * (0.8 + np.random.random() * 0.2)
    hsv[:, :, 1] = hsv[:, :, 1] * (0.3 + np.random.random() * 0.7)
    hsv[:, :, 2] = hsv[:, :, 2] * (0.2 + np.random.random() * 0.8)
    img = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
    return img
def random_envirment(img, data_set):
    """Composite the plate onto a randomly chosen background image."""
    index = r(len(data_set))
    env = cv2.imread(data_set[index])
    env = cv2.resize(env, (img.shape[1], img.shape[0]))
    bak = (img == 0)
    bak = bak.astype(np.uint8) * 255
    inv = cv2.bitwise_and(bak, env)
    img = cv2.bitwise_or(inv, img)
    return img
def GenCh(f, val):
    """Render a Chinese character and resize it to the 23x70 character cell."""
    img = Image.new("RGB", (45, 70), (255, 255, 255))
    draw = ImageDraw.Draw(img)
    draw.text((0, 3), val, (0, 0, 0), font=f)
    img = img.resize((23, 70))
    A = np.array(img)
    return A

def GenCh1(f, val):
    """Render a digit or Latin letter into a 23x70 character cell."""
    img = Image.new("RGB", (23, 70), (255, 255, 255))
    draw = ImageDraw.Draw(img)
    draw.text((0, 2), val, (0, 0, 0), font=f)
    A = np.array(img)
    return A
def AddGauss(img, level):
    """Blur with a random kernel size to simulate defocus."""
    return cv2.blur(img, (level * 2 + 1, level * 2 + 1))

def r(val):
    """Random integer in [0, val)."""
    return int(np.random.random() * val)

def AddNoiseSingleChannel(single):
    diff = 255-single.max();
    noise = np.random.normal(0,1+r(6),single.shape);
    noise = (noise - noise.min())/(noise.max()-noise.min())
    noise= diff*noise;
    noise= noise.astype(np.uint8)
    dst = single + noise
    return dst

def addNoise(img,sdev = 0.5,avg=10):
    img[:,:,0] =  AddNoiseSingleChannel(img[:,:,0]);
    img[:,:,1] =  AddNoiseSingleChannel(img[:,:,1]);
    img[:,:,2] =  AddNoiseSingleChannel(img[:,:,2]);
    return img;

class GenPlate:
    def __init__(self,fontCh,fontEng,NoPlates):
        self.fontC =  ImageFont.truetype(fontCh,43,0);
        self.fontE =  ImageFont.truetype(fontEng,60,0);
        self.img=np.array(Image.new("RGB", (226,70),(255,255,255)))
        self.bg  = cv2.resize(cv2.imread("./images/template.bmp"),(226,70));
        self.smu = cv2.imread("./images/smu2.jpg");
        self.noplates_path = [];
        for parent,parent_folder,filenames in os.walk(NoPlates):
            for filename in filenames:
                path = parent+"/"+filename;
                self.noplates_path.append(path);
    def draw(self,val):
        offset= 2 ;
        self.img[0:70,offset+8:offset+8+23]= GenCh(self.fontC,val[0]);
        self.img[0:70,offset+8+23+6:offset+8+23+6+23]= GenCh1(self.fontE,val[1]);
        for i in range(5):
            base = offset+8+23+6+23+17 +i*23 + i*6 ;
            self.img[0:70, base  : base+23]= GenCh1(self.fontE,val[i+2]);
        return self.img
    def generate(self, text):
        if len(text) == 7:
            fg = self.draw(text)   # the encode/decode round-trip in the original is a no-op in Python 3
            fg = cv2.bitwise_not(fg)
            com = cv2.bitwise_or(fg, self.bg)
            com = rot(com, r(60) - 30, com.shape, 30)
            com = rotRandrom(com, 10, (com.shape[1], com.shape[0]))
            # com = AddSmudginess(com, self.smu)
            com = tfactor(com)
            com = random_envirment(com, self.noplates_path)
            com = AddGauss(com, 1 + r(4))
            com = addNoise(com)
            return com
    def genPlateString(self, pos, val):
        """Generate a random 7-character plate string; if pos != -1, force val at that position."""
        plateStr = ""
        box = [0, 0, 0, 0, 0, 0, 0]
        if pos != -1:
            box[pos] = 1
        for unit, cpos in zip(box, range(len(box))):
            if unit == 1:
                plateStr += val
            else:
                if cpos == 0:
                    plateStr += chars[r(31)]        # position 0: a province character
                elif cpos == 1:
                    plateStr += chars[41 + r(24)]   # position 1: a letter
                else:
                    plateStr += chars[31 + r(34)]   # positions 2-6: a digit or letter
        return plateStr

    def genBatch(self, batchSize, pos, charRange, outputPath, size):
        if not os.path.exists(outputPath):
            os.mkdir(outputPath)
        for i in range(batchSize):
            plateStr = self.genPlateString(-1, -1)   # was G.genPlateString: use self, not the module-level instance
            img = self.generate(plateStr)
            img = cv2.resize(img, size)
            cv2.imwrite(outputPath + "/" + str(i).zfill(2) + ".jpg", img)

       # return img
G = GenPlate("./font/platech.ttf", './font/platechar.ttf', "./NoPlates")
G.genBatch(15, 2, range(31, 65), "./plate", (272, 72))  # comment these two lines out when importing this module elsewhere; otherwise they rerun on every import

input_data.py omitted ....

Generating the data used for training
# Generate plates in batches, for the training loop to consume
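
Since input_data.py is omitted here, the sketch below is my reconstruction of what it plausibly contains, inferred from how train.py uses it: train.py instantiates OCRIter(batch_size, img_h, img_w) and calls .iter() to get one freshly synthesized batch. Only the class and method names come from train.py; the body is an assumption.

import numpy as np
import cv2
import genplate

class OCRIter:
    """Hypothetical reconstruction of the omitted input_data.py."""
    def __init__(self, batch_size, height, width):
        self.batch_size = batch_size
        self.height = height
        self.width = width
        self.gen = genplate.GenPlate("./font/platech.ttf", "./font/platechar.ttf", "./NoPlates")

    def iter(self):
        images, labels = [], []
        for _ in range(self.batch_size):
            plate_str = self.gen.genPlateString(-1, -1)            # random 7-character plate
            img = self.gen.generate(plate_str)
            img = cv2.resize(img, (self.width, self.height))
            images.append(np.multiply(img, 1 / 255.0))             # scale pixels to [0, 1]
            labels.append([genplate.index[c] for c in plate_str])  # 7 class indices per plate
        return images, labels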


Network architecture  # the core of the algorithm: the convolutional neural network

The convolutional network structure used is shown in model.py:

import tensorflow as tf
import numpy as np

# The core of the algorithm: the convolutional neural network
def inference(images, keep_prob):
    '''
    Build the model
    Args:
        images: image batch, 4D tensor, tf.float32, [batch_size, height, width, channels]
        keep_prob: dropout keep probability for the fully connected layer
    Returns:
        seven output tensors with the computed logits, each float, [batch_size, 65]
    '''
    # conv1
    with tf.variable_scope('conv1') as scope:
        weights = tf.get_variable('weights',
                                  shape = [3,3,3,32],
                                  dtype = tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.1,dtype=tf.float32))
        conv = tf.nn.conv2d(images,weights,strides=[1,1,1,1],padding='VALID')
        biases = tf.get_variable('biases',
                                 shape=[32],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        pre_activation = tf.nn.bias_add(conv,biases)
        conv1 = tf.nn.relu(pre_activation,name= scope.name)

    # conv2
    with tf.variable_scope('conv2') as scope:
        weights = tf.get_variable('weights',shape=[3,3,32,32],dtype=tf.float32,initializer=tf.truncated_normal_initializer(stddev=0.1,dtype=tf.float32))
        conv = tf.nn.conv2d(conv1,weights,strides=[1,1,1,1],padding='VALID')
        biases = tf.get_variable('biases',
                                 shape=[32],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        pre_activation = tf.nn.bias_add(conv,biases)
        conv2 = tf.nn.relu(pre_activation,name= scope.name)

    with tf.variable_scope('max_pooling1') as scope:
        pool1 = tf.nn.max_pool(conv2,ksize = [1,2,2,1],strides= [1,2,2,1],padding='VALID',name='pooling1')

    #conv3
    with tf.variable_scope('conv3') as scope:
        weights = tf.get_variable('weights',shape=[3,3,32,64],dtype=tf.float32,initializer=tf.truncated_normal_initializer(stddev=0.1,dtype=tf.float32))
        conv = tf.nn.conv2d(pool1,weights,strides=[1,1,1,1],padding='VALID')
        biases = tf.get_variable('biases',shape=[64],dtype = tf.float32,initializer= tf.constant_initializer(0.1))
        pre_activation = tf.nn.bias_add(conv,biases)
        conv3 = tf.nn.relu(pre_activation,name=scope.name)

    #conv4
    with tf.variable_scope('conv4') as scope:
        weights = tf.get_variable('weights',shape=[3,3,64,64],dtype=tf.float32,initializer=tf.truncated_normal_initializer(stddev=0.1,dtype=tf.float32))
        conv =tf.nn.conv2d(conv3,weights,strides=[1,1,1,1],padding='VALID')
        biases = tf.get_variable('biases',shape=[64],dtype=tf.float32,initializer=tf.constant_initializer(0.1))
        pre_activation = tf.nn.bias_add(conv,biases)
        conv4 = tf.nn.relu(pre_activation,name=scope.name)

    with tf.variable_scope('max_pooling2') as scope:
        pool2 = tf.nn.max_pool(conv4,ksize=[1,2,2,1],strides=[1,2,2,1],padding='VALID',name='pooling2')

    #conv5
    with tf.variable_scope('conv5') as scope:
        weights = tf.get_variable('weights',shape=[3,3,64,128],dtype=tf.float32,initializer=tf.truncated_normal_initializer(stddev=0.1,dtype=tf.float32))
        conv =tf.nn.conv2d(pool2,weights,strides=[1,1,1,1],padding='VALID')
        biases = tf.get_variable('biases',shape=[128],dtype=tf.float32,initializer=tf.constant_initializer(0.1))
        pre_activation = tf.nn.bias_add(conv,biases)
        conv5 = tf.nn.relu(pre_activation,name=scope.name)

    #conv6
    with tf.variable_scope('conv6') as scope:
        weights = tf.get_variable('weights',shape=[3,3,128,128],dtype=tf.float32,initializer=tf.truncated_normal_initializer(stddev=0.1,dtype=tf.float32))
        conv =tf.nn.conv2d(conv5,weights,strides=[1,1,1,1],padding='VALID')
        biases = tf.get_variable('biases',shape=[128],dtype=tf.float32,initializer=tf.constant_initializer(0.1))
        pre_activation = tf.nn.bias_add(conv,biases)
        conv6 = tf.nn.relu(pre_activation,name=scope.name)

    #pool3
    with tf.variable_scope('max_pool3') as scope:
         pool3 = tf.nn.max_pool(conv6,ksize=[1,2,2,1],strides=[1,2,2,1],padding='VALID',name='pool3')
    # fc1: flatten the final pooled feature map, then apply dropout
    with tf.variable_scope('fc1') as scope:
        shp = pool3.get_shape()
        flattened_shape = shp[1].value*shp[2].value*shp[3].value
        reshape = tf.reshape(pool3,[-1,flattened_shape])
        fc1 = tf.nn.dropout(reshape,keep_prob,name='fc1_dropout')

    # Fully connected output heads, one per plate character
    # First head: the province character
    with tf.variable_scope('fc21') as scope:
        weights = tf.get_variable('weights',
                                  shape=[flattened_shape,65],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.005,dtype=tf.float32))

        biases = tf.get_variable('biases',
                                 shape=[65],
                                 dtype=tf.float32,
                                 initializer = tf.truncated_normal_initializer(0.1)
                                 )
        fc21 = tf.matmul(fc1,weights)+biases
    # Second head: the city letter (second plate character)
    with tf.variable_scope('fc22') as scope:
        weights = tf.get_variable('weights',
                                  shape=[flattened_shape,65],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.005,dtype=tf.float32))

        biases = tf.get_variable('biases',
                                 shape=[65],
                                 dtype=tf.float32,
                                 initializer = tf.truncated_normal_initializer(0.1)
                                 )
        fc22 = tf.matmul(fc1,weights)+biases
    # Third head: the third character (letter or digit)
    with tf.variable_scope('fc23') as scope:
        weights = tf.get_variable('weights',
                                  shape=[flattened_shape,65],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.005,dtype=tf.float32))

        biases = tf.get_variable('biases',
                                 shape=[65],
                                 dtype=tf.float32,
                                 initializer = tf.truncated_normal_initializer(0.1)
                                 )
        fc23= tf.matmul(fc1,weights)+biases
    with tf.variable_scope('fc24') as scope:
        weights = tf.get_variable('weights',
                                  shape=[flattened_shape,65],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.005,dtype=tf.float32))

        biases = tf.get_variable('biases',
                                 shape=[65],
                                 dtype=tf.float32,
                                 initializer = tf.truncated_normal_initializer(0.1)
                                 )
        fc24 = tf.matmul(fc1,weights)+biases
    with tf.variable_scope('fc25') as scope:
        weights = tf.get_variable('weights',
                                  shape=[flattened_shape,65],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.005,dtype=tf.float32))

        biases = tf.get_variable('biases',
                                 shape=[65],
                                 dtype=tf.float32,
                                 initializer = tf.truncated_normal_initializer(0.1)
                                 )
        fc25 = tf.matmul(fc1,weights)+biases
    with tf.variable_scope('fc26') as scope:
        weights = tf.get_variable('weights',
                                  shape=[flattened_shape,65],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.005,dtype=tf.float32))

        biases = tf.get_variable('biases',
                                 shape=[65],
                                 dtype=tf.float32,
                                 initializer = tf.truncated_normal_initializer(0.1)
                                 )
        fc26 = tf.matmul(fc1,weights)+biases
    with tf.variable_scope('fc27') as scope:
        weights = tf.get_variable('weights',
                                  shape=[flattened_shape,65],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.005,dtype=tf.float32))

        biases = tf.get_variable('biases',
                                 shape=[65],
                                 dtype=tf.float32,
                                 initializer = tf.truncated_normal_initializer(0.1)
                                 )
        fc27 = tf.matmul(fc1,weights)+biases

    return fc21,fc22,fc23,fc24,fc25,fc26,fc27    #shape = [7,batch_size,65]

# Cross-entropy losses computed from the network's seven output heads
def losses(logits1,logits2,logits3,logits4,logits5,logits6,logits7,labels):
    '''Compute loss from logits and labels
    Args:
        logits: logits tensor, float, [7*batch_size, 65]
        labels: label tensor, tf.int32, [7*batch_size]

    Returns:
        loss tensor of float type
    '''
    labels = tf.convert_to_tensor(labels,tf.int32)

    with tf.variable_scope('loss1') as scope:
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits1, labels=labels[:,0], name='xentropy_per_example')
        #cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits,labels=labels,name='xentropy_per_example')
        loss1 = tf.reduce_mean(cross_entropy, name='loss1')
        tf.summary.scalar(scope.name+'/loss1', loss1)

    with tf.variable_scope('loss2') as scope:
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits2, labels=labels[:,1], name='xentropy_per_example')
        #cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits,labels=labels,name='xentropy_per_example')
        loss2 = tf.reduce_mean(cross_entropy, name='loss2')
        tf.summary.scalar(scope.name+'/loss2', loss2)

    with tf.variable_scope('loss3') as scope:
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits3, labels=labels[:,2], name='xentropy_per_example')
        #cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits,labels=labels,name='xentropy_per_example')
        loss3 = tf.reduce_mean(cross_entropy, name='loss3')
        tf.summary.scalar(scope.name+'/loss3', loss3)

    with tf.variable_scope('loss4') as scope:
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits4, labels=labels[:,3], name='xentropy_per_example')
        #cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits,labels=labels,name='xentropy_per_example')
        loss4 = tf.reduce_mean(cross_entropy, name='loss4')
        tf.summary.scalar(scope.name+'/loss4', loss4)

    with tf.variable_scope('loss5') as scope:
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits5, labels=labels[:,4], name='xentropy_per_example')
        #cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits,labels=labels,name='xentropy_per_example')
        loss5 = tf.reduce_mean(cross_entropy, name='loss5')
        tf.summary.scalar(scope.name+'/loss5', loss5)

    with tf.variable_scope('loss6') as scope:
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits6, labels=labels[:,5], name='xentropy_per_example')
        #cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits,labels=labels,name='xentropy_per_example')
        loss6 = tf.reduce_mean(cross_entropy, name='loss6')
        tf.summary.scalar(scope.name+'/loss6', loss6)

    with tf.variable_scope('loss7') as scope:
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits7, labels=labels[:,6], name='xentropy_per_example')
        #cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits,labels=labels,name='xentropy_per_example')
        loss7 = tf.reduce_mean(cross_entropy, name='loss7')
        tf.summary.scalar(scope.name+'/loss7', loss7)

    return loss1,loss2,loss3,loss4,loss5,loss6,loss7

# Optimization: adaptive gradient descent (Adam), one optimizer per head
def trainning( loss1,loss2,loss3,loss4,loss5,loss6,loss7, learning_rate):
    '''Training ops, the Op returned by this function is what must be passed to
        'sess.run()' call to cause the model to train.

    Args:
        loss: loss tensor, from losses()

    Returns:
        train_op: The op for training
    '''
    with tf.name_scope('optimizer1'):
        optimizer1 = tf.train.AdamOptimizer(learning_rate= learning_rate)
        global_step = tf.Variable(0, name='global_step', trainable=False)
        train_op1 = optimizer1.minimize(loss1, global_step= global_step)
    with tf.name_scope('optimizer2'):
        optimizer2 = tf.train.AdamOptimizer(learning_rate= learning_rate)
        global_step = tf.Variable(0, name='global_step', trainable=False)
        train_op2 = optimizer2.minimize(loss2, global_step= global_step)
    with tf.name_scope('optimizer3'):
        optimizer3 = tf.train.AdamOptimizer(learning_rate= learning_rate)
        global_step = tf.Variable(0, name='global_step', trainable=False)
        train_op3 = optimizer3.minimize(loss3, global_step= global_step)
    with tf.name_scope('optimizer4'):
        optimizer4 = tf.train.AdamOptimizer(learning_rate= learning_rate)
        global_step = tf.Variable(0, name='global_step', trainable=False)
        train_op4 = optimizer4.minimize(loss4, global_step= global_step)
    with tf.name_scope('optimizer5'):
        optimizer5 = tf.train.AdamOptimizer(learning_rate= learning_rate)
        global_step = tf.Variable(0, name='global_step', trainable=False)
        train_op5 = optimizer5.minimize(loss5, global_step= global_step)
    with tf.name_scope('optimizer6'):
        optimizer6 = tf.train.AdamOptimizer(learning_rate= learning_rate)
        global_step = tf.Variable(0, name='global_step', trainable=False)
        train_op6 = optimizer6.minimize(loss6, global_step= global_step)
    with tf.name_scope('optimizer7'):
        optimizer7 = tf.train.AdamOptimizer(learning_rate= learning_rate)
        global_step = tf.Variable(0, name='global_step', trainable=False)
        train_op7 = optimizer7.minimize(loss7, global_step= global_step)

    return train_op1,train_op2,train_op3,train_op4,train_op5,train_op6,train_op7

# Model evaluation
def evaluation(logits1,logits2,logits3,logits4,logits5,logits6,logits7,labels):
  """Evaluate the quality of the logits at predicting the label.
  Args:
    logits: Logits tensor, float - [batch_size, NUM_CLASSES].
    labels: Labels tensor, int32 - [batch_size], with values in the
      range [0, NUM_CLASSES).
  Returns:
    A scalar int32 tensor with the number of examples (out of batch_size)
    that were predicted correctly.
  """
  logits_all = tf.concat([logits1,logits2,logits3,logits4,logits5,logits6,logits7],0)
  labels = tf.convert_to_tensor(labels,tf.int32)
  labels_all = tf.reshape(tf.transpose(labels),[-1])
  with tf.variable_scope('accuracy') as scope:
      correct = tf.nn.in_top_k(logits_all, labels_all, 1)
      correct = tf.cast(correct, tf.float16)
      accuracy = tf.reduce_mean(correct)
      tf.summary.scalar(scope.name+'/accuracy', accuracy)
  return accuracy

Training the model: train.py

import os
import numpy as np
import tensorflow as tf
from input_data import OCRIter
import model
import time
import datetime

# Training hyperparameters
img_w = 272
img_h = 72
num_label=7
batch_size = 8
# count =30000
count = 500000
learning_rate = 0.0001

# Default tensor layout: [N, H, W, C]
image_holder = tf.placeholder(tf.float32,[batch_size,img_h,img_w,3])
label_holder = tf.placeholder(tf.int32,[batch_size,7])
keep_prob = tf.placeholder(tf.float32)

logs_train_dir = r'H:\GPpython\第三階段數據分析\神經網絡\Licence_plate_recognize\Licence_plate_recognize\train_result'

def get_batch():
    data_batch = OCRIter(batch_size,img_h,img_w)
    image_batch,label_batch = data_batch.iter()

    image_batch1 = np.array(image_batch)
    label_batch1 = np.array(label_batch)
    return image_batch1,label_batch1

def fit():
    train_logits1, train_logits2, train_logits3, train_logits4, train_logits5, train_logits6, train_logits7 = model.inference(
        image_holder, keep_prob)

    train_loss1, train_loss2, train_loss3, train_loss4, train_loss5, train_loss6, train_loss7 = model.losses(
        train_logits1, train_logits2, train_logits3, train_logits4, train_logits5, train_logits6, train_logits7,
        label_holder)
    train_op1, train_op2, train_op3, train_op4, train_op5, train_op6, train_op7 = model.trainning(train_loss1,
                                                                                                  train_loss2,
                                                                                                  train_loss3,
                                                                                                  train_loss4,
                                                                                                  train_loss5,
                                                                                                  train_loss6,
                                                                                                  train_loss7,
                                                                                                  learning_rate)

    train_acc = model.evaluation(train_logits1, train_logits2, train_logits3, train_logits4, train_logits5,
                                 train_logits6, train_logits7, label_holder)

    input_image = tf.summary.image('input', image_holder)

    summary_op = tf.summary.merge(tf.get_collection(tf.GraphKeys.SUMMARIES))

    sess = tf.Session()
    train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)
    saver = tf.train.Saver()
    sess.run(tf.global_variables_initializer())
    start_time1 = time.time()
    for step in range(count):
        x_batch, y_batch = get_batch()
        start_time2 = time.time()
        time_str = datetime.datetime.now().isoformat()
        feed_dict = {image_holder: x_batch, label_holder: y_batch, keep_prob: 0.5}
        _, _, _, _, _, _, _, tra_loss1, tra_loss2, tra_loss3, tra_loss4, tra_loss5, tra_loss6, tra_loss7, acc, summary_str = sess.run(
            [train_op1, train_op2, train_op3, train_op4, train_op5, train_op6, train_op7, train_loss1, train_loss2,
             train_loss3, train_loss4, train_loss5, train_loss6, train_loss7, train_acc, summary_op], feed_dict)
        train_writer.add_summary(summary_str, step)
        duration = time.time() - start_time2
        tra_all_loss = tra_loss1 + tra_loss2 + tra_loss3 + tra_loss4 + tra_loss5 + tra_loss6 + tra_loss7

        # print(y_batch)  # test only: verify that the training samples match their labels

        if step % 10 == 0:
            sec_per_batch = float(duration)
            print('%s : Step %d,train_loss = %.2f,acc= %.2f,sec/batch=%.3f' % (
                time_str, step, tra_all_loss, acc, sec_per_batch))

        if step % 10000 == 0 or (step + 1) == count:
            checkpoint_path = os.path.join(logs_train_dir, 'model.ckpt')
            saver.save(sess, checkpoint_path, global_step=step)
    sess.close()
    print(time.time() - start_time1)
if __name__ == '__main__':
    # fit()  # uncomment to actually train; the lines below only sanity-check the data pipeline
    data, labels = get_batch()
    print(data.shape)
    print(labels.shape)
    print(labels[0])

All layers use the standard structure for deep neural networks, with ReLU activations everywhere except the output layer. The node indicating presence uses a sigmoid activation, as is typical for binary outputs. The other output nodes use a softmax across characters (so that the probabilities in each column sum to 1), the standard way of modeling a discrete probability distribution.
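
As a small illustration (not part of the project code), a softmax applied to one 65-way head turns the raw logits into a probability distribution over the character set that sums to 1:

import tensorflow as tf

logits = tf.random_normal([1, 65])         # raw scores from one character head
probs = tf.nn.softmax(logits)              # discrete distribution over the 65 classes
with tf.Session() as sess:
    print(sess.run(tf.reduce_sum(probs)))  # ~1.0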

The loss function is defined in terms of the cross-entropy between the labels and the network output. For numerical stability, the activations of the final layer are rolled into the cross-entropy calculation using softmax_cross_entropy_with_logits and sigmoid_cross_entropy_with_logits. For a detailed and intuitive introduction to cross-entropy, see this section of Michael A. Nielsen's free online book.
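
A minimal sketch of why the fused op matters: with extreme logits, a hand-rolled softmax overflows and the loss becomes NaN, while tf.nn.sparse_softmax_cross_entropy_with_logits (the op that model.py's losses() actually uses) stays finite:

import tensorflow as tf

logits = tf.constant([[1000.0, -1000.0]])
labels = tf.constant([0])
manual = tf.exp(logits) / tf.reduce_sum(tf.exp(logits))  # exp(1000) overflows to inf
naive = -tf.log(manual[0, 0])                            # nan
stable = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels)  # [0.0]
with tf.Session() as sess:
    print(sess.run([naive, stable]))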

Training (train.py) took several hours on an NVIDIA GTX 960M, with the training data generated by a background process on the CPU.

Output processing

The script below restores the trained checkpoint, runs one randomly chosen synthesized plate through the network, and decodes the seven output heads into plate characters:

import tensorflow as tf
import numpy as np
import os
from PIL import Image
import cv2
import matplotlib.pyplot as plt
import model
import genplate
index = {"京": 0, "滬": 1, "津": 2, "渝": 3, "冀": 4, "晉": 5, "蒙": 6, "遼": 7, "吉": 8, "黑": 9, "蘇": 10, "浙": 11, "皖": 12,
         "閩": 13, "贛": 14, "魯": 15, "豫": 16, "鄂": 17, "湘": 18, "粵": 19, "桂": 20, "瓊": 21, "川": 22, "貴": 23, "雲": 24,
         "藏": 25, "陝": 26, "甘": 27, "青": 28, "寧": 29, "新": 30, "0": 31, "1": 32, "2": 33, "3": 34, "4": 35, "5": 36,
         "6": 37, "7": 38, "8": 39, "9": 40, "A": 41, "B": 42, "C": 43, "D": 44, "E": 45, "F": 46, "G": 47, "H": 48,
         "J": 49, "K": 50, "L": 51, "M": 52, "N": 53, "P": 54, "Q": 55, "R": 56, "S": 57, "T": 58, "U": 59, "V": 60,
         "W": 61, "X": 62, "Y": 63, "Z": 64};

chars = ["京", "滬", "津", "渝", "冀", "晉", "蒙", "遼", "吉", "黑", "蘇", "浙", "皖", "閩", "贛", "魯", "豫", "鄂", "湘", "粵", "桂",
             "瓊", "川", "貴", "雲", "藏", "陝", "甘", "青", "寧", "新", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A",
             "B", "C", "D", "E", "F", "G", "H", "J", "K", "L", "M", "N", "P", "Q", "R", "S", "T", "U", "V", "W", "X",
             "Y", "Z"
             ];
'''
Test one image against the saved models and parameters
'''
pic = None  # holds the image being tested, shown at the end (a module-level `global` statement has no effect)
def get_one_image(test):
    '''
    Randomly pick one image from training data
    Return: ndarry
    '''
    G = genplate.GenPlate("./font/platech.ttf", './font/platechar.ttf', "./NoPlates")

    G.genBatch(15, 2, range(31, 65), "./plate", (272, 72))  # regenerates the test plates on every run; comment out to test a fixed set
    n = len(test)
    ind =np.random.randint(0,n)
    img_dir = test[ind]

    image_show = Image.open(img_dir)
    plt.imshow(image_show)
    #image = image.resize([120,30])
    image = cv2.imread(img_dir)
    global  pic
    pic = image
    # cv2.imshow('image', image)
    # cv2.waitKey(0)
    img = np.multiply(image,1/255.0)
    #image = np.array(img)
    #image = img.transpose(1,0,2)
    image = np.array([img])
    print(image.shape)

    return image

batch_size = 1
x = tf.placeholder(tf.float32,[batch_size,72,272,3])
keep_prob =tf.placeholder(tf.float32)

test_dir = r'H:/GPpython/Licence_plate_recognize/plate/'
test_image = []
for file in os.listdir(test_dir):
    test_image.append(test_dir + file)
test_image = list(test_image)

image_array = get_one_image(test_image)

#logit = model.inference(x,keep_prob)
logit1,logit2,logit3,logit4,logit5,logit6,logit7 = model.inference(x,keep_prob)

#logit1 = tf.nn.softmax(logit1)
#logit2 = tf.nn.softmax(logit2)
#logit3 = tf.nn.softmax(logit3)
#logit4 = tf.nn.softmax(logit4)
#logit5 = tf.nn.softmax(logit5)
#logit6 = tf.nn.softmax(logit6)
#logit7 = tf.nn.softmax(logit7)

 
logs_train_dir = r'H:/GPpython/Licence_plate_recognize/train_result/'
saver = tf.train.Saver()

with tf.Session() as sess:
    print ("Reading checkpoint...")
    ckpt = tf.train.get_checkpoint_state(logs_train_dir)
    if ckpt and ckpt.model_checkpoint_path:
        global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
        saver.restore(sess, ckpt.model_checkpoint_path)
        print('Loading success, global_step is %s' % global_step)
    else:
        print('No checkpoint file found')

    pre1,pre2,pre3,pre4,pre5,pre6,pre7 = sess.run([logit1,logit2,logit3,logit4,logit5,logit6,logit7], feed_dict={x: image_array,keep_prob:1.0})
    prediction = np.reshape(np.array([pre1,pre2,pre3,pre4,pre5,pre6,pre7]),[-1,65])
    #prediction = np.array([[pre1],[pre2],[pre3],[pre4],[pre5],[pre6],[pre7]])
    #print(prediction)

    max_index = np.argmax(prediction,axis=1)
    print(max_index)
    line = ''
    for i in range(prediction.shape[0]):
        if i == 0:
            result = np.argmax(prediction[i][0:31])          # position 0: province character
        if i == 1:
            result = np.argmax(prediction[i][41:65]) + 41    # position 1: letter
        if i > 1:
            result = np.argmax(prediction[i][31:65]) + 31    # positions 2-6: digit or letter
        line += chars[result] + " "
    print('predicted: ' + line)

cv2.imshow('pic',pic)
cv2.waitKeyEx(0)

Summary

I have open-sourced a system that, with relatively little code, no domain-specific libraries, and not much domain-specific knowledge, performs automatic license plate recognition. Moreover, I addressed the need for thousands of training images (as is usual with deep neural networks) by synthesizing images on the fly.

On the other hand, my system has a few shortcomings:

  1. It only works on plates of a specific format. In particular, the network architecture explicitly assumes exactly 7 characters in the output.
  2. It only works with specific fonts.
  3. It is too slow. The system takes several seconds to process a moderately sized image.

To address problem 1, the Google team split the top layers of their network into multiple sub-networks, each assuming a different number of characters in the output, with a parallel sub-network deciding how many characters are present. I suspect this approach would carry over here, but I have not implemented it in this project.
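
I have not implemented this, but here is a hedged sketch of such an extra head, written in the same style as the fully connected layers in model.py and meant to sit inside inference() (fc_len is a hypothetical name; fc1 and flattened_shape are the tensors defined there):

    # Hypothetical extra head: classify how many characters are present (0-7),
    # so that plates of variable length could be handled.
    with tf.variable_scope('fc_len') as scope:
        weights = tf.get_variable('weights', shape=[flattened_shape, 8], dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.005, dtype=tf.float32))
        biases = tf.get_variable('biases', shape=[8], dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        fc_len = tf.matmul(fc1, weights) + biases   # train with its own softmax cross-entropy loss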

I gave an example of point 2 above: because of slight font differences, the system is only usable on domestic plates. If it tried to detect US plates, the misreads would be much worse, since US plates use many more font types. One possible solution is to make the training data draw from a wider variety of fonts, although it is unclear how many fonts would be needed for this to succeed.
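
A hedged sketch of one way to do that: keep a pool of .ttf files and pick one at random each time a character is rendered, passing the result to GenCh1 in place of self.fontE (the extra font paths below are hypothetical; only platech.ttf and platechar.ttf ship with this project):

import random
from PIL import ImageFont

FONT_POOL = ["./font/platechar.ttf", "./font/alt_plate1.ttf", "./font/alt_plate2.ttf"]  # hypothetical extras

def random_font(size=60):
    """Pick a random font for rendering one plate's characters."""
    return ImageFont.truetype(random.choice(FONT_POOL), size)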

The slowness in point 3 is the killer for many applications: processing a moderately sized input image takes a few seconds even on a fairly powerful GPU. I don't think this can be avoided without introducing a cascade of detectors in front of the network, for example a Haar cascade, a HOG detector, or a much simpler neural network.
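
As a sketch of the cascade idea, OpenCV ships a stock Haar cascade for number plates (trained on Russian plates, so its recall on Chinese plates is an open question); a cheap front-end could propose candidate regions and hand only the crops to the CNN:

import cv2

def propose_plates(frame):
    """Cheap detector stage: return candidate plate crops resized for the CNN."""
    cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_russian_plate_number.xml')
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    boxes = cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=3)
    return [cv2.resize(frame[y:y + h, x:x + w], (272, 72)) for (x, y, w, h) in boxes]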

I would be interested to try other machine learning approaches for comparison; pose regression in particular looks promising, perhaps with a rudimentary classification stage bolted on at the end. With a machine learning library such as scikit-learn, it should be just as straightforward.

 
