端到端車牌/驗證碼識別(tensorflow版)——(1)

端到端車牌識別(1)


一 、簡介

本項目主要通過tensorflow實現端到端車牌識別,無需車牌字符分割。目前,僅關注識別問題,未考慮車輛檢測、車牌定位等問題,本項目嘗試了採用兩種方法:(1)CNN ; (2)LSTM ;

二、CNN方法實現

1. 數據集

數據集採用了一個開源的車牌生成器,該項目作者採用Mxnet實現車牌識別。生成的車牌基本都有傾斜、加噪處理。

這裏寫圖片描述這裏寫圖片描述這裏寫圖片描述這裏寫圖片描述


代碼 genplate.py 如下:

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Sep  4 09:19:43 2017

@author: llc
"""

#coding=utf-8
import PIL
from PIL import ImageFont
from PIL import Image
from PIL import ImageDraw
import cv2;
import numpy as np;
import os;
from math import *


# font = ImageFont.truetype("Arial-Bold.ttf",14)

index = {"京": 0, "滬": 1, "津": 2, "渝": 3, "冀": 4, "晉": 5, "蒙": 6, "遼": 7, "吉": 8, "黑": 9, "蘇": 10, "浙": 11, "皖": 12,
         "閩": 13, "贛": 14, "魯": 15, "豫": 16, "鄂": 17, "湘": 18, "粵": 19, "桂": 20, "瓊": 21, "川": 22, "貴": 23, "雲": 24,
         "藏": 25, "陝": 26, "甘": 27, "青": 28, "寧": 29, "新": 30, "0": 31, "1": 32, "2": 33, "3": 34, "4": 35, "5": 36,
         "6": 37, "7": 38, "8": 39, "9": 40, "A": 41, "B": 42, "C": 43, "D": 44, "E": 45, "F": 46, "G": 47, "H": 48,
         "J": 49, "K": 50, "L": 51, "M": 52, "N": 53, "P": 54, "Q": 55, "R": 56, "S": 57, "T": 58, "U": 59, "V": 60,
         "W": 61, "X": 62, "Y": 63, "Z": 64};

chars = ["京", "滬", "津", "渝", "冀", "晉", "蒙", "遼", "吉", "黑", "蘇", "浙", "皖", "閩", "贛", "魯", "豫", "鄂", "湘", "粵", "桂",
             "瓊", "川", "貴", "雲", "藏", "陝", "甘", "青", "寧", "新", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A",
             "B", "C", "D", "E", "F", "G", "H", "J", "K", "L", "M", "N", "P", "Q", "R", "S", "T", "U", "V", "W", "X",
             "Y", "Z"
             ];

def AddSmudginess(img, Smu):
    rows = r(Smu.shape[0] - 50)

    cols = r(Smu.shape[1] - 50)
    adder = Smu[rows:rows + 50, cols:cols + 50];
    adder = cv2.resize(adder, (50, 50));
    #   adder = cv2.bitwise_not(adder)
    img = cv2.resize(img,(50,50))
    img = cv2.bitwise_not(img)
    img = cv2.bitwise_and(adder, img)
    img = cv2.bitwise_not(img)
    return img

def rot(img,angel,shape,max_angel):
    """ 使圖像輕微的畸變

        img 輸入圖像
        factor 畸變的參數
        size 爲圖片的目標尺寸

    """
    size_o = [shape[1],shape[0]]

    size = (shape[1]+ int(shape[0]*cos((float(max_angel )/180) * 3.14)),shape[0])


    interval = abs( int( sin((float(angel) /180) * 3.14)* shape[0]));

    pts1 = np.float32([[0,0],[0,size_o[1]],[size_o[0],0],[size_o[0],size_o[1]]])
    if(angel>0):

        pts2 = np.float32([[interval,0],[0,size[1]  ],[size[0],0  ],[size[0]-interval,size_o[1]]])
    else:
        pts2 = np.float32([[0,0],[interval,size[1]  ],[size[0]-interval,0  ],[size[0],size_o[1]]])

    M  = cv2.getPerspectiveTransform(pts1,pts2);
    dst = cv2.warpPerspective(img,M,size);

    return dst;

def rotRandrom(img, factor, size):
    shape = size;
    pts1 = np.float32([[0, 0], [0, shape[0]], [shape[1], 0], [shape[1], shape[0]]])
    pts2 = np.float32([[r(factor), r(factor)], [ r(factor), shape[0] - r(factor)], [shape[1] - r(factor),  r(factor)],
                       [shape[1] - r(factor), shape[0] - r(factor)]])
    M = cv2.getPerspectiveTransform(pts1, pts2);
    dst = cv2.warpPerspective(img, M, size);
    return dst;



def tfactor(img):
    hsv = cv2.cvtColor(img,cv2.COLOR_BGR2HSV);

    hsv[:,:,0] = hsv[:,:,0]*(0.8+ np.random.random()*0.2);
    hsv[:,:,1] = hsv[:,:,1]*(0.3+ np.random.random()*0.7);
    hsv[:,:,2] = hsv[:,:,2]*(0.2+ np.random.random()*0.8);

    img = cv2.cvtColor(hsv,cv2.COLOR_HSV2BGR);
    return img

def random_envirment(img,data_set):
    index=r(len(data_set))
    env = cv2.imread(data_set[index])

    env = cv2.resize(env,(img.shape[1],img.shape[0]))

    bak = (img==0);
    bak = bak.astype(np.uint8)*255;
    inv = cv2.bitwise_and(bak,env)
    img = cv2.bitwise_or(inv,img)
    return img

def GenCh(f,val):
    img=Image.new("RGB", (45,70),(255,255,255))
    draw = ImageDraw.Draw(img)
    draw.text((0, 3),val,(0,0,0),font=f)
    img =  img.resize((23,70))
    A = np.array(img)

    return A
def GenCh1(f,val):
    img=Image.new("RGB", (23,70),(255,255,255))
    draw = ImageDraw.Draw(img)
    #draw.text((0, 2),val.decode('utf-8'),(0,0,0),font=f)
    draw.text((0, 2),val,(0,0,0),font=f)
    A = np.array(img)
    return A
def AddGauss(img, level):
    return cv2.blur(img, (level * 2 + 1, level * 2 + 1));


def r(val):
    return int(np.random.random() * val)

def AddNoiseSingleChannel(single):
    diff = 255-single.max();
    noise = np.random.normal(0,1+r(6),single.shape);
    noise = (noise - noise.min())/(noise.max()-noise.min())
    noise= diff*noise;
    noise= noise.astype(np.uint8)
    dst = single + noise
    return dst

def addNoise(img,sdev = 0.5,avg=10):
    img[:,:,0] =  AddNoiseSingleChannel(img[:,:,0]);
    img[:,:,1] =  AddNoiseSingleChannel(img[:,:,1]);
    img[:,:,2] =  AddNoiseSingleChannel(img[:,:,2]);
    return img;


class GenPlate:


    def __init__(self,fontCh,fontEng,NoPlates):
        self.fontC =  ImageFont.truetype(fontCh,43,0);
        self.fontE =  ImageFont.truetype(fontEng,60,0);
        self.img=np.array(Image.new("RGB", (226,70),(255,255,255)))
        self.bg  = cv2.resize(cv2.imread("./images/template.bmp"),(226,70));
        self.smu = cv2.imread("./images/smu2.jpg");
        self.noplates_path = [];
        for parent,parent_folder,filenames in os.walk(NoPlates):
            for filename in filenames:
                path = parent+"/"+filename;
                self.noplates_path.append(path);


    def draw(self,val):
        offset= 2 ;

        self.img[0:70,offset+8:offset+8+23]= GenCh(self.fontC,val[0]);
        self.img[0:70,offset+8+23+6:offset+8+23+6+23]= GenCh1(self.fontE,val[1]);
        for i in range(5):
            base = offset+8+23+6+23+17 +i*23 + i*6 ;
            self.img[0:70, base  : base+23]= GenCh1(self.fontE,val[i+2]);
        return self.img
    def generate(self,text):
        if len(text) == 7:
            fg = self.draw(text.encode('utf-8').decode(encoding="utf-8"));
            fg = cv2.bitwise_not(fg);
            com = cv2.bitwise_or(fg,self.bg);
            com = rot(com,r(60)-30,com.shape,30);
            com = rotRandrom(com,10,(com.shape[1],com.shape[0]));
            #com = AddSmudginess(com,self.smu)

            com = tfactor(com)
            com = random_envirment(com,self.noplates_path);
            com = AddGauss(com, 1+r(4));
            com = addNoise(com);


            return com
    def genPlateString(self,pos,val):
        plateStr = "";
        box = [0,0,0,0,0,0,0];
        if(pos!=-1):
            box[pos]=1;
        for unit,cpos in zip(box,range(len(box))):
            if unit == 1:
                plateStr += val
            else:
                if cpos == 0:
                    plateStr += chars[r(31)]
                elif cpos == 1:
                    plateStr += chars[41+r(24)]
                else:
                    plateStr += chars[31 + r(34)]

        return plateStr;

    def genBatch(self, batchSize,pos,charRange, outputPath,size):
        if (not os.path.exists(outputPath)):
            os.mkdir(outputPath)
        for i in range(batchSize):
                plateStr = G.genPlateString(-1,-1)
                img =  G.generate(plateStr);
                img = cv2.resize(img,size);
                cv2.imwrite(outputPath + "/" + str(i).zfill(2) + ".jpg", img);

       # return img

G = GenPlate("./font/platech.ttf",'./font/platechar.ttf',"./NoPlates")

#G.genBatch(15,2,range(31,65),"./plate",(272,72)) #註釋原因爲每次其他模塊運行,若導入該庫,都會刷性該函數


# cv2.imshow("a",com)
# cv2.waitKey(0);

2. 數據讀入

網絡訓練讀入的數據並不是利用上述1中生成的車牌圖片保存到本地,然後再讀取,加載進網絡。而是,直接邊生成車牌數據邊讀入數據訓練。


數據讀入代碼input_data.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Sep  5 15:44:33 2017

@author: llc
"""
#%%
import numpy as np
import cv2
from genplate import *

#產生用於訓練的數據
class OCRIter():
    def __init__(self,batch_size,height,width):
        super(OCRIter, self).__init__()
        self.genplate = GenPlate("./font/platech.ttf",'./font/platechar.ttf','./NoPlates')
        self.batch_size = batch_size
        self.height = height
        self.width = width
        #print("make plate data")

#    def iter(self):
#        for k in range((int)(self.count / self.batch_size)):
#            data = []
#            label = []
#            for i in range(self.batch_size):
#                num, img = gen_sample(self.genplate, self.width, self.height)
#                data.append(img)
#                label.append(num)
#            data_all = data
#            label_all = label
#        return data_all,label_all   

    def iter(self):
        data = []
        label = []
        for i in range(self.batch_size):
            num, img = gen_sample(self.genplate, self.width, self.height)
            data.append(img)
            label.append(num)
        data_all = data
        label_all = label
        return data_all,label_all   


def rand_range(lo,hi):
    return lo+r(hi-lo);


def gen_rand():
    name = ""
    label=[]
    label.append(rand_range(0,31))  #產生車牌開頭32個省的標籤
    label.append(rand_range(41,65)) #產生車牌第二個字母的標籤
    for i in range(5):
        label.append(rand_range(31,65)) #產生車牌後續5個字母的標籤

    name+=chars[label[0]]
    name+=chars[label[1]]
    for i in range(5):
        name+=chars[label[i+2]]
    return name,label

def gen_sample(genplate, width, height):
    num,label =gen_rand()
    img = genplate.generate(num)
    img = cv2.resize(img,(width,height))
    img = np.multiply(img,1/255.0) #[height,width,channel]
    #img = img.transpose(2,0,1)
    #img = img.transpose(1,0,2)
    return label,img        #返回的label爲標籤,img爲深度爲3的圖像像素

3. 網絡模型(tensorflow)

先附上代碼model.py:

"""
Created on Tue Sep  5 14:04:19 2017

@author: llc
"""

import tensorflow as tf
import numpy as np

def inference (images,keep_prob):
    '''
    Build the model
    Args:
        image: image batch,4D tensor,tf.float32,[batch_size,height,width,channels]
    Returns:
        output tensor with the computed logits,float,[batch_size,65]
    '''

    with tf.variable_scope('conv1') as scope:
        weights = tf.get_variable('weights',
                                  shape = [3,3,3,32],
                                  dtype = tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.1,dtype=tf.float32))
        conv = tf.nn.conv2d(images,weights,strides=[1,1,1,1],padding='VALID') 
        biases = tf.get_variable('biases',
                                 shape=[32],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        pre_activation = tf.nn.bias_add(conv,biases)
        conv1 = tf.nn.relu(pre_activation,name= scope.name)

    # conv2
    with tf.variable_scope('conv2') as scope:
        weights = tf.get_variable('weights',shape=[3,3,32,32],dtype=tf.float32,initializer=tf.truncated_normal_initializer(stddev=0.1,dtype=tf.float32))
        conv = tf.nn.conv2d(conv1,weights,strides=[1,1,1,1],padding='VALID')
        biases = tf.get_variable('biases',
                                 shape=[32],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        pre_activation = tf.nn.bias_add(conv,biases)
        conv2 = tf.nn.relu(pre_activation,name= scope.name)

    with tf.variable_scope('max_pooling1') as scope:
        pool1 = tf.nn.max_pool(conv2,ksize = [1,2,2,1],strides= [1,2,2,1],padding='VALID',name='pooling1')

    #%%
    #conv3
    with tf.variable_scope('conv3') as scope:
        weights = tf.get_variable('weights',shape=[3,3,32,64],dtype=tf.float32,initializer=tf.truncated_normal_initializer(stddev=0.1,dtype=tf.float32))
        conv = tf.nn.conv2d(pool1,weights,strides=[1,1,1,1],padding='VALID')
        biases = tf.get_variable('biases',shape=[64],dtype = tf.float32,initializer= tf.constant_initializer(0.1))
        pre_activation = tf.nn.bias_add(conv,biases)
        conv3 = tf.nn.relu(pre_activation,name=scope.name)

    #conv4
    with tf.variable_scope('conv4') as scope:
        weights = tf.get_variable('weights',shape=[3,3,64,64],dtype=tf.float32,initializer=tf.truncated_normal_initializer(stddev=0.1,dtype=tf.float32))
        conv =tf.nn.conv2d(conv3,weights,strides=[1,1,1,1],padding='VALID')
        biases = tf.get_variable('biases',shape=[64],dtype=tf.float32,initializer=tf.constant_initializer(0.1))
        pre_activation = tf.nn.bias_add(conv,biases)
        conv4 = tf.nn.relu(pre_activation,name=scope.name)

    with tf.variable_scope('max_pooling2') as scope:
        pool2 = tf.nn.max_pool(conv4,ksize=[1,2,2,1],strides=[1,2,2,1],padding='VALID',name='pooling2')   

    #conv5
    with tf.variable_scope('conv5') as scope:
        weights = tf.get_variable('weights',shape=[3,3,64,128],dtype=tf.float32,initializer=tf.truncated_normal_initializer(stddev=0.1,dtype=tf.float32))
        conv =tf.nn.conv2d(pool2,weights,strides=[1,1,1,1],padding='VALID')
        biases = tf.get_variable('biases',shape=[128],dtype=tf.float32,initializer=tf.constant_initializer(0.1))
        pre_activation = tf.nn.bias_add(conv,biases)
        conv5 = tf.nn.relu(pre_activation,name=scope.name)

    #conv6
    with tf.variable_scope('conv6') as scope:
        weights = tf.get_variable('weights',shape=[3,3,128,128],dtype=tf.float32,initializer=tf.truncated_normal_initializer(stddev=0.1,dtype=tf.float32))
        conv =tf.nn.conv2d(conv5,weights,strides=[1,1,1,1],padding='VALID')
        biases = tf.get_variable('biases',shape=[128],dtype=tf.float32,initializer=tf.constant_initializer(0.1))
        pre_activation = tf.nn.bias_add(conv,biases)
        conv6 = tf.nn.relu(pre_activation,name=scope.name)

    #pool3
    with tf.variable_scope('max_pool3') as scope:
         pool3 = tf.nn.max_pool(conv6,ksize=[1,2,2,1],strides=[1,2,2,1],padding='VALID',name='pool3')
    #%%
    #fc1_flatten
    with tf.variable_scope('fc1') as scope:
        shp = pool3.get_shape()
        flattened_shape =shp[1].value*shp[2].value*shp[3].value
        reshape = tf.reshape(pool3,[-1,flattened_shape])
#        dim = reshape.get_shape()[-1].value
#        weights = tf.get_variable('weights',shape=[dim,256],dtype = tf.float32,
#                                  initializer = tf.truncated_normal_initializer(stddev=0.005,dtype=tf.float32))
#        biases = tf.get_variable('biases',
#                                 shape=[256],
#                                 dtype=tf.float32,
#                                 initializer = tf.truncated_normal_initializer(0.1)
#                                 ) 
#        fc1 = tf.matmul(reshape,weights)+biases
#        fc1 = tf.nn.relu(fc1,name=scope.name)
        fc1 = tf.nn.dropout(reshape,keep_prob,name='fc1_dropdot')

    with tf.variable_scope('fc21') as scope:              
        weights = tf.get_variable('weights',
                                  shape=[flattened_shape,65],
                                  dtype=tf.float32, 
                                  initializer=tf.truncated_normal_initializer(stddev=0.005,dtype=tf.float32))

        biases = tf.get_variable('biases',
                                 shape=[65],
                                 dtype=tf.float32,
                                 initializer = tf.truncated_normal_initializer(0.1)
                                 )
        fc21 = tf.matmul(fc1,weights)+biases
    with tf.variable_scope('fc22') as scope:                     
        weights = tf.get_variable('weights',
                                  shape=[flattened_shape,65],
                                  dtype=tf.float32, 
                                  initializer=tf.truncated_normal_initializer(stddev=0.005,dtype=tf.float32))

        biases = tf.get_variable('biases',
                                 shape=[65],
                                 dtype=tf.float32,
                                 initializer = tf.truncated_normal_initializer(0.1)
                                 )
        fc22 = tf.matmul(fc1,weights)+biases
    with tf.variable_scope('fc23') as scope:                      
        weights = tf.get_variable('weights',
                                  shape=[flattened_shape,65],
                                  dtype=tf.float32, 
                                  initializer=tf.truncated_normal_initializer(stddev=0.005,dtype=tf.float32))

        biases = tf.get_variable('biases',
                                 shape=[65],
                                 dtype=tf.float32,
                                 initializer = tf.truncated_normal_initializer(0.1)
                                 )
        fc23= tf.matmul(fc1,weights)+biases
    with tf.variable_scope('fc24') as scope:                      
        weights = tf.get_variable('weights',
                                  shape=[flattened_shape,65],
                                  dtype=tf.float32, 
                                  initializer=tf.truncated_normal_initializer(stddev=0.005,dtype=tf.float32))

        biases = tf.get_variable('biases',
                                 shape=[65],
                                 dtype=tf.float32,
                                 initializer = tf.truncated_normal_initializer(0.1)
                                 )
        fc24 = tf.matmul(fc1,weights)+biases
    with tf.variable_scope('fc25') as scope:   
        weights = tf.get_variable('weights',
                                  shape=[flattened_shape,65],
                                  dtype=tf.float32, 
                                  initializer=tf.truncated_normal_initializer(stddev=0.005,dtype=tf.float32))

        biases = tf.get_variable('biases',
                                 shape=[65],
                                 dtype=tf.float32,
                                 initializer = tf.truncated_normal_initializer(0.1)
                                 )
        fc25 = tf.matmul(fc1,weights)+biases
    with tf.variable_scope('fc26') as scope:                       
        weights = tf.get_variable('weights',
                                  shape=[flattened_shape,65],
                                  dtype=tf.float32, 
                                  initializer=tf.truncated_normal_initializer(stddev=0.005,dtype=tf.float32))

        biases = tf.get_variable('biases',
                                 shape=[65],
                                 dtype=tf.float32,
                                 initializer = tf.truncated_normal_initializer(0.1)
                                 )
        fc26 = tf.matmul(fc1,weights)+biases
    with tf.variable_scope('fc27') as scope:                   
        weights = tf.get_variable('weights',
                                  shape=[flattened_shape,65],
                                  dtype=tf.float32, 
                                  initializer=tf.truncated_normal_initializer(stddev=0.005,dtype=tf.float32))

        biases = tf.get_variable('biases',
                                 shape=[65],
                                 dtype=tf.float32,
                                 initializer = tf.truncated_normal_initializer(0.1)
                                 )
        fc27 = tf.matmul(fc1,weights)+biases
#    with tf.variable_scope('fc2') as scope:       
#        fc2 = tf.concat([fc21,fc22,fc23,fc24,fc25,fc26,fc27],0)  #shape = [7xbatch_size,65]
#        
#    return fc2


    return fc21,fc22,fc23,fc24,fc25,fc26,fc27    #shape = [7,batch_size,65]

def losses(logits1,logits2,logits3,logits4,logits5,logits6,logits7,labels):
    '''Compute loss from logits and labels
    Args:
        logits: logits tensor, float, [7*batch_size, 65]
        labels: label tensor, tf.int32, [7*batch_size]

    Returns:
        loss tensor of float type
    '''
    labels = tf.convert_to_tensor(labels,tf.int32)

    with tf.variable_scope('loss1') as scope:
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits1, labels=labels[:,0], name='xentropy_per_example')
        #cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits,labels=labels,name='xentropy_per_example')
        loss1 = tf.reduce_mean(cross_entropy, name='loss1')
        tf.summary.scalar(scope.name+'/loss1', loss1)

    with tf.variable_scope('loss2') as scope:
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits2, labels=labels[:,1], name='xentropy_per_example')
        #cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits,labels=labels,name='xentropy_per_example')
        loss2 = tf.reduce_mean(cross_entropy, name='loss2')
        tf.summary.scalar(scope.name+'/loss2', loss2)   

    with tf.variable_scope('loss3') as scope:
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits3, labels=labels[:,2], name='xentropy_per_example')
        #cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits,labels=labels,name='xentropy_per_example')
        loss3 = tf.reduce_mean(cross_entropy, name='loss3')
        tf.summary.scalar(scope.name+'/loss3', loss3)

    with tf.variable_scope('loss4') as scope:
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits4, labels=labels[:,3], name='xentropy_per_example')
        #cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits,labels=labels,name='xentropy_per_example')
        loss4 = tf.reduce_mean(cross_entropy, name='loss4')
        tf.summary.scalar(scope.name+'/loss4', loss4)

    with tf.variable_scope('loss5') as scope:
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits5, labels=labels[:,4], name='xentropy_per_example')
        #cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits,labels=labels,name='xentropy_per_example')
        loss5 = tf.reduce_mean(cross_entropy, name='loss5')
        tf.summary.scalar(scope.name+'/loss5', loss5)

    with tf.variable_scope('loss6') as scope:
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits6, labels=labels[:,5], name='xentropy_per_example')
        #cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits,labels=labels,name='xentropy_per_example')
        loss6 = tf.reduce_mean(cross_entropy, name='loss6')
        tf.summary.scalar(scope.name+'/loss6', loss6)

    with tf.variable_scope('loss7') as scope:
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits7, labels=labels[:,6], name='xentropy_per_example')
        #cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits,labels=labels,name='xentropy_per_example')
        loss7 = tf.reduce_mean(cross_entropy, name='loss7')
        tf.summary.scalar(scope.name+'/loss7', loss7)

    return loss1,loss2,loss3,loss4,loss5,loss6,loss7

def trainning( loss1,loss2,loss3,loss4,loss5,loss6,loss7, learning_rate):
    '''Training ops, the Op returned by this function is what must be passed to 
        'sess.run()' call to cause the model to train.

    Args:
        loss: loss tensor, from losses()

    Returns:
        train_op: The op for trainning
    '''
    with tf.name_scope('optimizer1'):
        optimizer1 = tf.train.AdamOptimizer(learning_rate= learning_rate)
        global_step = tf.Variable(0, name='global_step', trainable=False)
        train_op1 = optimizer1.minimize(loss1, global_step= global_step)
    with tf.name_scope('optimizer2'):
        optimizer2 = tf.train.AdamOptimizer(learning_rate= learning_rate)
        global_step = tf.Variable(0, name='global_step', trainable=False)
        train_op2 = optimizer2.minimize(loss2, global_step= global_step) 
    with tf.name_scope('optimizer3'):
        optimizer3 = tf.train.AdamOptimizer(learning_rate= learning_rate)
        global_step = tf.Variable(0, name='global_step', trainable=False)
        train_op3 = optimizer3.minimize(loss3, global_step= global_step) 
    with tf.name_scope('optimizer4'):
        optimizer4 = tf.train.AdamOptimizer(learning_rate= learning_rate)
        global_step = tf.Variable(0, name='global_step', trainable=False)
        train_op4 = optimizer4.minimize(loss4, global_step= global_step) 
    with tf.name_scope('optimizer5'):
        optimizer5 = tf.train.AdamOptimizer(learning_rate= learning_rate)
        global_step = tf.Variable(0, name='global_step', trainable=False)
        train_op5 = optimizer5.minimize(loss5, global_step= global_step) 
    with tf.name_scope('optimizer6'):
        optimizer6 = tf.train.AdamOptimizer(learning_rate= learning_rate)
        global_step = tf.Variable(0, name='global_step', trainable=False)
        train_op6 = optimizer6.minimize(loss6, global_step= global_step) 
    with tf.name_scope('optimizer7'):
        optimizer7 = tf.train.AdamOptimizer(learning_rate= learning_rate)
        global_step = tf.Variable(0, name='global_step', trainable=False)
        train_op7 = optimizer7.minimize(loss7, global_step= global_step) 

    return train_op1,train_op2,train_op3,train_op4,train_op5,train_op6,train_op7

def evaluation(logits1,logits2,logits3,logits4,logits5,logits6,logits7,labels):
  """Evaluate the quality of the logits at predicting the label.
  Args:
    logits: Logits tensor, float - [batch_size, NUM_CLASSES].
    labels: Labels tensor, int32 - [batch_size], with values in the
      range [0, NUM_CLASSES).
  Returns:
    A scalar int32 tensor with the number of examples (out of batch_size)
    that were predicted correctly.
  """
  logits_all = tf.concat([logits1,logits2,logits3,logits4,logits5,logits6,logits7],0) 
  labels = tf.convert_to_tensor(labels,tf.int32)
  labels_all = tf.reshape(tf.transpose(labels),[-1])
  with tf.variable_scope('accuracy') as scope:
      correct = tf.nn.in_top_k(logits_all, labels_all, 1)
      correct = tf.cast(correct, tf.float16)
      accuracy = tf.reduce_mean(correct)
      tf.summary.scalar(scope.name+'/accuracy', accuracy)   
  return accuracy
#

代碼可以簡化,用keras寫更簡單,網上有keras版的教程keras版車牌識別。由於車牌爲7個字符,每個字符對應65(數字/英文字母/省份)個標籤,當然,前面兩個字符僅分別爲:省份、字母。因此,神經網絡的輸出要求有7個單元(全連接層),每個單元對應65個字符標籤(驗證碼等識別可根據需求設計)。

網絡設計中的幾個關鍵點:

 (1)全連接層Fc1:該層將卷積層及池化層提取出的特徵全部flatten爲1維張量,便於後續7個輸出單元。
 (2)Dropout:對Fc1層訓練階段進行dropout,防止過擬合,測試時無需dropout。
 (3)7個輸出單元:fc21,fc22,fc23,fc24,fc25,fc26,fc27。
 (4)Loss:剛開始設計時,本人將7個輸出單元tf.concat在一起,即合成爲一個logits,計算其loss。發現loss始終在24左右,無法下降。後來,將7個輸出單元分別計算loss,每個loss都基本正常。

這裏寫圖片描述

即使loss7也還行:
這裏寫圖片描述
(以上結果並不是最優的訓練版本。)

 (5)訓練精確度:直接將7個輸出單元concat,利用tf.nn.in_top_k計算。

這裏寫圖片描述


(未完,見下一篇)
端到端車牌識別(2)

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章