TensorFlow使用深度學習破解字符驗證碼

本篇環境Ubuntu 16.04.3 LTS，Python 3.5.2，TensorFlow1.4.1

1、驗證碼製作

首先，爲了得到訓練集（即“驗證碼圖片”以及對應的“正確結果”），這裏使用php自動生成驗證碼，而非人力打碼。

Ubuntu上安裝php
sudo apt-get install libapache2-mod-php
sudo apt-get install php
sudo apt-get install apache2
php生成驗證碼（PS：借用了網絡管理課程的驗證碼生成代碼）
TestCodeLen爲2，即驗證碼長度爲2

<?php
// Generates a labelled captcha data set.
// For every sample a random code of $TestCodeLen characters is drawn from
// $TestCodeChars, printed on its own line, rendered in red on a white
// background with 50 red noise pixels, and saved as
// <output dir>/<code>-<sample index>.jpg so the label can be read back from
// the file name.

$TestCodeChars="1234567890";
$TestCodeLen=2;
$TestCode="";
$Num=1000000;
$OutDir="testdata";

// The image parameters are identical for every sample, so set them once.
// NOTE(review): this is a Windows font path. On any other system point $font
// at an installed TrueType font, otherwise imagettftext() cannot draw the code.
$font="C:\Windows\Fonts\TIMESBD.ttf";
$FontSize=14;
$angle=10;
$AddSize=6;
$x_size=$TestCodeLen*$FontSize+$AddSize;
$y_size=$FontSize+$AddSize;

// Fail once, up front, instead of emitting a warning for every sample.
if(!is_dir($OutDir) && !mkdir($OutDir,0777,true)){
    die("Cannot create output directory ".$OutDir."\n");
}

// The generator is deliberately NOT reseeded here: PHP seeds rand()
// automatically. Reseeding on every iteration with the microsecond fraction
// multiplied by 1000 allows at most 1000 distinct seeds, which caps the whole
// data set at 1000 distinct (code, noise) combinations.
for($n=0;$n<$Num;$n++){
    // Draw the captcha text from the character set.
    $TestCode="";
    for($i=0;$i<$TestCodeLen;$i++){
        $TestCode .=$TestCodeChars[rand(0,strlen($TestCodeChars)-1)];
    }
    print($TestCode."\n");

    $im=imagecreatetruecolor($x_size,$y_size);
    if($im===false){
        die("imagecreatetruecolor() failed for sample ".$n."\n");
    }

    $white=imagecolorallocate($im,255,255,255);
    $red=imagecolorallocate($im,255,0,0);

    // Draw the captcha: white background, then one rotated glyph per character.
    imagefilledrectangle($im,0,0,$x_size-1,$y_size-1,$white);

    for($i=0;$i<$TestCodeLen;$i++){
        imagettftext($im,$FontSize,$angle,$FontSize*$i+$AddSize,$FontSize+$AddSize/2,$red,$font,$TestCode[$i]);
    }

    // Noise pixels. rand() bounds are inclusive and valid coordinates run
    // from 0 to size-1, so the upper bound must be size-1.
    for($j=0;$j<50;$j++){
        imagesetpixel($im,rand(0,$x_size-1),rand(0,$y_size-1),$red);
    }

    // NOTE(review): the data is PNG-encoded although the file name ends in
    // .jpg. This is kept so pixels stay lossless and file names stay unchanged.
    imagepng($im,$OutDir."/".$TestCode."-".$n.".jpg");
    imagedestroy($im);
}
?>

2、TensorFlow卷積神經網絡構建

因爲是第一次搞這個，所以random_mini_batches()以及整體函數用了以前coursera上的作業。
先是讀入文件，然後轉爲灰度，再經過2層卷積層和最後的全連接層獲得輸出。
採用端到端的理念，輸入直接是驗證碼圖片（不經過分割），爲了簡便，輸入是20*34=680個像素點，對應680個神經元，輸出就是“0-9十個數字”乘以驗證碼長度2，共計20個神經元。
epoch是指把整個訓練集完整輪過的次數；訓練時每隔5個epoch檢查一次，如果此時訓練的acc>0.5，就把當前權重對應的網絡保存下來。
PS：事實上，就輪了100次，訓練集和測試集的正確率就到70%以上了，甚至有時候達到90%

import os
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import math

# Load a single training image to inspect its pixel dimensions and other basic properties.
img=np.array(Image.open('/home/lygwangyp/TensorflowCaptcha/traindata/28-1708.jpg'))# open the image and convert it to a numeric array
plt.imshow(img)
plt.show()

# Report the array's shape, element type, total element count and Python type.
print(img.shape)
print(img.dtype)
print(img.size)
print(type(img))

(20, 34, 3)
uint8
2040
<class 'numpy.ndarray'>

def convert2gray(img):
    """Convert an image array to a single-channel float32 grayscale array.

    Arguments:
    img -- image array of shape (H, W, C), or (H, W) when it is already
           single-channel

    Returns:
    gray -- float32 array of shape (H, W, 1); for multi-channel input each
            pixel is the mean over the last axis, for 2-D input the values
            are copied unchanged
    """
    img = np.asarray(img)
    if img.ndim == 2:
        # Already one value per pixel: averaging over the last axis would
        # collapse the width instead of the channels.
        intensity = img
    else:
        intensity = np.mean(img, -1)
    gray = np.empty((img.shape[0], img.shape[1], 1), dtype='float32')
    gray[:, :, 0] = intensity
    return gray

# Convert the sample image to grayscale and display its single channel with a gray colormap.
greyimg = convert2gray(img)
plt.imshow(greyimg[:,:,0],cmap = 'gray')

<matplotlib.image.AxesImage at 0x7f9ef3d80390>

img.shape

(20, 34, 3)

greyimg.shape

(20, 34, 1)

def load_data(filepath):
    """Load every captcha image in a directory together with its encoded label.

    File names are expected to look like ``<code>-<suffix>``; the text before
    the first ``-`` is taken as the captcha code.

    Arguments:
    filepath -- directory containing the image files (with or without a
                trailing path separator)

    Returns:
    data -- float32 array of shape (num, IMAGE_HEIGHT, IMAGE_WIDTH, 1) holding
            the grayscale images
    label -- array with one text2vec() encoding per image, in the same order
    """
    # os.listdir returns the names of all entries in the directory.
    imgs = os.listdir(filepath)
    num = len(imgs)
    # np.empty does not initialise its entries; every slot is written below.
    data = np.empty((num, IMAGE_HEIGHT, IMAGE_WIDTH, 1), dtype='float32')
    labellist = []
    for i, name in enumerate(imgs):
        # os.path.join works whether or not filepath ends with a separator.
        with Image.open(os.path.join(filepath, name)) as pil_img:
            img = np.array(pil_img)
        data[i] = convert2gray(img)
        # Encode the code as text. Going through int() would drop leading
        # zeros ("05" would be encoded as "5"), which mislabels every captcha
        # that starts with 0.
        labellist.append(text2vec(name.split('-')[0]))
    label = np.array(labellist)
    return data,label

def initialize_parameters():
    """Create the two convolution filter variables of the network.

    Returns:
    parameters -- dict with "W1" (shape [4, 4, 1, 8]) and "W2"
                  (shape [2, 2, 8, 16]), each obtained via tf.get_variable
                  with a Xavier initializer seeded with 0
    """
    filter_specs = (("W1", [4, 4, 1, 8]),
                    ("W2", [2, 2, 8, 16]))
    parameters = {}
    with tf.variable_scope(""):
        for var_name, var_shape in filter_specs:
            parameters[var_name] = tf.get_variable(
                var_name, var_shape,
                initializer = tf.contrib.layers.xavier_initializer(seed = 0))
    return parameters

def text2vec(text):
    """Encode a captcha string as a flat one-hot vector.

    The vector has MAX_CAPTCHA slots of CHAR_SET_LEN entries each; character
    ``i`` of ``text`` sets exactly one entry inside slot ``i``.

    Arguments:
    text -- captcha text, at most MAX_CAPTCHA characters long

    Returns:
    vector -- numpy array of length MAX_CAPTCHA*CHAR_SET_LEN containing 0/1

    Raises:
    ValueError -- if the text is too long, or a character has no index inside
                  the configured character set
    """
    text_len = len(text)
    if text_len > MAX_CAPTCHA:
        # Report the configured limit rather than a hard-coded number.
        raise ValueError('驗證碼最長%d個字符' % MAX_CAPTCHA)

    vector = np.zeros(MAX_CAPTCHA*CHAR_SET_LEN)
    def char2pos(c):
        # Index layout: digits 0-9, then 'A'-'Z' (10-35), 'a'-'z' (36-61), '_' (62).
        if c == '_':
            k = 62
        elif '0' <= c <= '9':
            k = ord(c) - ord('0')
        elif 'A' <= c <= 'Z':
            k = ord(c) - ord('A') + 10
        elif 'a' <= c <= 'z':
            k = ord(c) - ord('a') + 36
        else:
            # Characters in the ASCII gaps (e.g. ':' or '[') must not be
            # mapped onto some other character's index.
            raise ValueError('No Map')
        if k >= CHAR_SET_LEN:
            # An index outside the slot would set a bit that belongs to the
            # next character, or fall off the end of the vector.
            raise ValueError('No Map')
        return k
    for i, c in enumerate(text):
        idx = i * CHAR_SET_LEN + char2pos(c)
        vector[idx] = 1
    return vector

def vec2text(vec):
    """Decode a one-hot captcha vector back into text.

    Every non-zero entry of ``vec`` contributes one character, in index order.
    The entry's index modulo CHAR_SET_LEN selects the character: 0-9 are
    digits, 10-35 are 'A'-'Z', 36-61 are 'a'-'z' and 62 is '_'.

    Raises:
    ValueError -- if an index modulo CHAR_SET_LEN is greater than 62
    """
    decoded = []
    for flat_index in vec.nonzero()[0]:
        slot = flat_index % CHAR_SET_LEN
        if slot > 62:
            raise ValueError('error')
        if slot == 62:
            symbol = '_'
        elif slot >= 36:
            symbol = chr(slot - 36 + ord('a'))
        elif slot >= 10:
            symbol = chr(slot - 10 + ord('A'))
        else:
            symbol = chr(slot + ord('0'))
        decoded.append(symbol)
    return "".join(decoded)

def forward_propagation(X, parameters):
    """Build the forward pass of the network.

    Layer order: CONV2D -> RELU -> MAXPOOL -> CONV2D -> RELU -> MAXPOOL ->
    FLATTEN -> FULLY CONNECTED.

    Arguments:
    X -- input tensor passed to the first convolution
    parameters -- dict holding the convolution filters under "W1" and "W2"

    Returns:
    Z3 -- output of the fully connected layer, MAX_CAPTCHA*CHAR_SET_LEN units,
          with no activation applied
    """
    filters1, filters2 = parameters['W1'], parameters['W2']
    unit_stride = [1, 1, 1, 1]

    # Stage 1: stride-1 'SAME' convolution, ReLU, then 8x8 max pooling with stride 8.
    conv1 = tf.nn.conv2d(X, filters1, strides = unit_stride, padding = 'SAME')
    act1 = tf.nn.relu(conv1)
    pool1 = tf.nn.max_pool(act1, ksize = [1,8,8,1], strides = [1,8,8,1], padding = 'SAME')

    # Stage 2: stride-1 'SAME' convolution, ReLU, then 4x4 max pooling with stride 4.
    conv2 = tf.nn.conv2d(pool1, filters2, strides = unit_stride, padding = 'SAME')
    act2 = tf.nn.relu(conv2)
    pool2 = tf.nn.max_pool(act2, ksize = [1,4,4,1], strides = [1,4,4,1], padding = 'SAME')

    # Head: flatten, then a linear layer with one output per (position, character) pair.
    features = tf.contrib.layers.flatten(pool2)
    return tf.contrib.layers.fully_connected(features, num_outputs = MAX_CAPTCHA*CHAR_SET_LEN, activation_fn=None)

def compute_cost(Z3, Y):
    """Return the mean sigmoid cross-entropy between logits Z3 and labels Y.

    Arguments:
    Z3 -- logits tensor
    Y -- labels tensor, passed as ``labels`` to the loss op

    Returns:
    cost -- scalar tensor, the mean of the element-wise loss
    """
    elementwise_loss = tf.nn.sigmoid_cross_entropy_with_logits(logits = Z3, labels = Y)
    return tf.reduce_mean(elementwise_loss)

def random_mini_batches(X, Y, mini_batch_size = 64, seed = 0):
    """
    Split (X, Y) into shuffled mini-batches.

    Arguments:
    X -- input data, indexed as X[i, :, :, :] (first axis = examples)
    Y -- labels, indexed as Y[i, :] (first axis = examples)
    mini_batch_size -- number of examples per mini-batch, integer
    seed -- value passed to np.random.seed before shuffling, so the same seed
            gives the same batches

    Returns:
    mini_batches -- list of (mini_batch_X, mini_batch_Y) tuples; X and Y are
                    shuffled with the same permutation, and a final smaller
                    batch holds the remainder when the example count is not a
                    multiple of mini_batch_size
    """
    example_count = X.shape[0]
    np.random.seed(seed)

    # Apply one shared permutation so images and labels stay aligned.
    order = list(np.random.permutation(example_count))
    X_shuffled = X[order, :, :, :]
    Y_shuffled = Y[order, :]

    # Slice boundaries of the full-size batches...
    full_batches = math.floor(example_count / mini_batch_size)
    bounds = [(b * mini_batch_size, b * mini_batch_size + mini_batch_size)
              for b in range(0, full_batches)]
    # ...followed by the leftover examples, if any.
    if example_count % mini_batch_size != 0:
        bounds.append((full_batches * mini_batch_size, example_count))

    return [(X_shuffled[lo:hi, :, :, :], Y_shuffled[lo:hi, :]) for lo, hi in bounds]

# Image size and label layout shared by the functions in this script.
IMAGE_HEIGHT = 20
IMAGE_WIDTH = 34
MAX_CAPTCHA = 2 # maximum number of characters in a captcha
CHAR_SET_LEN = 10 # number of characters in the captcha character set
# NOTE(review): the model(...) call in this script does not pass this value,
# so training runs with model()'s own default learning rate -- confirm intent.
learning_rate = 0.001

# Load the training set and the test set from their directories.
X_train,Y_train = load_data("/home/lygwangyp/TensorflowCaptcha/traindata/")
X_test,Y_test = load_data("/home/lygwangyp/TensorflowCaptcha/testdata/")

def model(X_train, Y_train, X_test, Y_test, learning_rate = 0.009,
          num_epochs = 100, minibatch_size = 64, print_cost = True):
    """Build, train and evaluate the captcha network.

    Arguments:
    X_train -- training images, fed to a placeholder of shape
               (None, IMAGE_HEIGHT, IMAGE_WIDTH, 1)
    Y_train -- training labels, fed to a placeholder of shape
               (None, MAX_CAPTCHA*CHAR_SET_LEN)
    X_test -- test images, same layout as X_train
    Y_test -- test labels, same layout as Y_train
    learning_rate -- learning rate of the Adam optimizer
    num_epochs -- number of passes over the training set
    minibatch_size -- number of examples per mini-batch
    print_cost -- when true: print cost and accuracy every 5 epochs, save a
                  checkpoint at those epochs if the accuracy exceeds 0.5, and
                  record the per-epoch cost for the final plot

    Returns:
    parameters -- the dict of filter variables from initialize_parameters()

    The reported accuracy is the fraction of character positions whose
    arg-max prediction matches the label, not the fraction of whole captchas.
    """
    checkpoint_path = "/home/lygwangyp/TensorflowCaptcha/pengpengNet/Saved_model/pengNetCaptcha.model"

    # Build in a private graph so the function can be called more than once in
    # the same process: tf.get_variable would otherwise fail on the second
    # call because "W1"/"W2" already exist in the default graph.
    with tf.Graph().as_default():
        # The graph-level seed only affects ops created after it is set, so it
        # has to come before any part of the network is built.
        tf.set_random_seed(2333)

        X = tf.placeholder(tf.float32, [None, IMAGE_HEIGHT, IMAGE_WIDTH, 1])
        Y = tf.placeholder(tf.float32, [None, MAX_CAPTCHA*CHAR_SET_LEN])
        parameters = initialize_parameters()
        Z3 = forward_propagation(X, parameters)
        cost = compute_cost(Z3, Y)

        optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(cost)

        # Per-position arg-max of prediction vs. label.
        predict = tf.reshape(Z3, [-1, MAX_CAPTCHA, CHAR_SET_LEN])
        max_idx_p = tf.argmax(predict, 2)
        max_idx_l = tf.argmax(tf.reshape(Y, [-1, MAX_CAPTCHA, CHAR_SET_LEN]), 2)
        correct_pred = tf.equal(max_idx_p, max_idx_l)
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

        seed = 233
        costs = []    # per-epoch cost, for the plot

        if print_cost:
            # Make sure the checkpoint directory exists before training starts.
            os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

        with tf.Session() as sess:
            sess.run(init)

            for epoch in range(num_epochs):
                # A new seed per epoch gives a different shuffle each time.
                seed = seed + 1
                minibatches = random_mini_batches(X_train, Y_train, minibatch_size, seed)

                cost_sum = 0.
                acc_sum = 0.
                examples_seen = 0
                for (minibatch_X, minibatch_Y) in minibatches:
                    # One optimisation step; also fetch this batch's cost and accuracy.
                    _ , temp_cost, temp_acc = sess.run([optimizer, cost, accuracy], feed_dict={X: minibatch_X, Y: minibatch_Y})
                    batch_len = minibatch_X.shape[0]
                    cost_sum += temp_cost * batch_len
                    acc_sum += temp_acc * batch_len
                    examples_seen += batch_len

                # Average over every example actually seen. This includes the
                # smaller tail batch and avoids dividing by zero when the
                # training set is smaller than one mini-batch.
                if examples_seen > 0:
                    minibatch_cost = cost_sum / examples_seen
                    acc = acc_sum / examples_seen
                else:
                    minibatch_cost = 0.
                    acc = 0.

                if print_cost:
                    if epoch % 5 == 0:
                        print ("Cost after epoch %i: %f\nThis epoch Accuracy is %f" % (epoch, minibatch_cost, acc))
                        # acc is the whole-epoch average, so the checkpoint
                        # decision no longer depends on the last batch alone.
                        if acc>0.5:
                            saver.save(sess, checkpoint_path, global_step=epoch)
                    costs.append(minibatch_cost)

            # Plot the cost; one point per epoch.
            plt.plot(np.squeeze(costs))
            plt.ylabel('cost')
            plt.xlabel('epochs')
            plt.title("Learning rate =" + str(learning_rate))
            plt.show()

            train_accuracy = accuracy.eval({X: X_train, Y: Y_train})
            print("All Train Accuracy:", train_accuracy)
            test_accuracy = accuracy.eval({X: X_test, Y: Y_test})
            print("All Test Accuracy:", test_accuracy)
            return parameters

# Train for 100 epochs; learning_rate is not passed, so model() uses its default value.
parameters = model(X_train, Y_train, X_test, Y_test, num_epochs = 100)

Cost after epoch 0: 1.301943
This epoch Accuracy is 0.113636
Cost after epoch 5: 0.311392
This epoch Accuracy is 0.113636
Cost after epoch 10: 0.309575
This epoch Accuracy is 0.113636
Cost after epoch 15: 0.307371
This epoch Accuracy is 0.181818
Cost after epoch 20: 0.305834
This epoch Accuracy is 0.068182
Cost after epoch 25: 0.305516
This epoch Accuracy is 0.136364
Cost after epoch 30: 0.305877
This epoch Accuracy is 0.181818
Cost after epoch 35: 0.300717
This epoch Accuracy is 0.318182
Cost after epoch 40: 0.146618
This epoch Accuracy is 0.659091
Cost after epoch 45: 0.105703
This epoch Accuracy is 0.750000
Cost after epoch 50: 0.094605
This epoch Accuracy is 0.931818
Cost after epoch 55: 0.098196
This epoch Accuracy is 0.840909
Cost after epoch 60: 0.093290
This epoch Accuracy is 0.840909
Cost after epoch 65: 0.096823
This epoch Accuracy is 0.750000
Cost after epoch 70: 0.086718
This epoch Accuracy is 0.863636
Cost after epoch 75: 0.083978
This epoch Accuracy is 0.818182
Cost after epoch 80: 0.080751
This epoch Accuracy is 0.931818
Cost after epoch 85: 0.094561
This epoch Accuracy is 0.818182
Cost after epoch 90: 0.078955
This epoch Accuracy is 0.909091
Cost after epoch 95: 0.106287
This epoch Accuracy is 0.772727
All Train Accuracy: 0.770552
All Test Accuracy: 0.770072

3、TensorFlow神經網絡測試

隨手選了一張圖片，加載之前保存的權值，進行測試，測了幾張都能準確識別出來。

import os
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import math
from tools import *

# Image size and label layout; these must match the values used for training.
IMAGE_HEIGHT = 20
IMAGE_WIDTH = 34
MAX_CAPTCHA = 2 # maximum number of characters in a captcha
CHAR_SET_LEN = 10 # number of characters in the captcha character set
# NOTE(review): not referenced by any visible code in this test script.
learning_rate = 0.001

def testImg(greyimg):
    """Predict the text of one grayscale captcha using the latest checkpoint.

    Arguments:
    greyimg -- a single image, fed as a batch of one to a placeholder of shape
               (None, IMAGE_HEIGHT, IMAGE_WIDTH, 1)

    Returns:
    The predicted captcha text (also printed).
    """
    inference_graph = tf.Graph()
    with inference_graph.as_default():
        # Rebuild the network in its own graph so the saver sees only its variables.
        X = tf.placeholder(tf.float32, [None, IMAGE_HEIGHT, IMAGE_WIDTH, 1])
        logits = forward_propagation(X, initialize_parameters())
        predicted_indices = tf.argmax(tf.reshape(logits, [-1, MAX_CAPTCHA, CHAR_SET_LEN]), 2)
        saver = tf.train.Saver()

    with tf.Session(graph=inference_graph) as sess:
        newest_checkpoint = tf.train.latest_checkpoint('/home/lygwangyp/TensorflowCaptcha/pengpengNet/Saved_model')
        saver.restore(sess, newest_checkpoint)
        batch_prediction = sess.run(predicted_indices, feed_dict={X: [greyimg]})

        # Turn the per-position class indices into a one-hot vector, then decode it.
        one_hot = np.zeros(MAX_CAPTCHA*CHAR_SET_LEN)
        for position, class_index in enumerate(batch_prediction[0].tolist()):
            one_hot[position*CHAR_SET_LEN + class_index] = 1
        decoded = vec2text(one_hot)
        print(decoded)
        return decoded

# Pick one image from the test set and run it through the trained network.
# Load the image; its pixel array is converted to grayscale below and also shown in the plot.
img=np.array(Image.open('/home/lygwangyp/TensorflowCaptcha/testdata/53-6547.jpg'))# open the image and convert it to a numeric array
greyimg = convert2gray(img)
# NOTE(review): captcha_image is not used by any later visible line of this script.
captcha_image = np.array([greyimg.tolist()],dtype = np.float32)

# Predict the text and draw it onto the axes that display the original image.
predictText = testImg(greyimg)
f = plt.figure()
ax = f.add_subplot(111)
ax.text(0.1, 0.9,predictText, ha='center', va='center', transform=ax.transAxes)
plt.imshow(img)

INFO:tensorflow:Restoring parameters from /home/lygwangyp/TensorflowCaptcha/pengpengNet/Saved_model/pengNetCaptcha.model-95
53





<matplotlib.image.AxesImage at 0x7f2a445827f0>

tools.py文件

import os
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import math

# Image size and label layout used by the helper functions in this module.
IMAGE_HEIGHT = 20
IMAGE_WIDTH = 34
MAX_CAPTCHA = 2 # maximum number of characters in a captcha
CHAR_SET_LEN = 10 # number of characters in the captcha character set
# NOTE(review): not referenced by any visible function in this module.
learning_rate = 0.001

def initialize_parameters():
    """Create the two convolution filter variables of the network.

    Returns:
    parameters -- dict with "W1" (shape [4, 4, 1, 8]) and "W2"
                  (shape [2, 2, 8, 16]), each obtained via tf.get_variable
                  with a Xavier initializer seeded with 0
    """
    filter_specs = (("W1", [4, 4, 1, 8]),
                    ("W2", [2, 2, 8, 16]))
    parameters = {}
    with tf.variable_scope(""):
        for var_name, var_shape in filter_specs:
            parameters[var_name] = tf.get_variable(
                var_name, var_shape,
                initializer = tf.contrib.layers.xavier_initializer(seed = 0))
    return parameters

def forward_propagation(X, parameters):
    """Build the forward pass of the network.

    Layer order: CONV2D -> RELU -> MAXPOOL -> CONV2D -> RELU -> MAXPOOL ->
    FLATTEN -> FULLY CONNECTED.

    Arguments:
    X -- input tensor passed to the first convolution
    parameters -- dict holding the convolution filters under "W1" and "W2"

    Returns:
    Z3 -- output of the fully connected layer, MAX_CAPTCHA*CHAR_SET_LEN units,
          with no activation applied
    """
    filters1, filters2 = parameters['W1'], parameters['W2']
    unit_stride = [1, 1, 1, 1]

    # Stage 1: stride-1 'SAME' convolution, ReLU, then 8x8 max pooling with stride 8.
    conv1 = tf.nn.conv2d(X, filters1, strides = unit_stride, padding = 'SAME')
    act1 = tf.nn.relu(conv1)
    pool1 = tf.nn.max_pool(act1, ksize = [1,8,8,1], strides = [1,8,8,1], padding = 'SAME')

    # Stage 2: stride-1 'SAME' convolution, ReLU, then 4x4 max pooling with stride 4.
    conv2 = tf.nn.conv2d(pool1, filters2, strides = unit_stride, padding = 'SAME')
    act2 = tf.nn.relu(conv2)
    pool2 = tf.nn.max_pool(act2, ksize = [1,4,4,1], strides = [1,4,4,1], padding = 'SAME')

    # Head: flatten, then a linear layer with one output per (position, character) pair.
    features = tf.contrib.layers.flatten(pool2)
    return tf.contrib.layers.fully_connected(features, num_outputs = MAX_CAPTCHA*CHAR_SET_LEN, activation_fn=None)


def convert2gray(img):
    """Convert an image array to a single-channel float32 grayscale array.

    Arguments:
    img -- image array of shape (H, W, C), or (H, W) when it is already
           single-channel

    Returns:
    gray -- float32 array of shape (H, W, 1); for multi-channel input each
            pixel is the mean over the last axis, for 2-D input the values
            are copied unchanged
    """
    img = np.asarray(img)
    if img.ndim == 2:
        # Already one value per pixel: averaging over the last axis would
        # collapse the width instead of the channels.
        intensity = img
    else:
        intensity = np.mean(img, -1)
    gray = np.empty((img.shape[0], img.shape[1], 1), dtype='float32')
    gray[:, :, 0] = intensity
    return gray

def vec2text(vec):
    """Decode a one-hot captcha vector back into text.

    Every non-zero entry of ``vec`` contributes one character, in index order.
    The entry's index modulo CHAR_SET_LEN selects the character: 0-9 are
    digits, 10-35 are 'A'-'Z', 36-61 are 'a'-'z' and 62 is '_'.

    Raises:
    ValueError -- if an index modulo CHAR_SET_LEN is greater than 62
    """
    decoded = []
    for flat_index in vec.nonzero()[0]:
        slot = flat_index % CHAR_SET_LEN
        if slot > 62:
            raise ValueError('error')
        if slot == 62:
            symbol = '_'
        elif slot >= 36:
            symbol = chr(slot - 36 + ord('a'))
        elif slot >= 10:
            symbol = chr(slot - 10 + ord('A'))
        else:
            symbol = chr(slot + ord('0'))
        decoded.append(symbol)
    return "".join(decoded)