利用卷積神經網絡實現驗證碼破解(數字+字母)

本文僅供學習與探討之用;若有人藉此以非正當途徑牟利,一切後果與本人無關。

import tensorflow as tf
from captcha.image import ImageCaptcha
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import random
from cracking.machine_learning_demo.keras_cnn import config
from cracking.machine_learning_demo.keras_cnn import helper
from imutils import paths
import os

# CUDA settings: enumerate devices in PCI bus order and expose none of them
# (presumably to force CPU execution -- confirm before running on a GPU host).
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

# Candidate captcha characters: digits, lowercase letters, uppercase letters.
number = list("0123456789")
alphabet = list("abcdefghijklmnopqrstuvwxyz")
ALPHABET = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
CHAR_SET = number + alphabet + ALPHABET

# Captcha image geometry in pixels and number of characters per captcha.
image_height, image_width = 60, 160
max_captcha = 4

# Working alphabet and the number of classes per character position.
char_set = CHAR_SET
char_set_len = len(char_set)


# print(CHAR_SET)

def random_captcha_text(char_set=CHAR_SET, captcha_size=4):
    """Draw ``captcha_size`` random characters from ``char_set``.

    Each character is picked independently with ``random.choice``, so the
    same character may appear more than once.

    Args:
        char_set: Sequence of candidate characters.
        captcha_size: Number of characters to draw.

    Returns:
        A list of single characters (not a joined string).
    """
    return [random.choice(char_set) for _ in range(captcha_size)]


def gen_captcha_text_image():
    """Render one synthetic captcha.

    Returns:
        A ``(text, image)`` tuple: the random captcha string built from
        ``random_captcha_text`` and the rendered picture as a NumPy array.
    """
    generator = ImageCaptcha()
    text = ''.join(random_captcha_text())
    rendered = generator.generate(text)
    pixels = np.array(Image.open(rendered))
    return text, pixels


def convert2gray(img):
    """Collapse a colour image array to a single luminance channel.

    Arrays with more than two dimensions are treated as channel-last colour
    images: the first three channels are combined with the weights
    0.2989 / 0.5870 / 0.1140. Arrays with two or fewer dimensions are
    returned unchanged (same object).
    """
    if len(img.shape) <= 2:
        return img
    red, green, blue = (img[:, :, channel] for channel in range(3))
    return 0.2989 * red + 0.5870 * green + 0.1140 * blue

def text2vec(text):
    """Encode captcha text as a flat one-hot vector.

    The vector has ``max_captcha * char_set_len`` entries: one group of
    ``char_set_len`` slots per character position. Inside a group the slots
    are ordered ``0-9`` (0..9), ``A-Z`` (10..35), ``a-z`` (36..61), which is
    the inverse of the mapping used by ``vec2text``.

    Args:
        text: Captcha string of at most ``max_captcha`` characters.

    Returns:
        A 1-D NumPy array of zeros with a 1 at the slot of each character.

    Raises:
        ValueError: If ``text`` is longer than ``max_captcha`` or contains a
            character that has no slot in the current alphabet.
    """
    if len(text) > max_captcha:
        # The old code only printed here and then failed later with an
        # IndexError when the extra character was written past the vector.
        raise ValueError('驗證碼最長{}個字符: {}'.format(max_captcha, text))

    vector = np.zeros(max_captcha * char_set_len)

    def char2pos(c):
        # Explicit range checks: the previous arithmetic let characters that
        # sit between the ranges (e.g. ':' or '[') or below '0' fall through
        # to a wrong -- sometimes negative -- slot instead of being rejected.
        if c == '_':
            pos = 62
        elif '0' <= c <= '9':
            pos = ord(c) - ord('0')
        elif 'A' <= c <= 'Z':
            pos = ord(c) - ord('A') + 10
        elif 'a' <= c <= 'z':
            pos = ord(c) - ord('a') + 36
        else:
            raise ValueError('No Map')
        # '_' is only valid when the alphabet has a 63rd slot; otherwise it
        # would spill into the next character's group.
        if pos >= char_set_len:
            raise ValueError('No Map')
        return pos

    for i, c in enumerate(text):
        vector[i * char_set_len + char2pos(c)] = 1
    return vector

def vec2text(vec):
    """Decode a sequence of class indices into captcha text.

    Each entry is reduced modulo ``char_set_len`` and mapped to a character:
    0..9 -> ``'0'-'9'``, 10..35 -> ``'A'-'Z'``, 36..61 -> ``'a'-'z'`` and
    62 -> ``'_'``.

    Raises:
        ValueError: If a reduced index falls outside every range above.
    """
    # (exclusive upper bound, amount added to the index to get the code point)
    bands = (
        (10, ord('0')),
        (36, ord('A') - 10),
        (62, ord('a') - 36),
    )
    decoded = []
    for value in vec:
        slot = value % char_set_len
        for upper, shift in bands:
            if slot < upper:
                decoded.append(chr(slot + shift))
                break
        else:
            if slot != 62:
                raise ValueError('error')
            decoded.append('_')
    return "".join(decoded)


def read_local_image(method):
    """Pick one random captcha image from disk and return its label and pixels.

    The label is read from the file name: everything before the first ``.``
    of the base name (so ``ab12.jpg`` is labelled ``ab12``).

    Args:
        method: ``0`` reads from ``config.train_pic_path`` (training images);
            ``1`` reads from ``config.test_pic_path`` (test images).

    Returns:
        A ``(text, image)`` tuple where ``image`` is the picture as a NumPy
        array after ``helper.change_image_channels`` and a resize to
        ``image_width`` x ``image_height``.

    Raises:
        ValueError: If ``method`` is neither 0 nor 1, or the selected folder
            contains no images.
    """
    if method == 0:
        captcha_image_folder = config.train_pic_path
    elif method == 1:
        captcha_image_folder = config.test_pic_path
    else:
        # Previously this only printed and then crashed with an
        # UnboundLocalError on the folder variable.
        raise ValueError("請輸出步驟 {}".format(method))

    captcha_image_files = list(paths.list_images(captcha_image_folder))
    if not captcha_image_files:
        raise ValueError("no captcha images found in {}".format(captcha_image_folder))
    image_file = random.sample(captcha_image_files, 1)[0]

    # Splitting on "\\" alone only works for Windows paths; normalise the
    # separators so the label is also extracted correctly from "/" paths.
    file_name = os.path.basename(image_file.replace("\\", "/"))
    text = file_name.split(".")[0]

    image = Image.open(image_file)
    image = helper.change_image_channels(image)
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
    image = image.resize((image_width, image_height), Image.LANCZOS)
    return text, np.array(image)


def get_next_batch(batch_size, method):
    """Assemble one batch of flattened grayscale images and one-hot labels.

    Args:
        batch_size: Number of samples in the batch.
        method: Forwarded to ``read_local_image`` to choose the image folder.

    Returns:
        ``(batch_x, batch_y)``: ``batch_x`` has shape
        ``(batch_size, image_height * image_width)`` with pixel values
        divided by 255; ``batch_y`` has shape
        ``(batch_size, max_captcha * char_set_len)`` and holds the
        ``text2vec`` encoding of each label.

    Raises:
        RuntimeError: If no usable sample is found after many attempts.
    """
    batch_x = np.zeros([batch_size, image_height * image_width])
    batch_y = np.zeros([batch_size, max_captcha * char_set_len])
    expected_shape = (image_height, image_width, 3)
    max_attempts = 1000

    def wrap_gen_captcha_text_and_image():
        # Resample until both the image shape and the label length are usable.
        # Before, a sample with a wrong-length label kept its image row but
        # left an all-zero label row in the batch, and the loop was unbounded.
        for _ in range(max_attempts):
            # Swap in gen_captcha_text_image() here to use generated captchas
            # instead of the local image folder.
            text, image = read_local_image(method)
            if image.shape == expected_shape and len(text) == max_captcha:
                return text, image
        raise RuntimeError(
            "no captcha with shape {} and a {}-character label found in {} attempts".format(
                expected_shape, max_captcha, max_attempts))

    for i in range(batch_size):
        text, image = wrap_gen_captcha_text_and_image()
        batch_x[i, :] = convert2gray(image).flatten() / 255
        batch_y[i, :] = text2vec(text)

    return batch_x, batch_y


def cnn_structure(X, Y, keep_prob, b_alpha=0.1):
    """Build the captcha CNN graph and return its output logits.

    Layout: three blocks of 3x3 convolution -> ReLU -> 2x2 max-pool ->
    dropout (32, 64 and 128 filters), one 1024-unit fully connected layer
    with ReLU and dropout, then a linear output layer.

    Args:
        X: Flattened image batch; reshaped here to
            ``[-1, image_height, image_width, 1]``.
        Y: Not used inside this function.
        keep_prob: Keep probability handed to every ``tf.nn.dropout`` call.
        b_alpha: Scale applied to the random-normal bias initial values.

    Returns:
        Tensor with ``max_captcha * char_set_len`` values per sample; no
        activation is applied to it here.
    """
    x = tf.reshape(X, shape=[-1, image_height, image_width, 1])

    # Conv block 1: 1 -> 32 channels.
    wc1 = tf.get_variable(name='wc1', shape=[3, 3, 1, 32], dtype=tf.float32,
                          initializer=tf.contrib.layers.xavier_initializer())
    # wc1 = tf.Variable(w_alpha * tf.random_normal([3, 3, 1, 32]))
    bc1 = tf.Variable(b_alpha * tf.random_normal([32]))
    conv1 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(x, wc1, strides=[1, 1, 1, 1], padding='SAME'), bc1))
    conv1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    conv1 = tf.nn.dropout(conv1, keep_prob)

    # Conv block 2: 32 -> 64 channels.
    wc2 = tf.get_variable(name='wc2', shape=[3, 3, 32, 64], dtype=tf.float32,
                          initializer=tf.contrib.layers.xavier_initializer())
    # wc2 = tf.Variable(w_alpha * tf.random_normal([3, 3, 32, 64]))
    bc2 = tf.Variable(b_alpha * tf.random_normal([64]))
    conv2 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(conv1, wc2, strides=[1, 1, 1, 1], padding='SAME'), bc2))
    conv2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    conv2 = tf.nn.dropout(conv2, keep_prob)

    # Conv block 3: 64 -> 128 channels.
    wc3 = tf.get_variable(name='wc3', shape=[3, 3, 64, 128], dtype=tf.float32,
                          initializer=tf.contrib.layers.xavier_initializer())
    # wc3 = tf.Variable(w_alpha * tf.random_normal([3, 3, 64, 128]))
    bc3 = tf.Variable(b_alpha * tf.random_normal([128]))
    conv3 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(conv2, wc3, strides=[1, 1, 1, 1], padding='SAME'), bc3))
    conv3 = tf.nn.max_pool(conv3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    conv3 = tf.nn.dropout(conv3, keep_prob)

    # Fully connected layer. 8 * 20 * 128 is hard-coded for a 60x160 input:
    # three stride-2 'SAME' pools give 60 -> 30 -> 15 -> 8 rows and
    # 160 -> 80 -> 40 -> 20 columns, with 128 channels from conv block 3.
    # Changing image_height / image_width requires updating this shape.
    wd1 = tf.get_variable(name='wd1', shape=[8 * 20 * 128, 1024], dtype=tf.float32,
                          initializer=tf.contrib.layers.xavier_initializer())
    # wd1 = tf.Variable(w_alpha * tf.random_normal([7*20*128,1024]))
    bd1 = tf.Variable(b_alpha * tf.random_normal([1024]))
    dense = tf.reshape(conv3, [-1, wd1.get_shape().as_list()[0]])
    dense = tf.nn.relu(tf.add(tf.matmul(dense, wd1), bd1))
    dense = tf.nn.dropout(dense, keep_prob)

    # Output layer. NOTE(review): this variable is registered under the
    # literal name 'name' (not 'wout'). Presumably existing checkpoints store
    # it under that name, so renaming it would stop them from restoring --
    # confirm before changing.
    wout = tf.get_variable('name', shape=[1024, max_captcha * char_set_len], dtype=tf.float32,
                           initializer=tf.contrib.layers.xavier_initializer())
    # wout = tf.Variable(w_alpha * tf.random_normal([1024, max_captcha * char_set_len]))
    bout = tf.Variable(b_alpha * tf.random_normal([max_captcha * char_set_len]))
    out = tf.add(tf.matmul(dense, wout), bout)
    return out


def train_cnn(X, Y, keep_prob, method):
    """Train the captcha CNN until sampled accuracy exceeds 0.99, then save it.

    Every step trains on a batch of 10 samples with dropout keep probability
    0.75 and prints ``step, loss``. Every 100th step (including step 0) a
    batch of 100 samples is evaluated with keep probability 1.0 and
    ``step, accuracy`` is printed; once that accuracy is above 0.99 the model
    is saved to ``config.save_model_path`` and training stops.

    Args:
        X: Placeholder for the flattened image batch.
        Y: Placeholder for the one-hot label batch.
        keep_prob: Placeholder for the dropout keep probability.
        method: Forwarded to ``get_next_batch`` to choose the image folder
            (used for both the training and the evaluation batches).
    """
    logits = cnn_structure(X, Y, keep_prob)
    loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=Y))
    train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)

    # Per-character accuracy: compare the arg-max class of every position.
    per_char_shape = [-1, max_captcha, char_set_len]
    predicted_idx = tf.argmax(tf.reshape(logits, per_char_shape), 2)
    label_idx = tf.argmax(tf.reshape(Y, per_char_shape), 2)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted_idx, label_idx), tf.float32))

    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        step = 0
        while True:
            train_x, train_y = get_next_batch(10, method)
            _, loss_value = sess.run([train_op, loss],
                                     feed_dict={X: train_x, Y: train_y, keep_prob: 0.75})
            print(step, loss_value)

            if step % 100 == 0:
                eval_x, eval_y = get_next_batch(100, method)
                acc = sess.run(accuracy, feed_dict={X: eval_x, Y: eval_y, keep_prob: 1.})
                print(step, acc)
                # Accuracy only changes on evaluation steps, so the stop
                # condition is checked right after it is measured.
                if acc > 0.99:
                    saver.save(sess, config.save_model_path, global_step=step)
                    break
            step += 1


def crack_captcha(captcha_image, X, Y, keep_prob):
    """Predict the text of one captcha with the saved model.

    Builds the network on the given placeholders, restores the weights from
    ``config.download_model_path`` and decodes the arg-max class of every
    character position with ``vec2text``.

    Args:
        captcha_image: Flattened image vector, fed as a batch of one.
        X: Placeholder for the flattened image batch.
        Y: Placeholder passed through to ``cnn_structure``.
        keep_prob: Placeholder for the dropout keep probability (fed 1.0).

    Returns:
        The predicted captcha string.
    """
    logits = cnn_structure(X, Y, keep_prob)
    saver = tf.train.Saver()

    with tf.Session() as sess:
        saver.restore(sess, config.download_model_path)
        decode_op = tf.argmax(tf.reshape(logits, [-1, max_captcha, char_set_len]), 2)
        batch_indices = sess.run(decode_op, feed_dict={X: [captcha_image], keep_prob: 1.})

    first_sample = batch_indices[0].tolist()
    return vec2text(first_sample)


def _build_placeholders():
    """Create the ``(X, Y, keep_prob)`` placeholders the network is fed through."""
    X = tf.placeholder(tf.float32, [None, image_height * image_width])
    Y = tf.placeholder(tf.float32, [None, max_captcha * char_set_len])
    keep_prob = tf.placeholder(tf.float32)
    return X, Y, keep_prob


def _to_network_input(image):
    """Turn image pixels into the flat grayscale vector (values / 255) fed to X."""
    return convert2gray(np.array(image)).flatten() / 255


def operate_cnn(method, image_path):
    """Entry point: train the model, batch-test it, or classify one image.

    Args:
        method: ``0`` trains the model; ``1`` tests it on 10 random images
            from the test folder; ``2`` classifies the image at
            ``image_path``.
        image_path: Path of the image to classify; only read when
            ``method == 2``.

    Returns:
        * method 0: the status string ``"訓練模型完成!"``.
        * method 1: the tuple ``("預測正確的個數==", true_num)`` where
          ``true_num`` is the number of exact matches out of 10; mismatches
          are printed.
        * method 2: the predicted text, lower-cased.
        * any other value: ``None``.
    """
    if method == 0:
        # tf.get_variable refuses to re-create existing variables, so start
        # from a clean graph; without this a second call in the same process
        # fails.
        tf.reset_default_graph()
        X, Y, keep_prob = _build_placeholders()
        train_cnn(X, Y, keep_prob, method)
        return "訓練模型完成!"

    if method == 1:
        num = 10
        true_num = 0
        for _ in range(num):
            # crack_captcha rebuilds the network on every call, so each
            # sample needs a fresh graph.
            tf.reset_default_graph()
            text, image = read_local_image(method)
            X, Y, keep_prob = _build_placeholders()
            predict_text = crack_captcha(_to_network_input(image), X, Y, keep_prob)
            if text == predict_text:
                true_num += 1
            else:
                print("正確: {}  預測: {}".format(text, predict_text))

        return "預測正確的個數==", true_num

    if method == 2:
        # Same reason as in training mode: allow repeated calls per process.
        tf.reset_default_graph()
        image = Image.open(image_path)
        image = helper.change_image_channels(image)
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        image = image.resize((image_width, image_height), Image.LANCZOS)

        X, Y, keep_prob = _build_placeholders()
        predict_text = crack_captcha(_to_network_input(image), X, Y, keep_prob)

        # The replace chain flattens the repr of a list of characters; it is
        # a no-op when the prediction is already a plain string.
        predict_text_str = str(predict_text).replace("['", "").replace("', '", "").replace("']", "")
        return predict_text_str.lower()

    # Unknown mode: nothing is done.
    return None


if __name__ == '__main__':
    # Modes accepted by operate_cnn:
    #   method=0: train the model
    #   method=1: batch-test the model
    #   method=2: test the model on a single image
    # image_path is only read in mode 2.
    value = operate_cnn(method=1, image_path="2ceb.jpg")
    print(value)

以上是主要函數的代碼示例。

樣例數據如下:

經測試,準確率在97%左右。

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章