Deep Learning with the TensorFlow Framework: Implementing VGG / RNN Networks / Captcha Generation and Recognition

① Implementing the VGG network model

import os
import numpy as np
import scipy.io
import scipy.misc
import tensorflow as tf

cwd = os.getcwd()           # get the current working directory
VGG_PATH = cwd + "/data/imagenet-vgg-verydeep-19.mat"
data = scipy.io.loadmat(VGG_PATH)
mean = data['normalization'][0][0][0]
mean_pixel = np.mean(mean, axis=(0, 1))  # mean value of each of the three channels
weights = data['layers'][0]
# inspect the shapes of the weight parameters (kernel and bias of the first conv layer)
print(weights[0][0][0][0][0][0].shape)
print(weights[0][0][0][0][0][1].shape)

def net(data_path, input_image):
    layers = (
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
        # ... remaining layer names of the 19-layer network elided ...
    )
    data = scipy.io.loadmat(data_path)
    mean = data['normalization'][0][0][0]
    mean_pixel = np.mean(mean, axis=(0, 1))  # mean value of each of the three channels
    weights = data['layers'][0]
    net = {}
    current = input_image
    for i, name in enumerate(layers):
        kind = name[:4]
        if kind == 'conv':
            kernels, bias = weights[i][0][0][0][0]         # kernel weights w and bias b
            kernels = np.transpose(kernels, (1, 0, 2, 3))  # the .mat stores kernels as (width, height, in, out); swap to (height, width, in, out)
            bias = bias.reshape(-1)
            current = _conv_layer(current, kernels, bias)
        elif kind == 'relu':
            current = tf.nn.relu(current)
        elif kind == 'pool':
            current = _pool_layer(current)
        net[name] = current             # store the output tensor of every layer under its name
    assert len(net) == len(layers)
    return net, mean_pixel, layers

print("Network for VGG ready")

def _conv_layer(input, weights, bias):
    conv = tf.nn.conv2d(input, tf.constant(weights), strides=(1, 1, 1, 1), padding='SAME')
    return tf.nn.bias_add(conv, bias)
def _pool_layer(input):
    return tf.nn.max_pool(input, ksize=(1,2,2,1), strides=(1,2,2,1), padding='SAME')
def preprocess(image, mean_pixel):
    return image - mean_pixel
def unprocess(image, mean_pixel):
    return image + mean_pixel
def imread(path):
    return scipy.misc.imread(path).astype(np.float)
def imsave(path, img):
    img = np.clip(img, 0, 255).astype(np.uint8)
    scipy.misc.imsave(path, img)
print("Functions for VGG ready")

VGG_PATH = cwd + "/ / .mat"
IMG_PATH = cwd + "/data/ .jpg"
input_image = imread(IMG_PATH)
shape = (1, input_image.shape[0], input_image.shape[1], input_image.shape[2])
with tf.Session() as sess:
    image = tf.placeholder('float', shape=shape)
    nets, mean_pixel, all_layers = net(VGG_PATH, image)
    input_image_pre = np.array([preprocess(input_image, mean_pixel)])
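    # Minimal usage sketch: with the preprocessed image fed into the placeholder, any
    # entry of the `nets` dictionary can be evaluated as a feature map. This assumes
    # net() returns (net, mean_pixel, layers) as written above; 'pool1' is one of the
    # names listed in the layers tuple.
    features = sess.run(nets['pool1'], feed_dict={image: input_image_pre})
    print('pool1 feature map shape:', features.shape)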

② Implementing the RNN network model

Input:
the data is fed as a sequence b1--b2--b3--b4

Example: processing the MNIST dataset
whole image -> sequence data (one image row per time step; a reshape sketch follows the parameter setup below)
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("data/", one_hot=True)   # load MNIST (directory name is an assumption)
nclasses = mnist.train.labels.shape[1]                      # 10 classes

diminput = 28
dimhidden = 128
dimoutput = nclasses
nsteps = 28       # number of time steps (one per image row)
weights = {
    'hidden': tf.Variable(tf.random_normal([diminput, dimhidden])),
    'out': tf.Variable(tf.random_normal([dimhidden, dimoutput]))
}
biases = {
    'hidden': tf.Variable(tf.random_normal([dimhidden])),
    'out': tf.Variable(tf.random_normal([dimoutput]))
}
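The "whole image -> sequence data" conversion mentioned above is just a reshape: each 784-pixel MNIST image becomes nsteps = 28 time steps of diminput = 28 values, one image row per step. A minimal sketch with a dummy batch:

dummy_batch = np.zeros((3, 784))                        # e.g. what mnist.train.next_batch() yields
seq_batch = dummy_batch.reshape((3, nsteps, diminput))  # 3 samples, 28 steps, 28 values per step
print(seq_batch.shape)                                  # (3, 28, 28)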

def _RNN(_X, _W, _b, _nsteps, _name):
    # [batchsize, nsteps, diminput] => [nsteps, batchsize, diminput]
    _X = tf.transpose(_X, [1, 0, 2])
    # => [nsteps*batchsize, diminput]
    _X = tf.reshape(_X, [-1, diminput])
    _H = tf.matmul(_X, _W['hidden']) + _b['hidden']
    _Hsplit = tf.split(0, _nsteps, _H)    # list of nsteps tensors of shape [batchsize, dimhidden]
    with tf.variable_scope(_name) as scope:
        # scope.reuse_variables()        # variable sharing; only enable when _RNN is called again with the same scope name
        lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(dimhidden, forget_bias=1.0)
        _LSTM_O, _LSTM_S = tf.nn.rnn(lstm_cell, _Hsplit, dtype=tf.float32)
    # output layer applied to the hidden state at the last time step
    _O = tf.matmul(_LSTM_O[-1], _W['out']) + _b['out']
    # Return!
    return {
        'X': _X, 'H': _H, 'Hsplit': _Hsplit, 'LSTM_O': _LSTM_O, 'LSTM_S': _LSTM_S, 'O': _O
    }

learning_rate = 0.001
x = tf.placeholder("float", [None, nsteps, diminput])
y = tf.placeholder("float", [None, dimoutput])
myrnn = _RNN(x, weights, biases, nsteps, 'basic')  # 'basic' is the variable-scope name
pred = myrnn['O']
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optm = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
accr =tf.reduce_mean(tf.cast(tf.equal(tf.argmax(pred,1), tf.argmax(y,1)), tf.float32))
init = tf.global_variables_initializer()
print("Network Ready!")

training_epochs = 5
batch_size = 16
display_step = 1
sess = tf.Session()
sess.run(init)
print("Start optimization")
for epoch in range(training_epochs):
    avg_cost = 0
    total_batch = 100
    for i in range(total_batch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        batch_xs = batch_xs.reshape((batch_size, nsteps, diminput))
        feeds = {x:batch_xs, y:batch_ys}
        sess.run(optm, feed_dict=feeds)
        avg_cost +=sess.run(cost, feed_dict=feeds)/ total_batch
    if epoch % display_step == 0:
        print("Epoch: %03d/%03d cost: %.6f" % (epoch, training_epochs, avg_cost))
        feeds = {x: batch_xs, y: batch_ys}
        train_acc = sess.run(accr, feed_dict=feeds)
        print("Training accuracy: %.3f" % (train_acc))
        testimgs = mnist.test.images
        testlabels = mnist.test.labels
        ntest = testimgs.shape[0]
        testimgs = testimgs.reshape((ntest, nsteps, diminput))
        feeds = {x: testimgs, y: testlabels}
        test_acc = sess.run(accr, feed_dict=feeds)
        print("Test accuracy: %.3f" % (test_acc))
print("Optimization Finished.")

③ Captcha generation and recognition

import random
import numpy as np
import matplotlib.pyplot as plt
from captcha.image import ImageCaptcha  # captcha image generator
from PIL import Image

number = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
alphabet = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
            'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
ALPHABET = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
            'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']
# function that generates a random text string
def random_captcha_text(char_set=number + alphabet + ALPHABET, captcha_size=4):
    captcha_text = []
    for i in range(captcha_size):
        c = random.choice(char_set)
        captcha_text.append(c)
    return captcha_text
# function that generates a captcha image
def gen_captcha_text_and_image():
    image = ImageCaptcha()
    captcha_text = random_captcha_text()
    captcha_text = ''.join(captcha_text)
    captcha = image.generate(captcha_text)   # generate the image
    captcha_image = Image.open(captcha)
    captcha_image = np.array(captcha_image)
    return captcha_text, captcha_image
# display a generated image
if __name__ =='__main__':
    text, image = gen_captcha_text_and_image()
    f=plt.figure()
    ax = f.add_subplot(111)
    ax.text(0.1, 0.9, text, ha='center', va='center', transform=ax.transAxes)
    plt.imshow(image)
    plt.show()
    
if __name__ == '__main__':
    train = 0
    # training phase
    if train == 0:
        number = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
        text, image = gen_captcha_text_and_image()
        print('captcha image channels:', image.shape)
        # image size
        IMAGE_HEIGHT = 60
        IMAGE_WIDTH = 160
        MAX_CAPTCHA = len(text)
        print("maximum number of characters in the captcha text:", MAX_CAPTCHA)
        # text-to-vector conversion
        char_set = number
        CHAR_SET_LEN = len(char_set)
        X = tf.placeholder(tf.float32, [None, IMAGE_HEIGHT*IMAGE_WIDTH])
        Y = tf.placeholder(tf.float32, [None, MAX_CAPTCHA*CHAR_SET_LEN])  # each character position is one-hot encoded, e.g. the digit 2 becomes 0010000000 (see the text2vec sketch after get_next_batch)
        keep_prob = tf.placeholder(tf.float32)   # dropout keep probability
        train_crack_captcha_cnn()
    # test phase
    if train == 1:
        number = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
        IMAGE_HEIGHT = 60
        IMAGE_WIDTH = 160
        
# network training
def train_crack_captcha_cnn():
    output = crack_captcha_cnn()  # build the CNN architecture (a sketch of this function follows below)
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=output, labels=Y))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001).minimize(loss)
    predict = tf.reshape(output, [-1, MAX_CAPTCHA, CHAR_SET_LEN])
    max_idx_p = tf.argmax(predict, 2)
    max_idx_l = tf.argmax(tf.reshape(Y, [-1, MAX_CAPTCHA, CHAR_SET_LEN]), 2)
    correct_pred = tf.equal(max_idx_p, max_idx_l)           
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        step = 0
        while True:
            batch_x, batch_y = get_next_batch(64)
            _, loss_ = sess.run([optimizer, loss], feed_dict={X: batch_x, Y: batch_y, keep_prob: 0.75})  # 0.75 is an assumed dropout keep rate
            if step % 100 == 0:
                batch_x_test, batch_y_test = get_next_batch(100)
                acc = sess.run(accuracy, feed_dict={X: batch_x_test, Y: batch_y_test, keep_prob: 1.0})  # no dropout at evaluation time
                print(step, acc)
                if acc > 0.85:
                    saver.save(sess, "./model/crack.model", global_step = step)
                    break
            step+=1
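crack_captcha_cnn() is called above but its body is not included in this excerpt. The following is only a plausible sketch of such an architecture (three conv/pool/dropout blocks followed by a fully connected layer), written against the X and keep_prob placeholders defined earlier; the filter counts and the w_alpha/b_alpha scaling factors are assumptions, not the original author's values.

# a plausible sketch of crack_captcha_cnn(), not the original implementation
def crack_captcha_cnn(w_alpha=0.01, b_alpha=0.1):
    # reshape the flat input back into a 60x160 single-channel image
    x = tf.reshape(X, shape=[-1, IMAGE_HEIGHT, IMAGE_WIDTH, 1])

    # conv block 1: 3x3 conv, ReLU, 2x2 max pool, dropout
    w_c1 = tf.Variable(w_alpha * tf.random_normal([3, 3, 1, 32]))
    b_c1 = tf.Variable(b_alpha * tf.random_normal([32]))
    conv1 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(x, w_c1, strides=[1, 1, 1, 1], padding='SAME'), b_c1))
    conv1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    conv1 = tf.nn.dropout(conv1, keep_prob)

    # conv block 2
    w_c2 = tf.Variable(w_alpha * tf.random_normal([3, 3, 32, 64]))
    b_c2 = tf.Variable(b_alpha * tf.random_normal([64]))
    conv2 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(conv1, w_c2, strides=[1, 1, 1, 1], padding='SAME'), b_c2))
    conv2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    conv2 = tf.nn.dropout(conv2, keep_prob)

    # conv block 3
    w_c3 = tf.Variable(w_alpha * tf.random_normal([3, 3, 64, 64]))
    b_c3 = tf.Variable(b_alpha * tf.random_normal([64]))
    conv3 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(conv2, w_c3, strides=[1, 1, 1, 1], padding='SAME'), b_c3))
    conv3 = tf.nn.max_pool(conv3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    conv3 = tf.nn.dropout(conv3, keep_prob)

    # fully connected layer: after three 2x2 poolings, 60x160 becomes 8x20
    w_d = tf.Variable(w_alpha * tf.random_normal([8 * 20 * 64, 1024]))
    b_d = tf.Variable(b_alpha * tf.random_normal([1024]))
    dense = tf.reshape(conv3, [-1, w_d.get_shape().as_list()[0]])
    dense = tf.nn.relu(tf.add(tf.matmul(dense, w_d), b_d))
    dense = tf.nn.dropout(dense, keep_prob)

    # output layer: one score per character position per class
    w_out = tf.Variable(w_alpha * tf.random_normal([1024, MAX_CAPTCHA * CHAR_SET_LEN]))
    b_out = tf.Variable(b_alpha * tf.random_normal([MAX_CAPTCHA * CHAR_SET_LEN]))
    out = tf.add(tf.matmul(dense, w_out), b_out)
    return out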

# generate one training batch
def get_next_batch(batch_size=128):
    batch_x = np.zeros([batch_size, IMAGE_HEIGHT*IMAGE_WIDTH])
    batch_y = np.zeros([batch_size, MAX_CAPTCHA*CHAR_SET_LEN])
    def wrap_gen_captcha_text_and_image():
        while True:
            text, image = gen_captcha_text_and_image()
            if image.shape == (60, 160, 3):
                return text, image
    for i in range(batch_size):
        text, image = wrap_gen_captcha_text_and_image()
        image = convert2gray(image)
        batch_x[i, :] = image.flatten() / 255
        batch_y[i, :] = text2vec(text)
    return batch_x, batch_y
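get_next_batch() relies on two helpers, convert2gray() and text2vec(), that are not shown in this excerpt. Below are hedged sketches of both: convert2gray() uses a standard luminosity formula (any grayscale conversion would do), and text2vec() produces the per-position one-hot layout described earlier, assuming the digits-only char_set used in the training branch.

# plausible sketches of the two helpers used above, not the original implementations
def convert2gray(img):
    # collapse an RGB captcha image into a single grayscale channel
    if len(img.shape) > 2:
        r, g, b = img[:, :, 0], img[:, :, 1], img[:, :, 2]
        return 0.2989 * r + 0.5870 * g + 0.1140 * b
    return img

def text2vec(text):
    # one CHAR_SET_LEN-wide slice per character position, one-hot within each slice
    vector = np.zeros(MAX_CAPTCHA * CHAR_SET_LEN)
    for i, c in enumerate(text):
        idx = i * CHAR_SET_LEN + char_set.index(c)
        vector[idx] = 1
    return vector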