Deep Learning with the TensorFlow Framework: VGG / RNN Networks and Captcha Generation

① Implementing the VGG network model

import os
import numpy as np
import scipy.io
import scipy.misc
import tensorflow as tf

cwd = os.getcwd()           # current working directory
VGG_PATH = cwd + "/data/imagenet-vgg-verydeep-19.mat"
data = scipy.io.loadmat(VGG_PATH)
mean = data['normalization'][0][0][0]
mean_pixel = np.mean(mean, axis=(0, 1))  # mean of each of the three (RGB) channels
weights = data['layers'][0]
# inspect the dimensions of the weight parameters
# (for conv1_1 these are typically (3, 3, 3, 64) and (1, 64): 3x3 kernels, 3 input channels, 64 filters)
print(weights[0][0][0][0][0][0].shape)
print(weights[0][0][0][0][0][1].shape)

def net(data_path, input_image):
    layers = (
      'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
      ...
      ...                                                       # 19-layer network (remaining conv/relu/pool names elided)
    )
    data = scipy.io.loadmat(data_path)
    mean = data['normalization'][0][0][0]
    mean_pixel = np.mean(mean, axis=(0, 1))   # mean of each of the three (RGB) channels
    weights = data['layers'][0]
    net = {}
    current = input_image
    for i, name in enumerate(layers):
        kind = name[:4]
        if kind == 'conv':
            kernels, bias = weights[i][0][0][0][0]        # w and b of this layer
            # MatConvNet stores kernels as [width, height, in_channels, out_channels];
            # TensorFlow expects [height, width, in_channels, out_channels]
            kernels = np.transpose(kernels, (1, 0, 2, 3))
            bias = bias.reshape(-1)
            current = _conv_layer(current, kernels, bias)
        elif kind == 'relu':
            current = tf.nn.relu(current)
        elif kind == 'pool':
            current = _pool_layer(current)
        net[name] = current              # store each layer's output tensor under its name
    assert len(net) == len(layers)
    return net, mean_pixel, layers       # needed by the calling code below
print("Network for VGG ready")

def _conv_layer(input, weights, bias):
    conv = tf.nn.conv2d(input, tf.constant(weights), strides=(1, 1, 1, 1), padding='SAME')
    return tf.nn.bias_add(conv, bias)
def _pool_layer(input):
    return tf.nn.max_pool(input, ksize=(1, 2, 2, 1), strides=(1, 2, 2, 1), padding='SAME')
def preprocess(image, mean_pixel):
    return image - mean_pixel
def unprocess(image, mean_pixel):
    return image + mean_pixel
def imread(path):
    return scipy.misc.imread(path).astype(np.float)
def imsave(path, img):
    img = np.clip(img, 0, 255).astype(np.uint8)
    scipy.misc.imsave(path, img)
print("Functions for VGG ready")

VGG_PATH = cwd + "/ / .mat"
IMG_PATH = cwd + "/data/ .jpg"
input_image = imread(IMG_PATH)
shape = (1, input_image.shape[0], input_image.shape[1], input_image.shape[2])
with tf.Session() as sess:
    image = tf.placeholder('float', shape=shape)
    nets, mean_pixel, all_layers = net(VGG_PATH, image)
    input_image_pre = np.array([preprocess(input_image, mean_pixel)])   # subtract the channel means before feeding the image
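    # quick sketch: the `nets` dict returned by net() maps each layer name to its
    # output tensor, so any layer's activations can be evaluated for the
    # preprocessed image ('relu4_2' below is only an example key; any name from
    # the `layers` tuple works the same way)
    features = nets['relu4_2'].eval(feed_dict={image: input_image_pre})
    print('relu4_2 features:', features.shape)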

② Implementing the RNN network model

The network consumes its input as a sequence:
b1 -- b2 -- b3 -- b4

Example: classifying the MNIST dataset.
Whole data → sequence data: each 28×28 image is fed to the RNN as 28 time steps of 28 pixels, one row per step (see the short reshape sketch after the weight definitions below).
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("data/", one_hot=True)   # MNIST location is an example path

diminput = 28              # pixels per time step (one image row)
dimhidden = 128
nclasses = 10              # ten digit classes in MNIST
dimoutput = nclasses
nsteps = 28                # number of time steps (rows per image)
weights = {
    'hidden': tf.Variable(tf.random_normal([diminput, dimhidden])),
    'out': tf.Variable(tf.random_normal([dimhidden, dimoutput]))
}
biases = {
    'hidden': tf.Variable(tf.random_normal([dimhidden])),
    'out': tf.Variable(tf.random_normal([dimoutput]))
}
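
Before the graph is built, it helps to see what "whole data → sequence data" means concretely: each flat 784-pixel MNIST image is reshaped into 28 time steps of 28 pixels. A minimal sketch (the batch size of 4 is arbitrary):

# minimal illustration: turn flat MNIST images into row-by-row sequences
demo_xs, demo_ys = mnist.train.next_batch(4)          # demo_xs has shape (4, 784)
demo_seq = demo_xs.reshape((4, nsteps, diminput))     # -> (4, 28, 28): 28 steps of 28 pixels
print(demo_xs.shape, '->', demo_seq.shape)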

def _RNN(_X, _W, _b, _nsteps, _name):
    # [batchsize, nsteps, diminput] => [nsteps, batchsize, diminput]
    _X = tf.transpose(_X, [1, 0, 2])
    # => [nsteps * batchsize, diminput]
    _X = tf.reshape(_X, [-1, diminput])
    # input-to-hidden projection
    _H = tf.matmul(_X, _W['hidden']) + _b['hidden']
    # split into a list of nsteps tensors, one per time step (pre-1.0 argument order)
    _Hsplit = tf.split(0, _nsteps, _H)
    with tf.variable_scope(_name) as scope:
        # scope.reuse_variables()   # variable sharing: enable only when _RNN is
        #                           # built more than once under the same scope name
        lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(dimhidden, forget_bias=1.0)
        _LSTM_O, _LSTM_S = tf.nn.rnn(lstm_cell, _Hsplit, dtype=tf.float32)
    # output: project the hidden state of the last time step
    _O = tf.matmul(_LSTM_O[-1], _W['out']) + _b['out']
    # return everything for inspection
    return {
        'X': _X, 'H': _H, 'Hsplit': _Hsplit,
        'LSTM_O': _LSTM_O, 'LSTM_S': _LSTM_S, 'O': _O
    }
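
Note that tf.split(0, _nsteps, _H) and tf.nn.rnn above follow the pre-1.0 TensorFlow API. On TensorFlow 1.x the argument order of tf.split changed and the static RNN helper moved into tf.contrib.rnn; a rough equivalent of the cell construction (an assumption for roughly TF 1.1 and later, not part of the original code) would be:

# rough TF 1.x equivalent of the three lines inside the variable scope above
_Hsplit = tf.split(_H, _nsteps, 0)                     # value, num_splits, axis
lstm_cell = tf.contrib.rnn.BasicLSTMCell(dimhidden, forget_bias=1.0)
_LSTM_O, _LSTM_S = tf.contrib.rnn.static_rnn(lstm_cell, _Hsplit, dtype=tf.float32)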

learning_rate = 0.001
x = tf.placeholder("float", [None, nsteps, diminput])
y = tf.placeholder("float", [None, dimoutput])
myrnn = _RNN(x, weights, biases, nsteps, 'basic')   # 'basic' is the name of the variable scope
pred = myrnn['O']
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optm = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
accr = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1)), tf.float32))
init = tf.global_variables_initializer()
print("Network Ready!")

training_epochs = 5
batch_size = 16
display_step = 1
sess = tf.Session()
sess.run(init)
print("Start optimization")
for epoch in range(training_epochs):
    avg_cost = 0.
    total_batch = 100                  # train on a fixed number of batches per epoch
    for i in range(total_batch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        batch_xs = batch_xs.reshape((batch_size, nsteps, diminput))
        feeds = {x: batch_xs, y: batch_ys}
        sess.run(optm, feed_dict=feeds)
        avg_cost += sess.run(cost, feed_dict=feeds) / total_batch
    if epoch % display_step == 0:
        print("Epoch: %03d/%03d cost: %.9f" % (epoch, training_epochs, avg_cost))
        feeds = {x: batch_xs, y: batch_ys}
        train_acc = sess.run(accr, feed_dict=feeds)
        print("Training accuracy: %.3f" % (train_acc))
        testimgs = mnist.test.images                     # evaluate on the full test set
        testlabels = mnist.test.labels
        ntest = testimgs.shape[0]
        testimgs = testimgs.reshape((ntest, nsteps, diminput))
        feeds = {x: testimgs, y: testlabels}             # this _RNN keeps no separate state placeholder, so only x and y are fed
        test_acc = sess.run(accr, feed_dict=feeds)
        print("Test accuracy: %.3f" % (test_acc))
print("Optimization Finished.")

③ Captcha generation

import random
import numpy as np
import matplotlib.pyplot as plt
from captcha.image import ImageCaptcha   # captcha image generator
from PIL import Image

number = ['0', '1', '2', '3', '4', '5', '6'...]
alphabet = ['a', 'b', 'c', 'd'..]
ALPHABET = ['A', 'B', 'C', 'D'...]

def random_captcha_text(char_set = number+alphabet+ALPHABET, captcha_size=4):
    captcha_text = []
    for i in range(captcha_size):
        c=random.choice(char_set)
        captcha_text.append(c)
    return captcha_text
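
With the default arguments this returns a list of four random characters; a different length can be requested via captcha_size:

# example calls (output is random; the commented values are only illustrative)
print(random_captcha_text())                   # e.g. ['g', '7', 'Q', 'p']
print(random_captcha_text(captcha_size=6))     # six characters instead of four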

def gen_captcha_text_and_image():
    image = ImageCaptcha()
    captcha_text = random_captcha_text()
    captcha_text = ''.join(captcha_text)
    captcha = image.generate(captcha_text)   # render the captcha text to an in-memory image stream
    captcha_image = Image.open(captcha)
    captcha_image = np.array(captcha_image)
    return captcha_text, captcha_image
if __name__ =='__main__':
    text, image = gen_captcha_text_and_image()
    f=plt.figure()
    ax = f.add_subplot(111)
    ax.text(0.1, 0.9, text, ha='center', va='center', transform=ax.transAxes)
    plt.imshow(image)
    plt.show()
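
For building a data set it is often handier to write generated captchas straight to disk. A minimal sketch using ImageCaptcha.write and a hypothetical output directory captcha_out/:

# minimal sketch: save a few generated captchas as PNG files named after their text
import os                                       # for directory and path handling

out_dir = 'captcha_out'                         # example directory name
os.makedirs(out_dir, exist_ok=True)
gen = ImageCaptcha()
for _ in range(10):
    text = ''.join(random_captcha_text())
    gen.write(text, os.path.join(out_dir, text + '.png'))   # render and save in one call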