①實現VGG網絡模型
# Resolve the pretrained VGG-19 weight file relative to the working directory.
cwd = os.getcwd()
VGG_PATH = cwd + "/data/imagenet-vgg-verydeep-19.mat"
data = scipy.io.loadmat(VGG_PATH)

# Average the stored normalization array over its first two axes
# (the mean of the three channels).
mean = data['normalization'][0][0][0]
mean_pixel = np.mean(mean, axis=(0, 1))
weights = data['layers'][0]

# Show the shapes of the first layer's two parameter arrays, to find which
# dimensions the weight parameters use.
for param_idx in (0, 1):
    print(weights[0][0][0][0][0][param_idx].shape)
def net(data_path, input_image):
    """Build the VGG-19 convolutional stack from a pretrained .mat file.

    Args:
        data_path: Path of the MatConvNet VGG-19 weight file to load.
        input_image: Tensor fed into the first layer.
            # assumes NHWC layout (batch, height, width, channels) - TODO confirm

    Returns:
        A tuple ``(layer_outputs, mean_pixel, layers)`` where
        ``layer_outputs`` maps each layer name to its output tensor,
        ``mean_pixel`` is the normalization array averaged over its first
        two axes, and ``layers`` is the tuple of layer names in order.
    """
    # The position of each name is also used as the index into the weight
    # array loaded below, so the order must match the order in the file.
    layers = (
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',

        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',

        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2',
        'conv3_3', 'relu3_3', 'conv3_4', 'relu3_4', 'pool3',

        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2',
        'conv4_3', 'relu4_3', 'conv4_4', 'relu4_4', 'pool4',

        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2',
        'conv5_3', 'relu5_3', 'conv5_4', 'relu5_4',
    )

    # Load from the path the caller passed in, not from a module global.
    data = scipy.io.loadmat(data_path)
    mean = data['normalization'][0][0][0]
    mean_pixel = np.mean(mean, axis=(0, 1))  # mean of the three channels
    weights = data['layers'][0]

    layer_outputs = {}
    current = input_image
    for i, name in enumerate(layers):
        kind = name[:4]
        if kind == 'conv':
            # Each conv entry holds the kernel array and the bias array.
            kernels, bias = weights[i][0][0][0][0]
            # Swap the first two kernel axes.
            # presumably converts MatConvNet (width, height, in, out) to
            # TensorFlow (height, width, in, out) - verify against the file
            kernels = np.transpose(kernels, (1, 0, 2, 3))
            bias = bias.reshape(-1)
            current = _conv_layer(current, kernels, bias)
        elif kind == 'relu':
            current = tf.nn.relu(current)
        elif kind == 'pool':
            current = _pool_layer(current)
        # Keep every intermediate output, keyed by layer name.
        layer_outputs[name] = current

    assert len(layer_outputs) == len(layers)
    return layer_outputs, mean_pixel, layers


print("Network for VGG ready")
def _conv_layer(input, weights, bias):
    """Apply a stride-1, 'SAME'-padded 2-D convolution followed by a bias add.

    Args:
        input: Tensor to convolve.
        weights: Kernel values; wrapped in ``tf.constant`` so they stay fixed.
        bias: Bias values added to the convolution output.

    Returns:
        The convolution output with the bias added.
    """
    # The keyword is ``strides``; the previous spelling ``striders`` raised
    # a TypeError for an unexpected keyword argument.
    conv = tf.nn.conv2d(input, tf.constant(weights), strides=(1, 1, 1, 1),
                        padding='SAME')
    return tf.nn.bias_add(conv, bias)
def _pool_layer(input):
    """Apply max pooling with a (1, 2, 2, 1) window and stride, 'SAME' padding.

    Args:
        input: Tensor to pool.

    Returns:
        The pooled tensor.
    """
    return tf.nn.max_pool(input, ksize=(1, 2, 2, 1), strides=(1, 2, 2, 1),
                          padding='SAME')
def preprocess(image, mean_pixel):
    """Return ``image`` with ``mean_pixel`` subtracted.

    Args:
        image: Image values (anything supporting ``-`` with ``mean_pixel``).
        mean_pixel: Mean value(s) to remove.

    Returns:
        ``image - mean_pixel``.
    """
    return image - mean_pixel
def unprocess(image, mean_pixel):
    """Return ``image`` with ``mean_pixel`` added back (inverse of preprocess).

    Args:
        image: Mean-subtracted image values.
        mean_pixel: Mean value(s) to restore.

    Returns:
        ``image + mean_pixel``.
    """
    return image + mean_pixel
def imread(path):
    """Read the image at ``path`` and return it as a float array.

    Args:
        path: Location of the image file.

    Returns:
        The image data converted to Python ``float`` dtype.
    """
    # ``float`` is what the removed ``np.float`` alias pointed to.
    # NOTE(review): ``scipy.misc.imread`` only exists in older SciPy
    # releases - confirm the installed version still provides it.
    return scipy.misc.imread(path).astype(float)
def imsave(path, img):
    """Clip ``img`` to the 0-255 range, cast to uint8 and write it to ``path``.

    Args:
        path: Destination file.
        img: Image values to save.
    """
    # Clip the ``img`` argument; the earlier code read an undefined name.
    img = np.clip(img, 0, 255).astype(np.uint8)
    # NOTE(review): ``scipy.misc.imsave`` only exists in older SciPy
    # releases - confirm the installed version still provides it.
    scipy.misc.imsave(path, img)
print("Functions for VGG ready")

# NOTE(review): both path literals below look like placeholders with the
# real directory / file names stripped out - fill them in before running.
VGG_PATH =cwd + "/ / .mat"
IMG_PATH = cwd + "/data/ .jpg"

input_image = imread(IMG_PATH)
# Prepend a batch dimension of 1 to the image's first three dimensions.
shape = (1, input_image.shape[0], input_image.shape[1], input_image.shape[2])

with tf.Session() as sess:
    image = tf.placeholder('float', shape=shape)
    nets, mean_pixel, all_layers = net(VGG_PATH, image)
    # NOTE(review): bare expression with no effect; the statement appears
    # truncated in the source.
    input_image
②實現RNN網絡模型
接受輸入:
序列化 b1--b2--b3--b4
例子:處理MNIST數據集
整體數據 → 序列數據
# Sizes for the recurrent model. Each sample is reshaped elsewhere to
# (nsteps, diminput) before being fed in.
diminput = 28
dimhidden = 128
dimoutput = nclasses
nsteps = 28  # number of time steps

# Randomly initialised projection weights: input -> hidden, hidden -> output.
weights = {
    'hidden': tf.Variable(tf.random_normal([diminput, dimhidden])),
    'out': tf.Variable(tf.random_normal([dimhidden, dimoutput])),
}
# Matching biases. The comma after the 'hidden' entry was missing, which
# made the dict literal a syntax error.
biases = {
    'hidden': tf.Variable(tf.random_normal([dimhidden])),
    'out': tf.Variable(tf.random_normal([dimoutput])),
}
def _RNN(_X, _W, _b, _nsteps, _name):
    """Build an LSTM over a sequence input and project its last output.

    Args:
        _X: Input tensor; axes 0 and 1 are swapped to give
            [nsteps, batchsize, diminput].
        _W: Dict with 'hidden' and 'out' weight variables.
        _b: Dict with 'hidden' and 'out' bias variables.
        _nsteps: Number of pieces the hidden activations are split into.
        _name: Variable-scope name under which the LSTM cell is created.

    Returns:
        Dict of the intermediate tensors, keyed 'X', 'H', 'Hsplit',
        'LSTM_0', 'LSTM_S' and '0' (the final output).
    """
    # => [nsteps, batchsize, diminput]
    _X = tf.transpose(_X, [1, 0, 2])
    # Collapse the leading axes so one matmul handles every time step.
    _X = tf.reshape(_X, [-1, diminput])
    _H = tf.matmul(_X, _W['hidden']) + _b['hidden']
    # Split along axis 0 into _nsteps pieces (pre-1.0 argument order:
    # split_dim, num_split, value).
    _Hsplit = tf.split(0, _nsteps, _H)
    with tf.variable_scope(_name) as scope:
        # Variable sharing.
        # NOTE(review): enabling reuse before the scope's variables exist
        # may make the first call fail - confirm with the TF version in use.
        scope.reuse_variables()
        lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(dimhidden, forget_bias=1.0)
        _LSTM_0, _LSTM_S = tf.nn.rnn(lstm_cell, _Hsplit, dtype=tf.float32)
    # Only the output at the last position is used for the prediction.
    _0 = tf.matmul(_LSTM_0[-1], _W['out']) + _b['out']
    return {
        'X': _X,
        'H': _H,  # was written "'H',_H", which is a syntax error in a dict
        'Hsplit': _Hsplit,
        'LSTM_0': _LSTM_0,
        'LSTM_S': _LSTM_S,
        '0': _0,
    }
learning_rate = 0.001

x = tf.placeholder("float", [None, nsteps, diminput])
# Labels are fed one row per sample and tf.argmax(y, 1) below reduces over
# axis 1, so the placeholder needs a leading batch dimension.
y = tf.placeholder("float", [None, dimoutput])

# 'basic' is the variable-scope name used inside _RNN.
myrnn = _RNN(x, weights, biases, nsteps, 'basic')
pred = myrnn['0']

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y))
optm = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
# Fraction of samples whose arg-max prediction matches the arg-max label.
accr = tf.reduce_mean(
    tf.cast(tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1)), tf.float32))
init = tf.global_variables_initializer()
print("Network Ready!")
training_epochs = 5
batch_size = 16
display_step = 1

sess = tf.Session()
sess.run(init)
print("Start optimization")
for epoch in range(training_epochs):
    avg_cost = 0
    total_batch = 100
    for i in range(total_batch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        # Reshape each sample into nsteps rows of diminput values.
        batch_xs = batch_xs.reshape((batch_size, nsteps, diminput))
        feeds = {x: batch_xs, y: batch_ys}
        sess.run(optm, feed_dict=feeds)
        avg_cost += sess.run(cost, feed_dict=feeds) / total_batch
    if epoch % display_step == 0:
        # The format string previously had two specifiers for three
        # arguments, which raises TypeError.
        print("Epoch: %03d/%03d cost: %.9f"
              % (epoch, training_epochs, avg_cost))
        # Training accuracy is measured on the last batch of the epoch.
        feeds = {x: batch_xs, y: batch_ys}
        train_acc = sess.run(accr, feed_dict=feeds)
        print("Training accuracy:%.3f" % (train_acc))
        testimgs = testimgs.reshape((ntest, nsteps, diminput))
        # Only x and y are placeholders in this graph; the undefined
        # ``istate`` entry was dropped from the feed.
        feeds = {x: testimgs, y: testlabels}
        test_acc = sess.run(accr, feed_dict=feeds)
        print("Test accuracy:%.3f" % (test_acc))
print("Optimization Finished.")
③驗證碼生成與識別
from captcha.image import ImageCaptcha #驗證碼
from PIL import Image
# Candidate characters for captcha text: digits, lower-case and upper-case
# letters. The lists were elided with "..." in the notes, which is not valid
# Python; they are written out in full here.
number = list("0123456789")
alphabet = list("abcdefghijklmnopqrstuvwxyz")
ALPHABET = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
# Generate the random captcha string.
def random_captcha_text(char_set = number+alphabet+ALPHABET, captcha_size=4):
    """Pick ``captcha_size`` random characters from ``char_set``.

    Args:
        char_set: Sequence of candidate characters.
        captcha_size: Number of characters to draw.

    Returns:
        A list of the chosen characters, in draw order.
    """
    return [random.choice(char_set) for _ in range(captcha_size)]
# Generate a captcha image.
def gen_captcha_text_and_image():
    """Create a random captcha and return its text and pixel array.

    Returns:
        A tuple ``(captcha_text, captcha_image)``: the text as a single
        string and the rendered image converted to a NumPy array.
    """
    image = ImageCaptcha()
    # random_captcha_text returns a list of characters; join into one string.
    captcha_text = ''.join(random_captcha_text())
    captcha = image.generate(captcha_text)  # render the image
    captcha_image = np.array(Image.open(captcha))
    return captcha_text, captcha_image
# Display one generated captcha with its text drawn on the axes.
if __name__ =='__main__':
    text, image = gen_captcha_text_and_image()
    f = plt.figure()
    ax = f.add_subplot(111)
    # Place the text in axes-relative coordinates.
    ax.text(0.1, 0.9, text, ha='center', va='center', transform=ax.transAxes)
    plt.imshow(image)
    plt.show()
# NOTE(review): this block calls train_crack_captcha_cnn(), which is defined
# further down in the file; it must run only after that definition (and
# get_next_batch) has been executed.
if __name__ == '__main__':
    train = 0
    # Training phase.
    if train == 0:
        # Restrict the character set to the ten digits.
        number = list("0123456789")
        text, image = gen_captcha_text_and_image()
        print('驗證碼圖像channel:', image.shape)
        # Image size.
        IMAGE_HEIGHT = 60
        IMAGE_WIDTH = 160
        MAX_CAPTCHA = len(text)
        print("驗證碼文本最長字符數", MAX_CAPTCHA)
        # Text-to-vector encoding uses this character set.
        char_set = number
        CHAR_SET_LEN = len(char_set)
        X = tf.placeholder(tf.float32, [None, IMAGE_HEIGHT*IMAGE_WIDTH])
        # Each character is encoded separately, e.g. the digit 2 is
        # represented as 0010000000.
        Y = tf.placeholder(tf.float32, [None, MAX_CAPTCHA*CHAR_SET_LEN])
        keep_prob = tf.placeholder(tf.float32)  # dropout keep probability
        train_crack_captcha_cnn()
    # Test phase.
    if train == 1:
        number = list("0123456789")
        IMAGE_HEIGHT = 60
        IMAGE_WIDTH = 160
# Network training.
def train_crack_captcha_cnn():
    """Train the captcha CNN until evaluation accuracy exceeds 0.85.

    Builds the loss, optimizer and accuracy ops on top of
    ``crack_captcha_cnn()``, then loops over generated batches. Every 100
    steps accuracy is measured on a fresh batch of 100; once it exceeds
    0.85 the model is saved under ``./model/crack.model`` and the loop ends.
    Reads the module-level ``X``, ``Y``, ``keep_prob``, ``MAX_CAPTCHA`` and
    ``CHAR_SET_LEN``.
    """
    output = crack_captcha_cnn()  # network architecture
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(output, Y))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001).minimize(loss)

    # Compare the arg-max character at each captcha position.
    predict = tf.reshape(output, [-1, MAX_CAPTCHA, CHAR_SET_LEN])
    max_idx_p = tf.argmax(predict, 2)
    max_idx_l = tf.argmax(tf.reshape(Y, [-1, MAX_CAPTCHA, CHAR_SET_LEN]), 2)
    correct_pred = tf.equal(max_idx_p, max_idx_l)
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        step = 0
        while True:
            batch_x, batch_y = get_next_batch(64)
            # The keep_prob value was missing in the notes.
            # NOTE(review): 0.75 is an assumed training keep probability -
            # confirm or tune.
            _, loss_ = sess.run(
                [optimizer, loss],
                feed_dict={X: batch_x, Y: batch_y, keep_prob: 0.75})
            if step % 100 == 0:
                batch_x_test, batch_y_test = get_next_batch(100)
                # Feed keep_prob = 1.0 so nothing is dropped when measuring
                # accuracy (the placeholder was not fed at all before).
                acc = sess.run(
                    accuracy,
                    feed_dict={X: batch_x_test, Y: batch_y_test,
                               keep_prob: 1.0})
                print(step, acc)
                if acc > 0.85:
                    saver.save(sess, "./model/crack.model", global_step=step)
                    break
            step += 1
# Generate one training batch.
def get_next_batch(batch_size=128):
    """Generate ``batch_size`` captcha samples as flattened arrays.

    Args:
        batch_size: Number of samples to generate.

    Returns:
        A tuple ``(batch_x, batch_y)``: ``batch_x`` has one flattened,
        grayscale, 0-1 scaled image per row and ``batch_y`` the matching
        text vector from ``text2vec``.
    """
    batch_x = np.zeros([batch_size, IMAGE_HEIGHT*IMAGE_WIDTH])
    batch_y = np.zeros([batch_size, MAX_CAPTCHA*CHAR_SET_LEN])

    def wrap_gen_captcha_text_and_image():
        # Regenerate until the image has the expected shape.
        while True:
            text, image = gen_captcha_text_and_image()
            if image.shape == (IMAGE_HEIGHT, IMAGE_WIDTH, 3):
                # The misspelling ``iamge`` raised NameError here.
                return text, image

    for i in range(batch_size):
        text, image = wrap_gen_captcha_text_and_image()
        image = convert2gray(image)
        batch_x[i, :] = image.flatten() / 255
        batch_y[i, :] = text2vec(text)
    return batch_x, batch_y