基於卷積神經網絡的手寫字符壓縮分類
壓縮感知:通過對數據進行按照一定數據量進行壓縮,傳輸,然後再利用重構算法將壓縮的數據重構出原始的數據。
將壓縮感知與卷積神經網絡結合,不但可以提高重構的精度,而且可以忽略信號的稀疏字典的這一項要求。
所以在數據不完整的情況下,對數據進行分類,可以使用卷積神經網絡實現。
1.隨機採樣矩陣不訓練
手寫字符集的分類過程可以表示爲:
使用Tensorflow框架,對圖中過程進行代碼整理:
python2.7
import collections

import matplotlib.pyplot as plt
import numpy as np

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
# Load the MNIST handwritten-digit dataset (images flattened to 784-dim
# float vectors, labels one-hot encoded).
mnist = input_data.read_data_sets('/home/awen/Juanjuan/image_classifier/data/MNIST_data/', one_hot=True)
trX, trY, teX, teY = mnist.train.images, mnist.train.labels, mnist.test.images, mnist.test.labels
image = trX      # training images (55000 x 784)
tesimage = teX   # test images
# Hyper-parameters.
learning_rate = 0.001
n_classes = 10
batch_size = 55
training_iters = 200000
# BUGFIX: `rt` (the sampling/compression ratio of the random measurement
# matrix) is used below but was never defined anywhere — a NameError at run
# time. Pick the ratio per experiment (the write-up mentions ratios down to
# 0.01).
rt = 0.25
# Placeholders for input images and one-hot labels.
x = tf.placeholder(tf.float32, [None, 28, 28, 1])
y = tf.placeholder(tf.int64, [None, n_classes])
dropout = 0.75  # intended keep-probability (the feed_dicts below use 1.0)
keep_prob = tf.placeholder(tf.float32)
# Convolution helper.
def conv2d(name, x, W, b, strides=1):
    """SAME-padded 2-D convolution with bias, followed by ReLU."""
    conv = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
    biased = tf.nn.bias_add(conv, b)
    return tf.nn.relu(biased, name=name)
def maxpool2d(name, x, k=2):
    """k x k max-pooling with stride k and SAME padding."""
    window = [1, k, k, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME', name=name)
def norm(name, linput, lsize=4):
    """Local response normalisation (AlexNet-style constants)."""
    return tf.nn.lrn(linput, depth_radius=lsize, bias=1.0,
                     alpha=0.001 / 9.0, beta=0.75, name=name)
# Weights and biases are initialised by hand, layer by layer.
# NOTE(review): this could be generated from a layer-spec table instead of
# being written out entry by entry.
# Shapes: w1/w2 are 5x5 conv kernels (1->20, 20->50 channels); wd1 maps the
# flattened 7x7x50 pool output to 500 hidden units; 'out' maps 500 -> 10 classes.
weights = {
'w1': tf.Variable(tf.random_normal([5, 5, 1, 20], stddev=0.01)),
'w2': tf.Variable(tf.random_normal([5, 5, 20, 50], stddev=0.01)),
'wd1': tf.Variable(tf.random_normal([7*7*50, 500], stddev=0.01)),
'out': tf.Variable(tf.random_normal([500, 10], stddev=0.01)),
}
# One bias vector per layer, matching the output channel counts above.
biases = {
'bc1': tf.Variable(tf.random_normal([20])),
'bc2': tf.Variable(tf.random_normal([50])),
'bd1': tf.Variable(tf.random_normal([500])),
'out': tf.Variable(tf.random_normal([10]))
}
# Network definition.
def cs_lenet5(x, weights, biases, dropout):
    """LeNet-5-style classifier: two conv/pool stages and one hidden FC layer.

    Returns the un-normalised class logits.
    """
    images = tf.reshape(x, shape=[-1, 28, 28, 1])
    # Two convolution + max-pool stages (28x28 -> 14x14 -> 7x7).
    c1 = conv2d('conv1', images, weights['w1'], biases['bc1'])
    p1 = maxpool2d('pool1', c1, k=2)
    c2 = conv2d('conv2', p1, weights['w2'], biases['bc2'])
    p2 = maxpool2d('pool2', c2, k=2)
    # Fully connected head.
    flat_dim = weights['wd1'].get_shape().as_list()[0]
    flat = tf.reshape(p2, [-1, flat_dim])
    hidden = tf.nn.relu(tf.add(tf.matmul(flat, weights['wd1']), biases['bd1']))
    hidden = tf.nn.dropout(hidden, dropout)
    return tf.add(tf.matmul(hidden, weights['out']), biases['out'])
# Build the classification graph: logits, cross-entropy loss, Adam optimiser,
# accuracy metric, and the variable initialiser.
pred = cs_lenet5(x, weights, biases, keep_prob)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.9, beta2=0.999).minimize(cost)
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32)) # accuracy computation
init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
# Train the classifier on randomly-sampled (compressed) images.
# The original lines carried markdown bold markers (`**#採樣矩陣**`) fused into
# the code, which is a SyntaxError, and all indentation was lost; both are
# repaired here. `print` is also converted to the function form so the script
# is valid on Python 3 as well as 2.7.
with tf.Session() as sess:
    sess.run(init_op)
    mse_loss1 = []  # per-epoch training accuracy
    mse_loss2 = []  # per-epoch test accuracy
    accuracy1 = collections.defaultdict(list)
    for epoch in range(50):
        print(epoch)
        # Random Gaussian measurement (sampling) matrix, redrawn each epoch:
        # 784 pixels -> int(rt * 784) measurements.
        m = np.random.normal(0, 0.1, (image.shape[1], int(rt * image.shape[1])))
        # Compress the training images, then "reconstruct" with the transpose.
        sm = np.dot(image, m)
        rec = np.dot(sm, m.T).reshape((-1, 28, 28, 1))
        # BUGFIX: the test set must be measured with the SAME matrix as the
        # training set. The original drew a fresh matrix `m1` (and with a
        # different std, 0.5 vs 0.1), so train and test inputs came from
        # different sampling operators.
        tesm = np.dot(tesimage, m)
        rec_tes = np.dot(tesm, m.T).reshape((-1, 28, 28, 1))
        for i in range(1000):  # 1000 * 55 = 55000 = full training set
            batch_x = rec[i * batch_size:(i + 1) * batch_size]
            batch_y = trY[i * batch_size:(i + 1) * batch_size]
            _, loss, acc = sess.run([optimizer, cost, accuracy],
                                    feed_dict={x: batch_x, y: batch_y, keep_prob: 1})
            if i % 100 == 0:
                print('After %d training step(s), loss on training is %.6f, Training accuracy is %.6f' % (i + 1, loss, acc))
        # Evaluate on the compressed test set once per epoch.
        tesloss, tesacc = sess.run([cost, accuracy], feed_dict={x: rec_tes, y: teY, keep_prob: 1})
        print('loss on test is %.6f, Test accuracy is %.6f' % (tesloss, tesacc))
        mse_loss1.append(acc)
        mse_loss2.append(tesacc)
    accuracy1['train'] = mse_loss1
    accuracy1['test'] = mse_loss2
    plt.plot(mse_loss1, color='red', label='train data')
    plt.plot(mse_loss2, color='black', label='test data')
    plt.xlabel('epoch')
    plt.ylabel('Accuracy')
2.訓練隨機矩陣
手寫字符集的分類過程將上圖中的Φ進行訓練,形成端到端的訓練過程。
# Convolution used for the sampling / reconstruction layers
# (no bias, VALID padding).
def conv2d_cs(name, x, W, strides):
    """Bias-free VALID-padded convolution followed by ReLU."""
    measured = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='VALID')
    return tf.nn.relu(measured, name=name)
def conv2d(name, x, W, b, strides=1):
    """SAME-padded convolution plus bias, ReLU-activated."""
    y = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
    return tf.nn.relu(tf.nn.bias_add(y, b), name=name)
def maxpool2d(name, x, k=2):
    """Max-pool with a k x k window, stride k, SAME padding."""
    shape = [1, k, k, 1]
    return tf.nn.max_pool(x, ksize=shape, strides=shape, padding='SAME', name=name)
def norm(name, linput, lsize=4):
    """Local response normalisation with AlexNet-style constants."""
    return tf.nn.lrn(linput, depth_radius=lsize, bias=1.0,
                     alpha=0.001 / 9.0, beta=0.75, name=name)
# As in part 1, plus two trainable compressed-sensing layers:
# wc1 is a full-image 28x28 kernel producing 784 output channels (the learned
# sampling matrix); wc2 is a 1x1 kernel mapping 784 measurements back to 784
# values (the learned reconstruction).
# NOTE(review): 784 output channels means no actual compression here —
# presumably the channel count is reduced for other compression ratios;
# confirm against the experiments.
weights = {
'wc1': tf.Variable(tf.random_normal([28, 28, 1, 784], stddev=0.01)),
'wc2': tf.Variable(tf.random_normal([1, 1, 784, 784], stddev=0.01)),
'w1': tf.Variable(tf.random_normal([5, 5, 1, 20], stddev=0.01)),
'w2': tf.Variable(tf.random_normal([5, 5, 20, 50], stddev=0.01)),
'wd1': tf.Variable(tf.random_normal([7*7*50, 500], stddev=0.01)),
'out': tf.Variable(tf.random_normal([500, 10], stddev=0.01)),
}
# Bias vectors for the classifier layers (the CS layers are bias-free).
biases = {
'bc1': tf.Variable(tf.random_normal([20])),
'bc2': tf.Variable(tf.random_normal([50])),
'bd1': tf.Variable(tf.random_normal([500])),
'out': tf.Variable(tf.random_normal([10]))
}
def cs_lenet5(x, weights, biases, dropout):
    """Compressed-sensing LeNet-5: a trainable sampling layer and a trainable
    reconstruction layer in front of the usual two conv/pool stages.

    Returns (logits, reconstructed_image) so the caller can inspect the
    reconstruction.

    BUGFIX: the original lines had markdown bold markers (`**#採樣層**`,
    `**#重構層**`) fused into the code, which is a SyntaxError; they are
    restored to plain comments here.
    """
    x = tf.reshape(x, shape=[-1, 28, 28, 1])
    # norm1 = norm('norm1', x, lsize=4)
    # Sampling layer: one 28x28 VALID convolution with stride 28 collapses
    # each image into a 1x1x784 measurement vector.
    conv1_cs = conv2d_cs('conv1_cs', x, weights['wc1'], strides=28)
    # Reconstruction layer: a 1x1 convolution maps measurements back to 784 values.
    conv2_cs = conv2d_cs('conv2_cs', conv1_cs, weights['wc2'], strides=1)
    conv3_rh = tf.reshape(conv2_cs, [-1, 28, 28, 1])
    # norm1 = norm('norm1', conv3_rh)
    # Standard LeNet-5 classifier applied to the reconstructed image.
    conv1 = conv2d('conv1', conv3_rh, weights['w1'], biases['bc1'])
    pool1 = maxpool2d('pool1', conv1, k=2)
    conv2 = conv2d('conv2', pool1, weights['w2'], biases['bc2'])
    pool2 = maxpool2d('pool2', conv2, k=2)
    # Fully connected layers.
    fc1 = tf.reshape(pool2, [-1, weights['wd1'].get_shape().as_list()[0]])
    fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
    fc1 = tf.nn.relu(fc1)
    # fc1 = tf.nn.dropout(fc1, dropout)  # dropout disabled in this variant
    out = tf.add(tf.matmul(fc1, weights['out']), biases['out'])
    return out, conv3_rh
# Build the end-to-end graph; `csimg` exposes the reconstructed image tensor.
pred, csimg = cs_lenet5(x, weights, biases, keep_prob)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.9, beta2=0.999).minimize(cost)
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
# Train the end-to-end model (sampling matrix learned jointly).
# Lost indentation reconstructed; `print` converted to the function form.
with tf.Session() as sess:
    sess.run(init_op)
    mse_loss1 = []  # per-epoch training accuracy
    mse_loss2 = []  # per-epoch test accuracy
    accuracy1 = collections.defaultdict(list)
    for epoch in range(50):
        print(epoch)
        for i in range(1000):
            # BUGFIX: the original fed the undefined names `trx`/`tex`
            # (NameError); the intended arrays are trX/teX, and they must be
            # reshaped to match the [None, 28, 28, 1] placeholder.
            batch_x = trX[i * batch_size:(i + 1) * batch_size].reshape((-1, 28, 28, 1))
            batch_y = trY[i * batch_size:(i + 1) * batch_size]
            _, img1, loss, acc = sess.run([optimizer, csimg, cost, accuracy],
                                          feed_dict={x: batch_x, y: batch_y, keep_prob: 1.0})
            if i % 100 == 0:
                print('After %d training step(s), loss on training is %.6f, Training accuracy is %.6f' % (i + 1, loss, acc))
        # Evaluate on the full test set once per epoch.
        tesloss, tesacc = sess.run([cost, accuracy],
                                   feed_dict={x: teX.reshape((-1, 28, 28, 1)), y: teY, keep_prob: 1.0})
        print('loss on test is %.6f, Test accuracy is %.6f' % (tesloss, tesacc))
        mse_loss1.append(acc)
        mse_loss2.append(tesacc)
    accuracy1['train'] = mse_loss1
    accuracy1['test'] = mse_loss2
    plt.plot(mse_loss1, color='red', label='train data')
    plt.plot(mse_loss2, color='black', label='test data')
    plt.xlabel('epoch')
    plt.ylabel('Accuracy')
通過對比訓練和不訓練兩種情況下的手寫字符集的準確率,結果如下圖所示:
我們可以發現將採樣矩陣進行訓練,在壓縮比爲0.01的情況下,訓練得到的結果依然很高。
參考論文:
[1].Lohit S , Kulkarni K , Turaga P . Direct inference on compressive measurements using convolutional neural networks[C]// IEEE International Conference on Image Processing. IEEE, 2016.
[2].Adler A, Elad M, Zibulevsky M. Compressed Learning: A Deep Neural Network Approach[J]. 2016.
我寫的代碼真的太囉嗦了,在學習的路上繼續改進吧。