Compressed Classification of Handwritten Characters Based on Convolutional Neural Networks

Compressed sensing: the data are compressed down to a smaller number of measurements, transmitted, and a reconstruction algorithm then recovers the original data from those measurements.
Combining compressed sensing with a convolutional neural network not only improves reconstruction accuracy, but also removes the requirement that the signal be sparse in some dictionary.
Even when only such incomplete (compressed) data are available, a convolutional neural network can still be used to classify them.
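A minimal NumPy sketch of this sampling step and the crude linear (transpose-based) reconstruction used later in this post; the compression ratio rt = 0.25 is an assumed value for illustration:

import numpy as np

n = 784                                   # dimension of a flattened 28x28 image
rt = 0.25                                 # compression ratio (assumed value)
m = int(rt * n)                           # number of measurements
phi = np.random.normal(0, 0.1, (n, m))    # random Gaussian sampling matrix
x = np.random.rand(n)                     # stand-in for a flattened image
y = x.dot(phi)                            # compressed measurements, shape (m,)
x_rec = y.dot(phi.T)                      # crude linear reconstruction, shape (n,)
print(y.shape, x_rec.shape)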
1. Untrained random sampling matrix
The classification pipeline for the handwritten character set can be represented as:
(Figure: compressed sampling and classification pipeline)
Using the TensorFlow framework (Python 2.7), the pipeline in the figure is implemented as follows:

import numpy as np
import collections
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import matplotlib.pyplot as plt
# load the MNIST handwritten character set
mnist = input_data.read_data_sets('/home/awen/Juanjuan/image_classifier/data/MNIST_data/', one_hot=True)
trX, trY, teX, teY = mnist.train.images, mnist.train.labels, mnist.test.images, mnist.test.labels
image = trX       # training images
tesimage = teX    # test images
# hyperparameters
learning_rate = 0.001
n_classes = 10
batch_size = 55
training_iters = 200000
rt = 0.25   # compression ratio (measurements / 784); the value is not given in the original post and is assumed here
# placeholders
x = tf.placeholder(tf.float32, [None, 28, 28, 1])
y = tf.placeholder(tf.float32, [None, n_classes])   # one-hot labels, so float32 rather than int64
dropout = 0.75
keep_prob = tf.placeholder(tf.float32)
#conv
def conv2d(name, x, W, b, strides=1):
    x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
    x = tf.nn.bias_add(x, b)
    return tf.nn.relu(x, name=name)
def maxpool2d(name, x, k=2):
    return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1], padding='SAME', name=name)
def norm(name, linput, lsize=4):
    return tf.nn.lrn(linput, lsize, bias=1.0, alpha=0.001/9.0, beta=0.75, name=name)
# Weights and biases are defined the clumsy way below; there must be a neater way to do this...
weights = {
   'w1': tf.Variable(tf.random_normal([5, 5, 1, 20], stddev=0.01)),
   'w2': tf.Variable(tf.random_normal([5, 5, 20, 50], stddev=0.01)),
   'wd1': tf.Variable(tf.random_normal([7*7*50, 500], stddev=0.01)),  
   'out': tf.Variable(tf.random_normal([500, 10], stddev=0.01)),  
}
biases = {
   'bc1': tf.Variable(tf.random_normal([20])),
   'bc2': tf.Variable(tf.random_normal([50])),
   'bd1': tf.Variable(tf.random_normal([500])),
   'out': tf.Variable(tf.random_normal([10]))
}
# network definition:
def cs_lenet5(x, weights, biases, dropout):
    x = tf.reshape(x, shape=[-1, 28, 28, 1])
    conv1 = conv2d('conv1', x, weights['w1'], biases['bc1'])
    pool1 = maxpool2d('pool1', conv1, k=2)
    conv2 = conv2d('conv2', pool1, weights['w2'], biases['bc2'])
    pool2 = maxpool2d('pool2', conv2, k=2)
    # fully connected layers
    fc1 = tf.reshape(pool2, [-1, weights['wd1'].get_shape().as_list()[0]])
    fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
    fc1 = tf.nn.relu(fc1)
    fc1 = tf.nn.dropout(fc1, dropout)
    out = tf.add(tf.matmul(fc1, weights['out']), biases['out'])
    return out
	
pred = cs_lenet5(x, weights, biases, keep_prob)     
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.9, beta2=0.999).minimize(cost)
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))         
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))   # accuracy

init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
with tf.Session() as sess:
    sess.run(init_op)
    mse_loss1 = []
    mse_loss2 = []
    accuracy1 = collections.defaultdict(list)
    for epoch in range(50):
        print epoch
        m = np.random.normal(0, 0.1, (image.shape[1], int(rt*image.shape[1])))  # random Gaussian sampling matrix for the training images
        # compress the training images and reconstruct them linearly via the transpose
        sm = np.dot(image, m)
        rec_m = np.dot(sm, m.T)
        rec = rec_m.reshape((-1, 28, 28, 1))
        m1 = np.random.normal(0, 0.5, (image.shape[1], int(rt*image.shape[1])))   # a separate random sampling matrix for the test images
        tesm = np.dot(tesimage, m1)
        rec_tesm = np.dot(tesm, m1.T)
        rec_tes = rec_tesm.reshape((-1, 28, 28, 1))
        for i in range(1000):
            batch_x, batch_y = rec[i*batch_size:(i+1)*batch_size], trY[i*batch_size:(i+1)*batch_size]
            _, loss, acc = sess.run([optimizer, cost, accuracy], feed_dict={x: batch_x, y: batch_y, keep_prob: 1})
            if i % 100 == 0:
                print 'After %d training step(s),'%(i+1),'loss on training is {:.6f}'.format(loss), 'Training accuracy is {:.6f}'.format(acc)
        tesloss, tesacc = sess.run([cost, accuracy], feed_dict={x: rec_tes, y: teY, keep_prob: 1})
        print 'loss on test is {:.6f}'.format(tesloss), 'Test accuracy is {:.6f}'.format(tesacc)
        mse_loss1.append(acc)
        mse_loss2.append(tesacc)
    accuracy1['train'] = mse_loss1
    accuracy1['test'] = mse_loss2
plt.plot(mse_loss1, color='red', label='train data')
plt.plot(mse_loss2, color='black', label='test data')
plt.xlabel('epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

2. Training the random sampling matrix
In this version, the sampling matrix Φ from the figure above is trained as well, so that sampling, reconstruction and classification form a single end-to-end training process.
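The sampling step y = Φx is implemented here as a convolution: a 28x28 kernel applied with stride 28 and VALID padding covers the 28x28 image exactly once, so each output channel computes one inner product between the image and one learned filter, i.e. one row of Φ. A minimal shape check using the same kernel sizes as the code below:

import tensorflow as tf

phi_conv = tf.Variable(tf.random_normal([28, 28, 1, 784], stddev=0.01))   # 784 learned measurement filters
img = tf.placeholder(tf.float32, [None, 28, 28, 1])
measurements = tf.nn.conv2d(img, phi_conv, strides=[1, 28, 28, 1], padding='VALID')
print(measurements.get_shape())   # (?, 1, 1, 784): one value per measurement filter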

#convolution 
def conv2d_cs(name, x, W, strides):
    x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='VALID')
    return tf.nn.relu(x, name=name)

def conv2d(name, x, W, b, strides=1):
    x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
    x = tf.nn.bias_add(x, b)
    return tf.nn.relu(x, name=name)
	
def maxpool2d(name, x, k=2):
    return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1], padding='SAME', name=name)
	
def norm(name, linput, lsize=4):
    return tf.nn.lrn(linput, lsize, bias=1.0, alpha=0.001/9.0, beta=0.75, name=name)

weights = {
   'wc1': tf.Variable(tf.random_normal([28, 28, 1, 784], stddev=0.01)),   # sampling layer; the number of output channels is the number of measurements
   'wc2': tf.Variable(tf.random_normal([1, 1, 784, 784], stddev=0.01)),   # reconstruction layer; maps measurements back to 784 values
   'w1': tf.Variable(tf.random_normal([5, 5, 1, 20], stddev=0.01)),
   'w2': tf.Variable(tf.random_normal([5, 5, 20, 50], stddev=0.01)),
   'wd1': tf.Variable(tf.random_normal([7*7*50, 500], stddev=0.01)),  
   'out': tf.Variable(tf.random_normal([500, 10], stddev=0.01)),  
}

biases = {
   'bc1': tf.Variable(tf.random_normal([20])),
   'bc2': tf.Variable(tf.random_normal([50])),
   'bd1': tf.Variable(tf.random_normal([500])),
   'out': tf.Variable(tf.random_normal([10]))
}

def cs_lenet5(x, weights, biases, dropout):
    x = tf.reshape(x, shape=[-1, 28, 28, 1])
    #norm1 = norm('norm1', x, lsize=4)
    conv1_cs = conv2d_cs('conv1_cs', x, weights['wc1'], strides=28)   # sampling layer: each output channel is one learned measurement
    conv2_cs = conv2d_cs('conv2_cs', conv1_cs, weights['wc2'], strides=1)  # reconstruction layer: 1x1 conv back to 784 values
    conv3_rh = tf.reshape(conv2_cs, [-1, 28, 28, 1])
    #norm1 = norm('norm1', conv3_rh)
    conv1 = conv2d('conv1', conv3_rh, weights['w1'], biases['bc1'])
    pool1 = maxpool2d('pool1', conv1, k=2)
    conv2 = conv2d('conv2', pool1, weights['w2'], biases['bc2'])
    pool2 = maxpool2d('pool2', conv2, k=2)
    # fully connected layers:
    fc1 = tf.reshape(pool2, [-1, weights['wd1'].get_shape().as_list()[0]])
    fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
    fc1 = tf.nn.relu(fc1)
    #fc1 = tf.nn.dropout(fc1, dropout)
    out = tf.add(tf.matmul(fc1, weights['out']), biases['out'])
    return out, conv3_rh
	
pred, csimg = cs_lenet5(x, weights, biases, keep_prob)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.9, beta2=0.999).minimize(cost)
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
with tf.Session() as sess:
    sess.run(init_op)
    mse_loss1 = []
    mse_loss2 = []
    accuracy1 = collections.defaultdict(list)
    for epoch in range(50):
        print epoch
        for i in range(1000):
            batch_x, batch_y = trX[i*batch_size:(i+1)*batch_size].reshape((-1, 28, 28, 1)), trY[i*batch_size:(i+1)*batch_size]
            _, img1, loss, acc = sess.run([optimizer, csimg, cost, accuracy], feed_dict={x: batch_x, y: batch_y, keep_prob: 1.0})
            if i % 100 == 0:
                print 'After %d training step(s),'%(i+1),'loss on training is {:.6f}'.format(loss), 'Training accuracy is {:.6f}'.format(acc)
        tesloss, tesacc = sess.run([cost, accuracy], feed_dict={x: teX.reshape((-1, 28, 28, 1)), y: teY, keep_prob: 1.0})
        print 'loss on test is {:.6f}'.format(tesloss), 'Test accuracy is {:.6f}'.format(tesacc)
        mse_loss1.append(acc)
        mse_loss2.append(tesacc)
    accuracy1['train'] = mse_loss1
    accuracy1['test'] = mse_loss2 
plt.plot(mse_loss1, color='red', label='train data')
plt.plot(mse_loss2, color='black', label='test data')
plt.xlabel('epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()
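The csimg tensor (the output of the reconstruction layer) is fetched into img1 in the loop above but never used. A small optional sketch for inspecting what the learned sampling and reconstruction layers actually hand to the LeNet-style classifier, assuming img1 still holds the last fetched batch:

import matplotlib.pyplot as plt

plt.figure()
plt.imshow(img1[0].reshape(28, 28), cmap='gray')   # first image of the last training batch after sampling + reconstruction
plt.title('output of the reconstruction layer')
plt.show()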

Comparing the classification accuracy on the handwritten character set with and without training the sampling matrix gives the results shown below:
(Figure: trained sampling matrix vs. direct random sampling)
(Figure: accuracy curves for the trained sampling matrix)
We can see that when the sampling matrix is trained, classification accuracy remains high even at a compression ratio of 0.01.
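Note that the code above uses 784 output channels in wc1, i.e. as many measurements as pixels. To actually run at a compression ratio of 0.01, one would presumably shrink the measurement dimension instead; a hedged sketch, reusing the weights dict defined above (these must be set before cs_lenet5 is built):

rt = 0.01
n_measure = max(1, int(round(rt * 784)))                                              # about 8 measurements at a 0.01 ratio
weights['wc1'] = tf.Variable(tf.random_normal([28, 28, 1, n_measure], stddev=0.01))   # sampling layer outputs n_measure channels
weights['wc2'] = tf.Variable(tf.random_normal([1, 1, n_measure, 784], stddev=0.01))   # reconstruction layer maps back to 784 values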
References:
[1] Lohit S., Kulkarni K., Turaga P. Direct inference on compressive measurements using convolutional neural networks. IEEE International Conference on Image Processing (ICIP), 2016.
[2] Adler A., Elad M., Zibulevsky M. Compressed Learning: A Deep Neural Network Approach. 2016.

My code is still rather verbose; I will keep improving it as I go on learning.
