import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
# To train on a specific GPU, uncomment the two lines below.
#import os
# CUDA_VISIBLE_DEVICES selects the GPU used for training; '0' is the first GPU.
#os.environ['CUDA_VISIBLE_DEVICES']='0'
# Load the MNIST data set from the 'MNIST_data' directory with one-hot labels.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

# Number of examples fed to the network per training step.
batch_size = 4

# Number of whole mini-batches in one pass over the training set.
n_batch = mnist.train.num_examples // batch_size
def weight_variable(shape):
    """Create a weight variable of the given shape.

    The initial values are drawn from a truncated normal distribution
    with a standard deviation of 0.1.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))
def bias_variable(shape):
    """Create a bias variable of the given shape, initialised to 0.1 everywhere."""
    return tf.Variable(tf.constant(0.1, shape=shape))
def conv2d(x, W):
    """Convolve `x` with the filter `W` using stride 1 and 'SAME' padding.

    x: input tensor laid out as [batch, in_height, in_width, in_channels].
    W: filter tensor laid out as
       [filter_height, filter_width, in_channels, out_channels].
    """
    # strides[0] and strides[3] stay at 1; strides[1] and strides[2] are the
    # steps along the two spatial axes. Padding may be "SAME" or "VALID".
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')
def max_pool_2p2(x):
    """Apply max pooling over 2x2 windows with stride 2 and 'SAME' padding."""
    # The same [1, 2, 2, 1] spec serves as both the window size and the stride.
    window = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')
# Network inputs. The leading None leaves the mini-batch size open.
# x holds 784 values per example (reshaped to 28x28 below); y holds 10 values
# per example (the labels are loaded one-hot).
x = tf.placeholder(tf.float32, shape=[None, 784])
y = tf.placeholder(tf.float32, shape=[None, 10])

# Dropout keep probability, supplied through feed_dict at run time.
keep_prob = tf.placeholder(tf.float32)

# View every input row as a 28x28 image with a single channel:
# [mini-batch size, 28, 28, 1].
x_image = tf.reshape(x, [-1, 28, 28, 1])
# ---- First convolutional layer ------------------------------------------
# 5x5 kernels, 1 input channel, 32 kernels (output channels).
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
conv1_pre_activation = conv2d(x_image, W_conv1) + b_conv1
h_conv1 = tf.nn.relu(conv1_pre_activation)
h_pool1 = max_pool_2p2(h_conv1)

# ---- Second convolutional layer -----------------------------------------
# 5x5 kernels, 32 input channels, 64 kernels (output channels).
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
conv2_pre_activation = conv2d(h_pool1, W_conv2) + b_conv2
h_conv2 = tf.nn.relu(conv2_pre_activation)
h_pool2 = max_pool_2p2(h_conv2)
# ---- First fully connected layer, followed by dropout --------------------
# After the convolutional layers each example is a 7x7x64 volume.
flat_size = 7 * 7 * 64
W_ful = weight_variable([flat_size, 1024])
b_ful = bias_variable([1024])

# Flatten each example to a vector so it can go through a matrix multiply.
h_pool2_flat = tf.reshape(h_pool2, [-1, flat_size])
h_fcl = tf.nn.relu(tf.matmul(h_pool2_flat, W_ful) + b_ful)

# Dropout on the fully connected activations; keep_prob is fed at run time.
h_fcl_drop = tf.nn.dropout(h_fcl, keep_prob)
# ---- Second fully connected (output) layer: 1024 -> 10 -------------------
W_ful2 = weight_variable([1024, 10])
b_ful2 = bias_variable([10])
# Keep the raw, pre-softmax scores: the loss below must be computed from them.
logits = tf.matmul(h_fcl_drop, W_ful2) + b_ful2
# Softmax output, used only for picking the predicted class.
prediction = tf.nn.softmax(logits)

# ---- Loss, optimizer and accuracy ----------------------------------------
# softmax_cross_entropy_with_logits applies softmax internally, so it must be
# given the unscaled logits. Passing `prediction` (already a softmax output)
# would apply softmax twice and yield an incorrect loss and gradients.
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits))
lr = 0.5
train = tf.train.AdadeltaOptimizer(lr).minimize(loss)

# An example counts as correct when the highest-scoring class matches the
# position of the largest entry in its label vector.
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(prediction, 1))
acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Accuracy is measured on the whole test set with dropout disabled
    # (keep_prob 1.0); the feed never changes, so build it once.
    test_feed = {x: mnist.test.images, y: mnist.test.labels, keep_prob: 1.0}

    for epoch in range(11):
        # One pass over the training set, one mini-batch per step,
        # keeping 80% of the dropout layer's activations.
        for _ in range(n_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            train_feed = {x: batch_xs, y: batch_ys, keep_prob: 0.8}
            sess.run(train, feed_dict=train_feed)

        # Optional: evaluate `acc` on mnist.train.images / mnist.train.labels
        # (keep_prob 1.0) here to report training accuracy as well.
        test_accuracy = sess.run(acc, feed_dict=test_feed)
        print("iter = %s,test_acc = %s" % (epoch, test_accuracy))
result:
Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
Extracting MNIST_data/train-images-idx3-ubyte.gz
Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
iter = 0,test_acc = 0.9757
iter = 1,test_acc = 0.9845
iter = 2,test_acc = 0.9855
iter = 3,test_acc = 0.9877
iter = 4,test_acc = 0.9868
iter = 5,test_acc = 0.9871
iter = 6,test_acc = 0.989
iter = 7,test_acc = 0.9901
iter = 8,test_acc = 0.9907
iter = 9,test_acc = 0.9915
iter = 10,test_acc = 0.9905