Pytorch+Tensorflow 實現MNIST數字識別問題

MNIST數據集簡介

MNIST(Modified National Institute of Standards and Technology database)是一個簡單的機器視覺數據集,由幾萬張28x28像素的手寫數字圖片組成,這些圖片僅有灰度值信息。

MNIST數據集的樣例如圖1所示:MNIST數據集
該任務是對這些手寫數字的圖片進行分類,轉成0-9一共10類。

Tensorflow相關代碼說明

1. 首先需要將MNIST數據集導入,並查看MNIST數據的相關屬性。
  • 導入MNIST
# Import the MNIST loader that ships with TensorFlow's tutorial package.
from tensorflow.examples.tutorials.mnist import input_data

# train_dir is the local directory holding MNIST; the data is downloaded
# automatically when it is not found there.
# one_hot=True encodes every label as a one-dimensional array with a single 1,
# e.g. digit 1 -> [0, 1, 0, 0, 0, 0, 0, 0, 0, 0].
mnist = input_data.read_data_sets(train_dir="MNIST_data", one_hot=True)
  • 查看MNIST相關屬性
# MNIST is split into three parts: 55000 training images, 5000 validation
# images and 10000 test images; every image has 28x28 = 784 pixels.
# Each image has a matching label, and each label is an array of ten 0/1
# entries.

# Number of examples in the training, validation and test sets.
print("Training data size: ", mnist.train.num_examples)
print("Validating data size: ", mnist.validation.num_examples)
print("Testing data size: ", mnist.test.num_examples)

# Array shapes of the images in the three sets.
print("Training images dimension: ", mnist.train.images.shape)
print("Validating images dimension: ", mnist.validation.images.shape)
print("Testing images dimension: ", mnist.test.images.shape)
# Array shapes of the labels; these must be read from `.labels`, not `.images`.
print("Training labels dimension: ", mnist.train.labels.shape)
print("Validating labels dimension: ", mnist.validation.labels.shape)
print("Testing labels dimension: ", mnist.test.labels.shape)
  • 查看training數據集中某個成員的像素矩陣生成的一維數組和其屬於的數字標籤。
# Show the flattened pixel array of one training image and its label.
sample_index = 0
print("Example training data: ", mnist.train.images[sample_index])
print("Example training data label: ", mnist.train.labels[sample_index])
2. 開始一個一層全連接網絡訓練

因爲MNIST的像素大小不是很大,並且其空間相關性不是很大,所以可以使用全連接網絡完成訓練。

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("MNIST_data", one_hot=True)

# InteractiveSession registers itself as the default session, which is what
# lets the bare `.run()` / `.eval()` calls below work without passing a session.
sess = tf.InteractiveSession()

# Input images; None leaves the batch dimension unconstrained.
x = tf.placeholder(tf.float32, [None, 784])
# Weights and biases both start at zero.
W1 = tf.Variable(tf.zeros([784, 10]))
b1 = tf.Variable(tf.zeros([10]))
# Un-normalised class scores; softmax turns them into class probabilities.
logits = tf.matmul(x, W1) + b1
y = tf.nn.softmax(logits)
# Ground-truth one-hot labels.
y_ = tf.placeholder(tf.float32, [None, 10])
# Mean cross-entropy over the batch. It is computed from the logits rather
# than as -sum(y_ * log(softmax)), because the latter yields NaN as soon as a
# predicted probability underflows to 0.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits))
# Plain gradient descent with learning rate 0.5.
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
# Initialise all variables.
tf.global_variables_initializer().run()
for i in range(1000):
    print("The %s-th steps" % (i + 1))
    # Each step trains on a mini-batch of 100 examples.
    x_batch, y_batch = mnist.train.next_batch(100)
    train_step.run({x: x_batch, y_: y_batch})

# argmax gives the index of the largest entry; equal compares prediction and
# label element-wise and yields a boolean tensor.
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
# Cast the booleans to floats and average them to get the accuracy.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# Evaluate on the test set.
print(accuracy.eval({x: mnist.test.images, y_: mnist.test.labels}))

以上代碼可以達到92%左右的準確率。

3. 增加一個隱藏層
import time

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("MNIST_data", one_hot=True)

# A default session is required by the bare `.run()` / `.eval()` calls below;
# without it this script fails when run on its own.
sess = tf.InteractiveSession()

x = tf.placeholder(tf.float32, [None, 784])
# Probability of keeping a hidden unit: below 1 while training, 1 when testing.
keep_prob = tf.placeholder(tf.float32)
# Original author's note: an all-zero W1 does not train well here, so small
# truncated-normal noise is used instead.
W1 = tf.Variable(tf.truncated_normal([784, 300], stddev=0.1))
b1 = tf.Variable(tf.zeros([300]))
hidden1 = tf.nn.relu(tf.matmul(x, W1) + b1)
# Dropout on the hidden layer to reduce overfitting.
hidden1_drop = tf.nn.dropout(hidden1, keep_prob)
W2 = tf.Variable(tf.zeros([300, 10]))
b2 = tf.Variable(tf.zeros([10]))
logits = tf.matmul(hidden1_drop, W2) + b2
y = tf.nn.softmax(logits)
y_ = tf.placeholder(tf.float32, [None, 10])
# Cross-entropy computed from the logits: -sum(y_ * log(softmax)) turns into
# NaN once a predicted probability underflows to 0.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits))
# Adagrad optimizer with learning rate 0.3.
train_step = tf.train.AdagradOptimizer(0.3).minimize(cross_entropy)
tf.global_variables_initializer().run()

time1 = time.time()
for i in range(3000):
    print("The %s-th steps" % (i + 1))
    x_batch, y_batch = mnist.train.next_batch(100)
    # keep_prob=0.75: each hidden activation is kept with probability 0.75,
    # i.e. roughly a quarter of them are dropped on every step.
    train_step.run({x: x_batch, y_: y_batch, keep_prob: 0.75})
print("Done:%s" % (time.time() - time1))

correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# keep_prob=1.0 disables dropout for evaluation.
print(accuracy.eval({x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))

上述的代碼可以達到98%的準確率。

4. 一個完整的代碼(轉自《TensorFlow實戰Google深度學習框架》)

mnist_inference.py

import tensorflow as tf

INPUT_NODE = 784
OUTPUT_NODE = 10
LAYER1_NODE = 500

def get_weight_variable(shape, regularizer):
    """Get the variable named "weights" in the current variable scope.

    Args:
        shape: Shape of the weight tensor.
        regularizer: Callable applied to the weight tensor, or None. When
            given, its result is added to the 'losses' collection.

    Returns:
        The weight variable, initialised from a truncated normal
        distribution with stddev 0.1.
    """
    weights = tf.get_variable(
        "weights", shape,
        initializer=tf.truncated_normal_initializer(stddev=0.1))
    # Identity test against None: `!= None` would dispatch to a user-defined
    # __ne__ on the regularizer object.
    if regularizer is not None:
        tf.add_to_collection('losses', regularizer(weights))
    return weights


def inference(input_tensor, regularizer):
    """Build the forward pass of the two-layer fully connected network.

    Args:
        input_tensor: Batch of inputs with INPUT_NODE features per row.
        regularizer: Passed through to get_weight_variable for both layers
            (may be None).

    Returns:
        The output of the second layer, without any softmax applied.
    """
    # Hidden layer: ReLU(input * W + b), variables under scope "layer1".
    with tf.variable_scope('layer1'):
        hidden_weights = get_weight_variable([INPUT_NODE, LAYER1_NODE], regularizer)
        hidden_biases = tf.get_variable(
            "biases", [LAYER1_NODE],
            initializer=tf.constant_initializer(0.0))
        hidden = tf.nn.relu(tf.matmul(input_tensor, hidden_weights) + hidden_biases)

    # Output layer: plain affine map, variables under scope "layer2".
    with tf.variable_scope('layer2'):
        output_weights = get_weight_variable([LAYER1_NODE, OUTPUT_NODE], regularizer)
        output_biases = tf.get_variable(
            "biases", [OUTPUT_NODE],
            initializer=tf.constant_initializer(0.0))
        logits = tf.matmul(hidden, output_weights) + output_biases

    return logits

mnist_train.py

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import mnist_inference
import os
BATCH_SIZE = 100 
LEARNING_RATE_BASE = 0.8
LEARNING_RATE_DECAY = 0.99
REGULARIZATION_RATE = 0.0001
TRAINING_STEPS = 30000
MOVING_AVERAGE_DECAY = 0.99 
MODEL_SAVE_PATH = "MNIST_model/"
MODEL_NAME = "mnist_model"
def train(mnist):
    """Train the network from mnist_inference and checkpoint it periodically.

    Runs TRAINING_STEPS mini-batch steps; every 1000 steps the current batch
    loss is printed and a checkpoint is written under MODEL_SAVE_PATH.

    Args:
        mnist: Dataset object providing ``train.next_batch`` and
            ``train.num_examples``.
    """
    # Placeholders for the input batch and its labels.
    x = tf.placeholder(tf.float32, [None, mnist_inference.INPUT_NODE], name='x-input')
    y_ = tf.placeholder(tf.float32, [None, mnist_inference.OUTPUT_NODE], name='y-input')

    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    y = mnist_inference.inference(x, regularizer)
    global_step = tf.Variable(0, trainable=False)

    # Loss, learning rate, moving averages and the training op.
    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    # The sparse variant takes class indices, hence the argmax over y_.
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    # Total loss = data loss + the regularisation terms gathered in 'losses'.
    loss = cross_entropy_mean + tf.add_n(tf.get_collection('losses'))
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        mnist.train.num_examples / BATCH_SIZE, LEARNING_RATE_DECAY,
        staircase=True)
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
    # One op that runs both the gradient step and the moving-average update.
    with tf.control_dependencies([train_step, variables_averages_op]):
        train_op = tf.no_op(name='train')

    # Saver.save may fail when the target directory is missing, so make sure
    # it exists before training starts.
    if not os.path.isdir(MODEL_SAVE_PATH):
        os.makedirs(MODEL_SAVE_PATH)

    # Checkpoint writer.
    saver = tf.train.Saver()
    with tf.Session() as sess:
        tf.global_variables_initializer().run()

        for i in range(1, TRAINING_STEPS + 1):
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            _, loss_value, step = sess.run([train_op, loss, global_step], feed_dict={x: xs, y_: ys})
            if i % 1000 == 0:
                print("After %d training step(s), loss on training batch is %g." % (step, loss_value))
                saver.save(sess, os.path.join(MODEL_SAVE_PATH, MODEL_NAME), global_step=global_step)
                def main(argv=None):
    mnist = input_data.read_data_sets("../../../datasets/MNIST_data", one_hot=True)
    train(mnist)

if __name__ == '__main__':
    main()

mnist_eval.py

import time
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import mnist_inference
import mnist_train
# 加載的時間間隔。
EVAL_INTERVAL_SECS = 10

def evaluate(mnist):
    """Periodically restore the latest checkpoint and print validation accuracy.

    Loops forever, sleeping EVAL_INTERVAL_SECS between rounds, and returns
    only when no checkpoint is found under mnist_train.MODEL_SAVE_PATH.

    Args:
        mnist: Dataset object providing ``validation.images`` and
            ``validation.labels``.
    """
    with tf.Graph().as_default():
        images_in = tf.placeholder(tf.float32, [None, mnist_inference.INPUT_NODE], name='x-input')
        labels_in = tf.placeholder(tf.float32, [None, mnist_inference.OUTPUT_NODE], name='y-input')
        validate_feed = {images_in: mnist.validation.images, labels_in: mnist.validation.labels}

        # No regularizer is needed when only evaluating.
        logits = mnist_inference.inference(images_in, None)
        is_hit = tf.equal(tf.argmax(logits, 1), tf.argmax(labels_in, 1))
        accuracy = tf.reduce_mean(tf.cast(is_hit, tf.float32))

        # Restore through the moving-average name mapping, using the same
        # decay as training.
        ema = tf.train.ExponentialMovingAverage(mnist_train.MOVING_AVERAGE_DECAY)
        saver = tf.train.Saver(ema.variables_to_restore())

        while True:
            with tf.Session() as sess:
                ckpt = tf.train.get_checkpoint_state(mnist_train.MODEL_SAVE_PATH)
                if not (ckpt and ckpt.model_checkpoint_path):
                    print('No checkpoint file found')
                    return
                saver.restore(sess, ckpt.model_checkpoint_path)
                # The step count is the text after the last '-' in the
                # checkpoint file name.
                checkpoint_name = ckpt.model_checkpoint_path.split('/')[-1]
                global_step = checkpoint_name.split('-')[-1]
                accuracy_score = sess.run(accuracy, feed_dict=validate_feed)
                print("After %s training step(s), validation accuracy = %g" % (global_step, accuracy_score))
            time.sleep(EVAL_INTERVAL_SECS)
            def main(argv=None):
    mnist = input_data.read_data_sets("../../../datasets/MNIST_data", one_hot=True)
    evaluate(mnist)

if __name__ == '__main__':
    main()

Pytorch相關代碼說明

1、導入相關模塊並加載數據
# 導入torch模塊
import torch
import torch.nn as nn
# 加載Torchvision,用於圖像處理的一個庫,dataset中包含了相關數據
import torchvision.datasets as dset
# transforms可以將圖像轉換成張量形式
import torchvision.transforms as transforms
from torch.autograd import Variable
# 用於加載預先設計好的數據
import torch.utils.data as Data
import numpy as np
import matplotlib.pyplot as plt
# Hyper-parameters used by the rest of the script.
input_size = 784
hidden_size = 300
output_size = 10
batch_size = 100
learning_rate = 0.001
num_epochs = 5

# dset.MNIST arguments:
#   root      - location of the dataset;
#   train     - True selects the training set, False the test set;
#   transform - converts each PIL image into the desired form;
#   download  - True downloads the dataset into `root`; nothing is downloaded
#               again when the data is already present.
data_root = 'mnist_data'
train_dataset = dset.MNIST(
    root=data_root,
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
test_dataset = dset.MNIST(
    root=data_root,
    train=False,
    transform=transforms.ToTensor(),
)
2、傳入所有數據
# DataLoader arguments:
#   dataset    - the dataset to draw samples from;
#   batch_size - number of samples per mini-batch;
#   shuffle    - True reshuffles the data at every epoch.
train_loader = Data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = Data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)
3、定義一個包含一個隱藏層的網絡
class Net(nn.Module):
    """Fully connected network with one hidden layer and ReLU activation.

    Args:
        input_size: Number of input features.
        hidden_size: Number of hidden units.
        num_classes: Number of output scores.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return fc2(relu(fc1(x))); no softmax is applied to the output."""
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out
# Instantiate the network defined by the Net class.
net = Net(
    input_size=input_size,
    hidden_size=hidden_size,
    num_classes=output_size,
)
4、定義損失函數及其相關
# Cross-entropy loss for the classification task.
criterion = nn.CrossEntropyLoss()
# The optimizer object is given the network's parameters to update;
# Adam is the update rule used here.
optimizer = torch.optim.Adam(
    net.parameters(),
    lr=learning_rate,
)
5、開始訓練
# One optimisation step per mini-batch, repeated for num_epochs passes.
steps_per_epoch = len(train_dataset) // batch_size
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Flatten every image to a 28 * 28 vector; -1 lets view() infer the
        # batch dimension. Tensors are used directly: the Variable wrapper is
        # deprecated and no longer needed.
        images = images.view(-1, 28 * 28)
        # Clear the gradients accumulated by the previous step.
        optimizer.zero_grad()
        # Forward pass on the current mini-batch.
        outputs = net(images)
        # Compute the loss and back-propagate.
        loss = criterion(outputs, labels)
        loss.backward()
        # Update the parameters.
        optimizer.step()
        if (i + 1) % 100 == 0:
            print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f'
                  % (epoch + 1, num_epochs, i + 1, steps_per_epoch, loss.item()))
6、計算準確率
correct = 0
total = 0
# Count the correct predictions on the test set batch by batch.
# no_grad(): evaluation needs no gradients, so autograd tracking is disabled.
with torch.no_grad():
    for images, labels in test_loader:
        images = images.view(-1, 28 * 28)
        outputs = net(images)
        # torch.max along dim 1 returns (values, indices); the indices are
        # the predicted classes.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        # .item() turns the count into a plain Python number, so `correct`
        # stays a number and the percentage below is ordinary arithmetic.
        correct += (predicted == labels).sum().item()
# Report the accuracy as a percentage.
print('Accuracy: %d %%' % (100 * correct / total))

以上參數的設置並訓練之後,識別率達到了97%。

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章