機器學習筆記 tensorflow實現在cifar10數據集上的cnn

主要是試着使用一下cifar-10數據集,cifar-10數據集爲60000張32*32*3的彩色圖片,總共有10個類別,其中50000張訓練集,10000張測試集。

下載地址:http://www.cs.toronto.edu/~kriz/cifar.html

     

代碼與google的例子和網上的版本大致相同,主要的區別是使用了cifar10的python版數據,並自己寫了一個讀取和隨機抽取數據的程序。網上的版本大多使用google例子中cifar10_input.py裏的輸入函數,會進行數據增強;自己手寫的版本沒有數據增強。google例子中使用Session讀取數據,我跑的時候會卡住,網上也說用Session讀取數據效率極低。

下面先給出這部分的代碼,我將讀取數據相關的程序寫在了input.py中

import pickle
import numpy as np
import random


def load(file_name):
    """Unpickle the file at ``file_name`` and return the stored object.

    The file is opened in binary mode and unpickled with
    ``encoding='bytes'``, which is why callers index the result with
    ``bytes`` keys such as ``b'data'`` and ``b'labels'``.

    NOTE: unpickling can execute arbitrary code; only use this on files
    obtained from a trusted source.
    """
    with open(file_name, 'rb') as batch_file:
        contents = pickle.load(batch_file, encoding='bytes')
    return contents


def get_train(data_dir=None, batch_ids=(1, 2, 3, 4)):
    """Load CIFAR-10 training batches as lists of images and labels.

    Args:
        data_dir: Directory holding the ``data_batch_<n>`` files. ``None``
            selects the author's original hard-coded location.
        batch_ids: Numbers of the ``data_batch_<n>`` files to read. The
            default keeps the original behaviour of reading batches 1-4.

    Returns:
        ``(images, labels)``: ``images`` is a list of arrays of shape
        ``(32, 32, 3)`` (height, width, channel) and ``labels`` is a list
        of integer class ids, in the same order.
    """
    import os  # local import so the file's import header stays untouched

    if data_dir is None:
        # Raw string: the original literal relied on invalid escape
        # sequences such as "\I" and "\P" being passed through unchanged.
        data_dir = r'D:\IDE\Project\Pycharm_Project\Cifar10_Project\cifar-10-batches-py'

    image_chunks = []
    label_chunks = []
    for batch_id in batch_ids:
        batch = load(os.path.join(data_dir, 'data_batch_%d' % batch_id))
        flat = np.array(batch[b'data'])
        # Each row holds 1024 red, then 1024 green, then 1024 blue values
        # (row-major per channel). Reshaping straight to (32, 32, 3) would
        # interleave the channels incorrectly, so reshape channel-first and
        # then move the channel axis to the end.
        image_chunks.append(flat.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1))
        label_chunks.append(np.array(batch[b'labels']))

    if not image_chunks:
        return [], []

    train_data = np.concatenate(image_chunks)
    train_labels = np.concatenate(label_chunks)
    return list(train_data), list(train_labels)


def get_test(data_dir=None):
    """Load the CIFAR-10 test batch as lists of images and labels.

    Args:
        data_dir: Directory holding the ``test_batch`` file. ``None``
            selects the author's original hard-coded location.

    Returns:
        ``(images, labels)``: ``images`` is a list of arrays of shape
        ``(32, 32, 3)`` (height, width, channel) and ``labels`` is the
        list of class ids stored in the batch, in the same order.
    """
    import os  # local import so the file's import header stays untouched

    if data_dir is None:
        # Raw string: the original literal relied on invalid escape
        # sequences such as "\I" and "\P" being passed through unchanged.
        data_dir = r'D:\IDE\Project\Pycharm_Project\Cifar10_Project\cifar-10-batches-py'

    batch = load(os.path.join(data_dir, 'test_batch'))
    flat = np.array(batch[b'data'])
    # Rows are stored channel-major (1024 red, 1024 green, 1024 blue), so
    # reshape channel-first and then move the channel axis to the end.
    images = flat.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1)
    return list(images), list(batch[b'labels'])


def get_batch(batch_size, image, label):
    """Draw ``batch_size`` distinct samples, keeping images and labels paired.

    Args:
        batch_size: Number of samples to draw without replacement.
        image: Indexable sequence of images.
        label: Indexable sequence of labels aligned with ``image``.

    Returns:
        ``(batch_image, batch_label)``: two lists of length ``batch_size``.
        Each image is converted with ``list(...)``, as before.

    Raises:
        ValueError: If ``batch_size`` is negative or larger than
            ``len(image)``. The previous rejection loop never terminated
            in the latter case.
    """
    # Sampling indices (rather than the items) keeps image and label aligned
    # and avoids the O(n) "index in list" check on every draw.
    chosen = random.sample(range(len(image)), batch_size)
    batch_image = [list(image[index]) for index in chosen]
    batch_label = [label[index] for index in chosen]
    return batch_image, batch_label

cifar10的python版解壓出來後算上test共有6個batch,前五個是train用的,每個包含10000張圖片的信息和標籤信息(上面的get_train()只讀取了其中前4個,即40000張)。用pickle讀取後是一個字典,用keys()可以打印出關鍵字,再利用關鍵字讀取。get_batch()是一個隨機抽取(不放回)一定數量樣本的函數;原本使用random.sample()是可以抽取的,可是label和image是分開的,必須保持一致。

解決了數據讀取問題後就和google的例子基本一致了。首先把tf中一些重複的操作封裝在Init.py中,下面給出代碼;函數名可讀性還是很強的,就不做過多介紹。

import tensorflow as tf


def l2_weight_init(shape, stddev, w1):
    """Create a weight variable and optionally register an L2 penalty.

    The variable is initialised from a truncated normal distribution with
    the given ``stddev``. When ``w1`` is not ``None``, ``w1`` times the L2
    loss of the variable is added to the ``"losses"`` collection.

    Returns the created variable.
    """
    initial_value = tf.truncated_normal(shape, stddev=stddev)
    weight = tf.Variable(initial_value)
    if w1 is None:
        return weight
    penalty = tf.multiply(tf.nn.l2_loss(weight), w1, name="weight_loss")
    tf.add_to_collection("losses", penalty)
    return weight


def weight_init(shape, stddev):
    """Return a variable of ``shape`` drawn from a truncated normal with ``stddev``."""
    initial_value = tf.truncated_normal(shape, stddev=stddev)
    return tf.Variable(initial_value)


def bias_init(shape):
    """Return a bias variable of ``shape`` initialised with ``tf.random_normal``."""
    initial_value = tf.random_normal(shape)
    bias = tf.Variable(initial_value)
    return bias


def conv2d(image, weight):
    """Convolve ``image`` with ``weight`` using unit strides and "SAME" padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(image, weight, strides=unit_strides, padding="SAME")


def max_pool(tensor):
    """Max-pool ``tensor`` with ksize [1, 3, 3, 1], strides [1, 2, 2, 1], "SAME" padding."""
    window = [1, 3, 3, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(tensor, ksize=window, strides=step, padding="SAME")

下面是主要代碼:兩層卷積層、兩層全連接層和一層輸出層,卷積層後使用LRN(局部響應歸一化),全連接層的權重使用L2正則化防止過擬合

import tensorflow as tf
import input
import numpy as np
import Init
import matplotlib.pyplot as plt
import random


# Presumably the intended number of training iterations -- confirm against the training loop.
max_steps = 5000
# Examples per batch; also the fixed leading dimension of the input placeholders.
batch_size = 128
# The training loss is printed every `display` iterations.
display = 100


def LRnorm(tensor):
    """Apply local response normalisation (``tf.nn.lrn``) to ``tensor``.

    Uses depth radius 4, bias 1.0, alpha 0.001/9.0 and beta 0.75.
    """
    normalised = tf.nn.lrn(
        tensor,
        depth_radius=4,
        bias=1.0,
        alpha=0.001/9.0,
        beta=0.75,
    )
    return normalised


def accuracy(test_labels, test_y_out):
    """Return the fraction of predictions that match the labels.

    Args:
        test_labels: Class indices; cast to int64 before the comparison.
        test_y_out: Per-class scores; the argmax over axis 1 is taken as
            the predicted class.

    Returns:
        A scalar float32 tensor: the mean of the element-wise matches.
    """
    true_classes = tf.cast(test_labels, tf.int64)
    # Use the scores passed in. The previous version ignored this argument
    # and always read the module-level `y_` tensor instead.
    predicted_classes = tf.argmax(test_y_out, 1)
    hits = tf.equal(true_classes, predicted_classes)
    return tf.reduce_mean(tf.cast(hits, tf.float32))

# The queue-based input pipeline from the cifar10_input example is kept here
# for reference only; batches are fed through the placeholders below instead.
# train_image, train_label = cifar10_input.distorted_inputs(batch_size=batch_size, data_dir="cifar-10-batches-bin")
# test_image, test_label = cifar10_input.inputs(batch_size=batch_size, data_dir="cifar-10-batches-bin", eval_data=True)

with tf.name_scope('Input'):
    image = tf.placeholder('float', [batch_size, 32, 32, 3])
    # Labels are class indices, so the placeholder is int64 from the start.
    # Previously a float placeholder was created and `label` was then rebound
    # to a cast op, so feed_dict overrode an intermediate tensor rather than
    # feeding the placeholder.
    label = tf.placeholder(tf.int64, [batch_size])

# First convolution stage: conv -> ReLU -> max-pool -> LRN.
with tf.name_scope('ConLayer_1'):
    we1 = Init.weight_init([5, 5, 3, 32], 0.05)
    b1 = Init.bias_init([32])
    conv1 = tf.nn.relu(Init.conv2d(image, we1)+b1)
    pool1 = Init.max_pool(conv1)
    LRn1 = LRnorm(pool1)

# Second convolution stage: conv -> ReLU -> LRN -> max-pool.
with tf.name_scope('ConLayer_2'):
    w2 = Init.weight_init([5, 5, 32, 32], 0.05)
    b2 = Init.bias_init([32])
    conv2 = tf.nn.relu(Init.conv2d(LRn1, w2)+b2)
    LRn2 = LRnorm(conv2)
    pool2 = Init.max_pool(LRn2)

# First fully connected layer; its weights carry an L2 penalty (factor 0.001).
with tf.name_scope('FullLayer_1'):
    # Flatten each example to one row; the row width is read from the
    # static shape so the weight matrix can be sized accordingly.
    reshape = tf.reshape(pool2, [batch_size, -1])
    n_input = reshape.get_shape()[1].value
    w3 = Init.l2_weight_init([n_input, 128], 0.05, w1=0.001)
    b3 = Init.bias_init([128])
    full_1 = tf.nn.relu(tf.matmul(reshape, w3)+b3)

# Second fully connected layer; its weights carry an L2 penalty (factor 0.003).
with tf.name_scope("FullLayer_2"):
    w4 = Init.l2_weight_init([128, 64], 0.05, w1=0.003)
    b4 = Init.bias_init([64])
    full_2 = tf.nn.relu(tf.matmul(full_1, w4)+b4)

# Output layer: 10 logits per example, plus their softmax.
with tf.name_scope('Inference'):
    w5 = Init.weight_init([64, 10], 1/96.0)
    b5 = Init.bias_init([10])
    logits = tf.add(tf.matmul(full_2, w5), b5)
    y_ = tf.nn.softmax(logits)

# Total loss = mean cross-entropy + every term in the 'losses' collection.
with tf.name_scope('Loss'):
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    tf.add_to_collection('losses', cross_entropy_mean)
    loss = tf.add_n(tf.get_collection('losses'), name='total_loss')

train_op = tf.train.AdamOptimizer(0.0001).minimize(loss)

# Boolean per example: is the true label the top-1 prediction?
top_k_op = tf.nn.in_top_k(logits, label, 1)

init = tf.global_variables_initializer()

# Loss value fetched at every training step, plotted at the end.
Cross_loss = []

# The context manager closes the session once training and evaluation are
# done; previously the session was never closed.
with tf.Session() as sess:
    sess.run(init)

    # tf.train.start_queue_runners(sess=sess)

    print("start")
    train_image, train_label = input.get_train()

    # Honour the max_steps setting instead of a hard-coded iteration count.
    for i in range(max_steps):
        # Reading batches through the session was slow, so they are drawn in plain Python instead.
        # batch_images, batch_labels = sess.run([train_image, train_label])
        batch_images, batch_labels = input.get_batch(batch_size, train_image, train_label)
        # `loss_value` is the fetched total loss; a separate name keeps the
        # `cross_entropy` tensor from being overwritten by a number.
        _, loss_value = sess.run([train_op, loss], feed_dict={image: batch_images, label: batch_labels})
        Cross_loss.append(loss_value)
        if i % display == 0:
            print('epoch', i, 'loss:', loss_value)

    # Evaluate on 10 randomly drawn test batches and print each batch's
    # top-1 accuracy.
    test_image, test_label = input.get_test()
    for i in range(10):
        test_batch_image, test_batch_label = input.get_batch(batch_size, test_image, test_label)
        ys = sess.run([top_k_op], feed_dict={image: test_batch_image, label: test_batch_label})
        print(np.sum(ys)/batch_size)

# Plot the training loss curve.
fig, ax = plt.subplots(figsize=(13, 6))
ax.plot(Cross_loss)
plt.grid()
plt.title('Train loss')
plt.show()

寫這個的主要目的是接觸一下mnist以外的數據集,畢竟mnist數據還是太弱了,上述程序能大概實現60%的預測精度

loss值的圖如下,超參數肯定不是最優,不過目的並不是調參,有興趣可以找找更好的超參數

隨機抽取10個batch(每個batch 128張)的test數據統計準確率

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章