Tensorflow學習筆記(二)

Tensorflow下載MNIST數據集及實戰操作

1. MNIST數據集下載

相信有很多小夥伴直接調用 datasets.mnist.load_data()無法下載數據集。不借助外網是很難下載下來的。小白在這裏直接給大家提供下載網站:

http://yann.lecun.com/exdb/mnist/

進去之後直接下載下面四個壓縮包就好:
在這裏插入圖片描述
解壓到本地之後是四個idx文件,直接上代碼解析就好:

# encoding: utf-8
"""
@author: 數據小白
Parsing rules for the IDX file format:
========================
THE IDX FILE FORMAT
the IDX file format is a simple format for vectors and multidimensional matrices of various numerical types.
The basic format is
magic number
size in dimension 0
size in dimension 1
size in dimension 2
.....
size in dimension N
data
The magic number is an integer (MSB first). The first 2 bytes are always 0.
The third byte codes the type of the data:
0x08: unsigned byte
0x09: signed byte
0x0B: short (2 bytes)
0x0C: int (4 bytes)
0x0D: float (4 bytes)
0x0E: double (8 bytes)
The 4-th byte codes the number of dimensions of the vector/matrix: 1 for vectors, 2 for matrices....
The sizes in each dimension are 4-byte integers (MSB first, high endian, like in most non-Intel processors).
The data is stored like in a C array, i.e. the index in the last dimension changes the fastest.
"""

import numpy as np
import struct
import matplotlib.pyplot as plt

# Training set image file (default path used by load_train_images)
train_images_idx3_ubyte_file = '/Users/xiaobai/Desktop/train-images-idx3-ubyte'
# Training set label file (default path used by load_train_labels)
train_labels_idx1_ubyte_file = '/Users/xiaobai/Desktop/train-labels-idx1-ubyte'

# Test set image file (default path used by load_test_images)
test_images_idx3_ubyte_file = '/Users/xiaobai/Desktop/t10k-images-idx3-ubyte'
# Test set label file (default path used by load_test_labels)
test_labels_idx1_ubyte_file = '/Users/xiaobai/Desktop/t10k-labels-idx1-ubyte'


def decode_idx3_ubyte(idx3_ubyte_file):
    """
    Parse an idx3 (image) file into a NumPy array.

    The file starts with four big-endian 32-bit integers (magic number,
    number of images, rows per image, columns per image) followed by one
    unsigned byte per pixel, stored image by image in row-major order.

    :param idx3_ubyte_file: path to the idx3 file
    :return: float64 np.array of shape (num_images, num_rows, num_cols)
    :raises ValueError: if the file holds fewer pixel bytes than the header announces
    """
    # Read the raw bytes; the context manager guarantees the handle is closed.
    with open(idx3_ubyte_file, 'rb') as f:
        bin_data = f.read()

    # Header: magic number, image count, image height, image width.
    fmt_header = '>iiii'
    magic_number, num_images, num_rows, num_cols = struct.unpack_from(fmt_header, bin_data, 0)
    print('魔數:%d, 圖片數量: %d張, 圖片大小: %d*%d' % (magic_number, num_images, num_rows, num_cols))

    # Decode every pixel in one vectorized step instead of unpacking image by
    # image; there is therefore no per-10000-images progress to report.
    offset = struct.calcsize(fmt_header)
    pixel_count = num_images * num_rows * num_cols
    pixels = np.frombuffer(bin_data, dtype=np.uint8, count=pixel_count, offset=offset)

    # frombuffer yields a read-only view of the bytes; astype makes a writable
    # float64 copy, matching the dtype the np.empty-based version returned.
    images = pixels.reshape((num_images, num_rows, num_cols)).astype(np.float64)
    return images


def decode_idx1_ubyte(idx1_ubyte_file):
    """
    Parse an idx1 (label) file into a NumPy array.

    The file starts with two big-endian 32-bit integers (magic number,
    number of items) followed by one unsigned byte per label.

    :param idx1_ubyte_file: path to the idx1 file
    :return: float64 1-D np.array with one entry per label
    :raises ValueError: if the file holds fewer label bytes than the header announces
    """
    # Read the raw bytes; the context manager guarantees the handle is closed.
    with open(idx1_ubyte_file, 'rb') as f:
        bin_data = f.read()

    # Header: magic number and number of labels.
    fmt_header = '>ii'
    magic_number, num_images = struct.unpack_from(fmt_header, bin_data, 0)
    print('魔數:%d, 圖片數量: %d張' % (magic_number, num_images))

    # Decode all labels in one vectorized step instead of one struct call per
    # label; there is therefore no per-10000-labels progress to report.
    offset = struct.calcsize(fmt_header)
    raw_labels = np.frombuffer(bin_data, dtype=np.uint8, count=num_images, offset=offset)

    # astype makes a writable float64 copy, matching the dtype the
    # np.empty-based version returned.
    labels = raw_labels.astype(np.float64)
    return labels


def load_train_images(idx_ubyte_file=train_images_idx3_ubyte_file):
    """
    Load the training images by delegating to decode_idx3_ubyte.

    Layout of the TRAINING SET IMAGE FILE (train-images-idx3-ubyte):
    [offset] [type]          [value]          [description]
    0000     32 bit integer  0x00000803(2051) magic number
    0004     32 bit integer  60000            number of images
    0008     32 bit integer  28               number of rows
    0012     32 bit integer  28               number of columns
    0016     unsigned byte   ??               pixel
    0017     unsigned byte   ??               pixel
    ........
    xxxx     unsigned byte   ??               pixel
    Pixels are organized row-wise. Pixel values are 0 to 255. 0 means background (white), 255 means foreground (black).

    :param idx_ubyte_file: path to the idx file; defaults to the module-level training image path
    :return: np.array of shape n*row*col, where n is the number of images
    """
    train_images = decode_idx3_ubyte(idx_ubyte_file)
    return train_images


def load_train_labels(idx_ubyte_file=train_labels_idx1_ubyte_file):
    """
    Load the training labels by delegating to decode_idx1_ubyte.

    Layout of the TRAINING SET LABEL FILE (train-labels-idx1-ubyte):
    [offset] [type]          [value]          [description]
    0000     32 bit integer  0x00000801(2049) magic number (MSB first)
    0004     32 bit integer  60000            number of items
    0008     unsigned byte   ??               label
    0009     unsigned byte   ??               label
    ........
    xxxx     unsigned byte   ??               label
    The labels values are 0 to 9.

    :param idx_ubyte_file: path to the idx file; defaults to the module-level training label path
    :return: 1-D np.array of length n, where n is the number of labels
    """
    train_labels = decode_idx1_ubyte(idx_ubyte_file)
    return train_labels


def load_test_images(idx_ubyte_file=test_images_idx3_ubyte_file):
    """
    Load the test images by delegating to decode_idx3_ubyte.

    Layout of the TEST SET IMAGE FILE (t10k-images-idx3-ubyte):
    [offset] [type]          [value]          [description]
    0000     32 bit integer  0x00000803(2051) magic number
    0004     32 bit integer  10000            number of images
    0008     32 bit integer  28               number of rows
    0012     32 bit integer  28               number of columns
    0016     unsigned byte   ??               pixel
    0017     unsigned byte   ??               pixel
    ........
    xxxx     unsigned byte   ??               pixel
    Pixels are organized row-wise. Pixel values are 0 to 255. 0 means background (white), 255 means foreground (black).

    :param idx_ubyte_file: path to the idx file; defaults to the module-level test image path
    :return: np.array of shape n*row*col, where n is the number of images
    """
    test_images = decode_idx3_ubyte(idx_ubyte_file)
    return test_images


def load_test_labels(idx_ubyte_file=test_labels_idx1_ubyte_file):
    """
    Load the test labels by delegating to decode_idx1_ubyte.

    Layout of the TEST SET LABEL FILE (t10k-labels-idx1-ubyte):
    [offset] [type]          [value]          [description]
    0000     32 bit integer  0x00000801(2049) magic number (MSB first)
    0004     32 bit integer  10000            number of items
    0008     unsigned byte   ??               label
    0009     unsigned byte   ??               label
    ........
    xxxx     unsigned byte   ??               label
    The labels values are 0 to 9.

    :param idx_ubyte_file: path to the idx file; defaults to the module-level test label path
    :return: 1-D np.array of length n, where n is the number of labels
    """
    test_labels = decode_idx1_ubyte(idx_ubyte_file)
    return test_labels


def run():
    """
    Load the four MNIST files and show the first ten training samples.

    For each of the first ten training images the label is printed and the
    image is displayed, so the decoded data can be checked by eye.

    :return: None
    """
    x = load_train_images()
    y = load_train_labels()
    # The test files are loaded as well so that all four files get decoded.
    x_test = load_test_images()
    y_test = load_test_labels()

    # Inspect the first ten samples together with their labels to verify
    # that the files were read correctly.
    for i in range(10):
        # Print the label that belongs to the image displayed below.
        print(y[i])
        plt.imshow(x[i], cmap='gray')
        plt.show()
    print('done')

# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    run()

之後就是我們的實戰操作了。爲了避免大數值的計算、節約內存,我們可以先對數據進行預處理:

# Preprocess the data
def preprocess(x, y):
    """
    Scale the images and cast the labels.

    :param x: image data; pixel values are expected to lie in 0~255
    :param y: labels
    :return: tuple (x cast to float32 and divided by 255, y cast to int32)
    """
    # Dividing by 255 rescales 0~255 pixel values into [0, 1].
    scaled = tf.cast(x, dtype=tf.float32) / 255.
    # To rescale into [-1, 1] instead, one could use:
    #   2 * tf.cast(x, dtype=tf.float32) / 255. - 1
    labels = tf.cast(y, dtype=tf.int32)
    return scaled, labels

然後構建dataset對象,方便對數據的打亂,批處理等一些操作:

# Training pipeline: slice per sample, shuffle with a 1000-element buffer,
# group into batches of 128, then apply preprocess to each batch.
train_db = (
    tf.data.Dataset.from_tensor_slices((x, y))
    .shuffle(1000)
    .batch(128)
    .map(preprocess)
)
# Test pipeline: same steps without shuffling.
test_db = (
    tf.data.Dataset.from_tensor_slices((x_test, y_test))
    .batch(128)
    .map(preprocess)
)

在進行網絡搭建之前,先讓我們構建模型中會用到的權重

# Layer 1 (784 -> 256): weights drawn from a truncated normal with stddev 0.1,
# biases start at zero.
w1 = tf.Variable(tf.random.truncated_normal([784, 256], stddev=0.1))
b1 = tf.Variable(tf.zeros([256]))
# Layer 2 (256 -> 128): same initialisation scheme.
w2 = tf.Variable(tf.random.truncated_normal([256, 128], stddev=0.1))
b2 = tf.Variable(tf.zeros([128]))
# Layer 3 (128 -> 10): same initialisation scheme.
w3 = tf.Variable(tf.random.truncated_normal([128, 10], stddev=0.1))
b3 = tf.Variable(tf.zeros([10]))

由於線性模型的表達能力有限,而現實中很多關係都不是線性的,所以我們選擇搭建非線性網絡。三層全連接層加上ReLU激活函數,已經讓我們的神經網絡具備比較強的非線性擬合能力了

# Three-layer non-linear model
# Flatten every sample to a 784-vector so it can be multiplied by w1
# ([784, 256]); this is a no-op when x is already [b, 784].
# NOTE(review): assumes x is one batch of 28x28 images — confirm against caller.
x = tf.reshape(x, [-1, 784])
# [b, 784] @ [784, 256] + [256] -> [b, 256]; the bias is broadcast over the
# batch axis automatically, exactly as for b2 and b3 below.
h1 = x @ w1 + b1
h1 = tf.nn.relu(h1)
# [b, 256] @ [256, 128] + [128] -> [b, 128]
h2 = h1 @ w2 + b2
h2 = tf.nn.relu(h2)
# [b, 128] @ [128, 10] + [10] -> [b, 10]; no activation on the output layer
out = h2 @ w3 + b3

記得每次計算梯度之後要進行權重的更新哦。權重更新的公式如下(在TensorFlow中請使用 w.assign_sub(lr * w_grad) 進行原地更新,否則 w 會從 tf.Variable 變成普通的 Tensor,下一次就無法再對它求梯度了):

w = w - lr * w_grad

爲了衡量預測值和真實值之間的差異大小,我們可以計算loss(損失)。
最後小白的輸出結果如下:在這裏插入圖片描述

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章