質量聲明:原創文章,內容質量問題請評論吐槽。如對您產生干擾,可私信刪除。
主要參考:python 讀取 MNIST 數據集並解析爲圖片文件
摘要: MXNet實踐: 以Fashion-MNIST爲例,下載並讀取數據集
圖像分類數據集中最常用的是手寫數字識別數據集MNIST,但大部分模型在MNIST上的分類精度都超過了95%,選用圖像內容更加複雜的數據集可以更直觀地觀察算法之間的差異。
Fashion-MNIST克隆了MNIST的所有外在特徵,一共包括了10個類別,分別爲t-shirt(T恤)、trouser(褲子)、pullover(套衫)、dress(連衣裙)、coat(外套)、sandal(涼鞋)、shirt(襯衫)、sneaker(運動鞋)、bag(包)和ankle boot(短靴)。
下載數據集
-
MXNet 自帶下載工具:
from mxnet.gluon import data as gdata

# Directory handed to FashionMNIST as its storage root.
root = '/content/drive/My Drive/Colab/datasets/fashion_mnist'

# Build the training split first, then the test split, from the same root.
mnist_train, mnist_test = (
    gdata.vision.FashionMNIST(root, train=is_train)
    for is_train in (True, False)
)
下載後的壓縮文件:
- train-images-idx3-ubyte.gz 訓練集圖片 - 共 60000 張圖片(可再劃分爲 55000 張訓練圖片和 5000 張驗證圖片)
- train-labels-idx1-ubyte.gz 訓練集標籤
- t10k-images-idx3-ubyte.gz 測試集圖片 - 10000 張 圖片
- t10k-labels-idx1-ubyte.gz 測試集標籤
解壓
gzip -d XXX
解壓後的文件夾:
- train-images-idx3-ubyte
- train-labels-idx1-ubyte
- t10k-images-idx3-ubyte
- t10k-labels-idx1-ubyte
解析爲圖片文件,按標籤存放
-
最終的文件結構
-
MNIST轉換爲圖片的python代碼
import struct
import numpy as np
import os
import cv2
def decode_idx3_ubyte(idx3_ubyte_file):
    """Decode an uncompressed IDX3 unsigned-byte image file.

    The file is expected to start with four big-endian 32-bit integers
    (magic number, image count, row count, column count) followed by one
    unsigned byte per pixel, image after image.

    Args:
        idx3_ubyte_file: Path of the IDX3 file to read.

    Returns:
        A float64 NumPy array of shape ``(num_images, num_rows, num_cols)``
        holding the pixel values.

    Raises:
        ValueError: If the header is incomplete, the magic number is not the
            IDX3 unsigned-byte value, or the pixel payload is shorter than
            the header announces.
    """
    idx3_magic = 2051  # 0x00000803: unsigned-byte data, three dimensions

    with open(idx3_ubyte_file, 'rb') as f:
        print('解析文件:', idx3_ubyte_file)
        fb_data = f.read()

    fmt_header = '>iiii'  # four big-endian 32-bit integers
    header_size = struct.calcsize(fmt_header)
    if len(fb_data) < header_size:
        raise ValueError(
            '{}: file too short for an IDX3 header'.format(idx3_ubyte_file))

    magic_number, num_images, num_rows, num_cols = struct.unpack_from(
        fmt_header, fb_data, 0)
    print('魔數:{},圖片數:{}'.format(magic_number, num_images))

    # Fail early on a wrong file (e.g. one that is still gzip-compressed)
    # instead of decoding garbage dimensions.
    if magic_number != idx3_magic:
        raise ValueError('{}: unexpected magic number {} (expected {})'.format(
            idx3_ubyte_file, magic_number, idx3_magic))

    pixel_count = num_images * num_rows * num_cols
    if min(num_images, num_rows, num_cols) < 0 \
            or len(fb_data) - header_size < pixel_count:
        raise ValueError(
            '{}: pixel data shorter than the header announces'.format(
                idx3_ubyte_file))

    # Read every pixel in one vectorised pass instead of unpacking each image
    # through an intermediate Python tuple.
    pixels = np.frombuffer(
        fb_data, dtype=np.uint8, count=pixel_count, offset=header_size)

    # Convert to float64 so the result keeps the dtype that np.empty()
    # produced in the earlier implementation.
    return pixels.reshape((num_images, num_rows, num_cols)).astype(np.float64)
def decode_idx1_ubyte(idx1_ubyte_file):
    """Decode an uncompressed IDX1 unsigned-byte label file.

    The file is expected to start with two big-endian 32-bit integers (magic
    number, label count) followed by one unsigned byte per label.

    Args:
        idx1_ubyte_file: Path of the IDX1 file to read.

    Returns:
        A list of ``int`` labels in file order.

    Raises:
        ValueError: If the header is incomplete, the magic number is not the
            IDX1 unsigned-byte value, or fewer labels are present than the
            header announces.
    """
    idx1_magic = 2049  # 0x00000801: unsigned-byte data, one dimension

    with open(idx1_ubyte_file, 'rb') as f:
        print('解析文件:', idx1_ubyte_file)
        fb_data = f.read()

    fmt_header = '>ii'  # two big-endian 32-bit integers
    header_size = struct.calcsize(fmt_header)
    if len(fb_data) < header_size:
        raise ValueError(
            '{}: file too short for an IDX1 header'.format(idx1_ubyte_file))

    magic_number, label_num = struct.unpack_from(fmt_header, fb_data, 0)
    print('魔數:{},標籤數:{}'.format(magic_number, label_num))

    # Fail early on a wrong file rather than returning meaningless labels.
    if magic_number != idx1_magic:
        raise ValueError('{}: unexpected magic number {} (expected {})'.format(
            idx1_ubyte_file, magic_number, idx1_magic))
    if label_num < 0 or len(fb_data) - header_size < label_num:
        raise ValueError(
            '{}: label data shorter than the header announces'.format(
                idx1_ubyte_file))

    # Iterating a bytes object yields ints, so one slice replaces the
    # per-label struct.unpack_from() calls.
    return list(fb_data[header_size:header_size + label_num])
def check_folder(folder):
    """Make sure *folder* exists as a directory, creating it if necessary.

    Missing parent directories are created as well. The path is printed only
    when this call had to create it.

    Args:
        folder: Path of the directory to ensure.

    Raises:
        FileExistsError: If *folder* exists but is not a directory.
    """
    if os.path.isdir(folder):
        return
    # makedirs() also builds missing parents, and exist_ok=True tolerates the
    # directory appearing between the check above and this call.
    os.makedirs(folder, exist_ok=True)
    print(folder)
def export_img(exp_dir, img_ubyte, lable_ubyte):
    """Write every image of an IDX image/label pair as a PNG grouped by label.

    Image ``i`` with label ``k`` is written to ``<exp_dir>/<k>/<i>.png``.

    Args:
        exp_dir: Output root directory; created if missing.
        img_ubyte: Path of the IDX3 image file.
        lable_ubyte: Path of the IDX1 label file.

    Raises:
        ValueError: If the image file and the label file hold different
            numbers of entries.
        OSError: If OpenCV reports that an image could not be written.
    """
    check_folder(exp_dir)
    images = decode_idx3_ubyte(img_ubyte)
    labels = decode_idx1_ubyte(lable_ubyte)
    if len(images) != len(labels):
        raise ValueError('image count {} does not match label count {}'.format(
            len(images), len(labels)))

    # Remember the directories already ensured so the filesystem is queried
    # once per label instead of once per image.
    label_dirs = {}
    for i, (label, imarr) in enumerate(zip(labels, images)):
        img_dir = label_dirs.get(label)
        if img_dir is None:
            img_dir = os.path.join(exp_dir, str(label))
            check_folder(img_dir)
            label_dirs[label] = img_dir

        img_file = os.path.join(img_dir, str(i) + '.png')
        # Hand the encoder explicit 8-bit data; the decoded pixels are byte
        # values, so the cast does not change them.
        # cv2.imwrite() signals failure through its return value, not an
        # exception, so check it.
        if not cv2.imwrite(img_file, np.asarray(imarr, dtype=np.uint8)):
            raise OSError('failed to write image file: {}'.format(img_file))
def parser_mnist_data(data_dir):
    """Export the training and test IDX files under *data_dir* as images.

    The training pair is exported to ``<data_dir>/train`` first, then the
    test pair to ``<data_dir>/test``.

    Args:
        data_dir: Directory holding the four uncompressed IDX files.
    """
    # (output sub-directory, image file name, label file name)
    splits = (
        ('train', 'train-images-idx3-ubyte', 'train-labels-idx1-ubyte'),
        ('test', 't10k-images-idx3-ubyte', 't10k-labels-idx1-ubyte'),
    )
    for subdir, image_name, label_name in splits:
        export_img(
            os.path.join(data_dir, subdir),
            os.path.join(data_dir, image_name),
            os.path.join(data_dir, label_name),
        )
if __name__ == '__main__':
    # Resolve the dataset directory (with "~" expansion applied) and convert
    # both splits to image files.
    data_dir = os.path.expanduser(os.path.join("fashion-mnist/"))
    parser_mnist_data(data_dir)
- 此時得到的圖片文件夾是以標籤 0,1,...,9 命名的,可選擇進行如下轉換:
import os
# Class names indexed by the numeric Fashion-MNIST label.
text_labels = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat',
               'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']

# Rename the numeric label folders of the training set, then the test set.
for path in ("fashion-mnist/train", "fashion-mnist/test"):
    dirs = os.listdir(path)
    for oldname in dirs:
        # Skip anything that is not a numeric label folder (stray files, or
        # folders already renamed by an earlier run).
        if not oldname.isdecimal() or int(oldname) >= len(text_labels):
            continue
        newname = text_labels[int(oldname)]
        # listdir() returns bare entry names, so both ends of the rename
        # must be joined with the parent directory.
        os.rename(os.path.join(path, oldname), os.path.join(path, newname))
讀入數據集
# Root directory that contains the exported "train" and "test" image folders.
dataset_dir = "~/.mxnet/datasets/fashion-mnist"
train_root = os.path.join(dataset_dir, 'train')
test_root = os.path.join(dataset_dir, 'test')

# One ImageFolderDataset per split, built from its folder of images.
train_imgs = gdata.vision.ImageFolderDataset(train_root)
test_imgs = gdata.vision.ImageFolderDataset(test_root)
此時 train_imgs 和 test_imgs 的類型均爲 datasets.ImageFolderDataset,標籤已被轉化爲數值型。通過 synsets 屬性可查看標籤對應的類別名,如:
train_imgs.synsets = ['ankle boot', 'bag', 'coat', 'dress', 'pullover', 'sandal', 'shirt', 'sneaker', 't-shirt', 'trouser']
數據增廣
# Training pipeline: random horizontal flip (augmentation), conversion to a
# tensor, then Normalize() with its default arguments.
# NOTE(review): Normalize() is called without mean/std; confirm the defaults
# are what is intended here.
transformer = gdata.vision.transforms.Compose([
    gdata.vision.transforms.RandomFlipLeftRight(),
    gdata.vision.transforms.ToTensor(),
    gdata.vision.transforms.Normalize(),
])

# Evaluation pipeline: the same steps without the random flip, so test
# images are not randomly altered.
test_transformer = gdata.vision.transforms.Compose([
    gdata.vision.transforms.ToTensor(),
    gdata.vision.transforms.Normalize(),
])

# The datasets loaded with ImageFolderDataset are named train_imgs and
# test_imgs; transform_first() applies the pipeline to the image only and
# leaves the label untouched.
trainset = train_imgs.transform_first(transformer)
testset = test_imgs.transform_first(test_transformer)
構建小批量數據生成器
import sys  # needed for the platform check below

batch_size = 256
# Use no extra loader workers on Windows and four everywhere else.
num_workers = 0 if sys.platform.startswith('win32') else 4
train_iter = gdata.DataLoader(trainset, batch_size, shuffle=True,
                              num_workers=num_workers)
# Evaluation does not depend on sample order, so the test loader is left
# unshuffled to keep its batches reproducible.
test_iter = gdata.DataLoader(testset, batch_size, shuffle=False,
                             num_workers=num_workers)