cifar-10數據集來自於kaggle平臺上下載下來的(一般新推出的模型論文都會使用cifar數據集,但是使用的是cifar-100,是針對100類不同對象的分類),我們通過在本地建立模型來實現該10分類問題,並將test數據集上的識別結果生成csv文件上傳到kaggle上進行驗證,可以得到識別test數據的分數。
這裏代碼使用的是基本的卷積神經網絡模型(conv + bn + max_pooling)
# Plotting libraries.
import matplotlib as mpl
import matplotlib.pyplot as plt
# The line below enables inline plotting inside a Jupyter notebook.
%matplotlib inline
import numpy as np
import sklearn  # machine-learning algorithm library
import pandas as pd  # data-handling library
import os
import sys
import time
import tensorflow as tf
from tensorflow import keras  # use the keras bundled with tensorflow
#import keras  # standalone keras (not used)
print(tf.__version__)
print(sys.version_info)
# Print the version of every imported module for reproducibility.
for module in mpl, np, sklearn, pd, tf, keras:
    print(module.__name__, module.__version__)
2.0.0
sys.version_info(major=3, minor=6, micro=9, releaselevel='final', serial=0)
matplotlib 3.1.2
numpy 1.18.0
sklearn 0.21.3
pandas 0.25.3
tensorflow 2.0.0
tensorflow_core.keras 2.2.4-tf
# Configure TensorFlow to allocate GPU memory on demand instead of grabbing
# all of it at start-up.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if not physical_devices:
    # `assert` statements are stripped under `python -O`; raise explicitly
    # so the check survives optimized runs.
    raise RuntimeError("Not enough GPU hardware devices available")
for gpu in physical_devices:
    # Enable memory growth on every visible GPU, not just the first one.
    tf.config.experimental.set_memory_growth(gpu, True)
# The ten CIFAR-10 categories; alphabetical order matches the label encoding
# that flow_from_dataframe produces.
class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck']

# Dataset locations on disk.
train_dir = "/home/galaxy/DeepLearning/DATASETS/cifar-10/train"
train_labels_file = "/home/galaxy/DeepLearning/DATASETS/cifar-10/trainLabels.csv"
test_dir = "/home/galaxy/DeepLearning/DATASETS/cifar-10/test"
test_csv_file = '/home/galaxy/DeepLearning/DATASETS/cifar-10/sampleSubmission.csv'

# Sanity-check that the dataset paths exist (same print order as before:
# train dir, test dir, labels file).
for _path in (train_dir, test_dir, train_labels_file):
    print(os.path.exists(_path))
#讀取csv文件查看其內容
#labels = pd.read_csv(train_labels_file, header=0)
#print(labels)
#因爲train文件夾下直接存放的是所有的文件,其每個圖片的編號對應的label都在csv文件裏面一一對應,所以我們需要將圖片文件與label一一對應
def parse_csv_file(filepath, folder):
    """Parse a Kaggle labels CSV into (image_path, label) pairs.

    Args:
        filepath: path to a CSV whose rows are ``id,label`` (the header row
            is skipped).
        folder: directory that holds the ``<id>.png`` image files.

    Returns:
        A list of ``(image_full_path, label_str)`` tuples, one per data row.
    """
    import csv  # local import keeps the fix self-contained

    results = []
    # csv.reader correctly handles CRLF line endings and quoted fields,
    # unlike the hand-rolled strip('\n').split(',') it replaces.
    with open(filepath, 'r', newline='') as f:
        reader = csv.reader(f)
        next(reader, None)  # skip the "id,label" header row
        for row in reader:
            if not row:  # tolerate trailing blank lines
                continue
            image_id, label_str = row[0], row[1]
            results.append((os.path.join(folder, image_id + '.png'), label_str))
    return results
import pprint
# Build (image_path, label) lists for the train and test splits.
train_labels_info = parse_csv_file(train_labels_file, train_dir)
test_labels_info = parse_csv_file(test_csv_file, test_dir)
# Peek at the first few entries and print the split sizes.
pprint.pprint(train_labels_info[0:5])
pprint.pprint(test_labels_info[0:5])
print(len(train_labels_info), len(test_labels_info))
True
True
True
[('/home/galaxy/DeepLearning/DATASETS/cifar-10/train/1.png', 'frog'),
('/home/galaxy/DeepLearning/DATASETS/cifar-10/train/2.png', 'truck'),
('/home/galaxy/DeepLearning/DATASETS/cifar-10/train/3.png', 'truck'),
('/home/galaxy/DeepLearning/DATASETS/cifar-10/train/4.png', 'deer'),
('/home/galaxy/DeepLearning/DATASETS/cifar-10/train/5.png', 'automobile')]
[('/home/galaxy/DeepLearning/DATASETS/cifar-10/test/1.png', 'cat'),
('/home/galaxy/DeepLearning/DATASETS/cifar-10/test/2.png', 'cat'),
('/home/galaxy/DeepLearning/DATASETS/cifar-10/test/3.png', 'cat'),
('/home/galaxy/DeepLearning/DATASETS/cifar-10/test/4.png', 'cat'),
('/home/galaxy/DeepLearning/DATASETS/cifar-10/test/5.png', 'cat')]
50000 300000
#train_df = pd.DataFrame(train_labels_info)  # DataFrame is a tabular structure
# Split the labelled data into a 45k-image training set and a 5k-image
# validation set; the test split keeps every entry.
_COLUMNS = ['filepath', 'class']
train_df = pd.DataFrame(train_labels_info[:45000], columns=_COLUMNS)
valid_df = pd.DataFrame(train_labels_info[45000:], columns=_COLUMNS)
test_df = pd.DataFrame(test_labels_info, columns=_COLUMNS)
# Show the first rows of each split, in the same order as before.
for _df in (train_df, valid_df, test_df):
    print(_df.head())
filepath class
0 /home/galaxy/DeepLearning/DATASETS/cifar-10/tr... frog
1 /home/galaxy/DeepLearning/DATASETS/cifar-10/tr... truck
2 /home/galaxy/DeepLearning/DATASETS/cifar-10/tr... truck
3 /home/galaxy/DeepLearning/DATASETS/cifar-10/tr... deer
4 /home/galaxy/DeepLearning/DATASETS/cifar-10/tr... automobile
filepath class
0 /home/galaxy/DeepLearning/DATASETS/cifar-10/tr... horse
1 /home/galaxy/DeepLearning/DATASETS/cifar-10/tr... automobile
2 /home/galaxy/DeepLearning/DATASETS/cifar-10/tr... deer
3 /home/galaxy/DeepLearning/DATASETS/cifar-10/tr... automobile
4 /home/galaxy/DeepLearning/DATASETS/cifar-10/tr... airplane
filepath class
0 /home/galaxy/DeepLearning/DATASETS/cifar-10/te... cat
1 /home/galaxy/DeepLearning/DATASETS/cifar-10/te... cat
2 /home/galaxy/DeepLearning/DATASETS/cifar-10/te... cat
3 /home/galaxy/DeepLearning/DATASETS/cifar-10/te... cat
4 /home/galaxy/DeepLearning/DATASETS/cifar-10/te... cat
##################################
# The images are NOT organised into one subfolder per class; everything lives
# in a single directory and each file's label comes from the DataFrame, so we
# use flow_from_dataframe instead of flow_from_directory.
##################################
# resnet50 expects 224x224 inputs:
#height = 224
#width = 224
height = 32  # target image height after resizing
width = 32   # target image width after resizing
channels = 3  # number of colour channels
batch_size = 32
num_classes = 10
##########------------ training data ------------##########
# Generator that preprocesses/augments the training images: rescaling plus
# random rotation, shifts, shear, zoom and horizontal flips.
train_datagen = keras.preprocessing.image.ImageDataGenerator(
    #preprocessing_function = keras.applications.resnet50.preprocess_input,  # resnet50-specific preprocessing; scales images into [-1, 1]
    # if preprocessing_function above is used, rescale is unnecessary
    rescale = 1./255,  # pixel values lie in [0, 255]; scale them to [0, 1]
    rotation_range = 40,  # random rotation range in degrees (-40 to 40)
    width_shift_range = 0.2,  # value < 1: fraction of the width to shift by
    height_shift_range= 0.2,  # value > 1 would mean a shift measured in pixels
    shear_range = 0.2,  # shear intensity
    zoom_range = 0.2,  # zoom intensity
    horizontal_flip = True,  # random horizontal flips
    fill_mode = 'nearest',  # how newly exposed pixels are filled
)
# Read the images listed in the DataFrame and apply the augmentation above.
train_generator = train_datagen.flow_from_dataframe(train_df,
                                                    directory=train_dir,
                                                    x_col='filepath',
                                                    y_col='class',
                                                    classes=class_names,
                                                    target_size=(height,width),
                                                    batch_size=batch_size,
                                                    seed=7,
                                                    shuffle=True,
                                                    class_mode='sparse')
'''
train_generator = train_datagen.flow_from_directory(train_dir,
target_size = (height,width), #目錄下的圖片會被resize的大小
batch_size = batch_size,
seed = 7,#隨機種子,用於洗牌和轉換,隨便給個數即可
shuffle = True,#False->則按字母數字順序對數據進行排序 True->打亂數據
class_mode = "categorical", # 該參數決定了返回的標籤數組的形式
#classes = 這個參數就是描述的 文件夾名與輸出標籤的對應關係
)
'''
# classes: optional list of class subfolders, e.g. ['dogs', 'cats']; default
# None, in which case the class list is inferred from the subfolder names
# (alphabetical order maps folder names to label indices).
# The generator's .class_indices attribute maps class names to label indices.
print(train_generator.class_indices)
##########------------ validation data ------------##########
# Validation images must NOT be augmented; keep only the rescaling, otherwise
# the value distribution would differ from the training set.
valid_datagen = keras.preprocessing.image.ImageDataGenerator(
    #preprocessing_function = keras.applications.resnet50.preprocess_input,  # resnet50 preprocessing (normalisation) would make rescale unnecessary
    rescale = 1./255,  # pixel values lie in [0, 255]; scale them to [0, 1]
)
# Read the validation images listed in the DataFrame.
valid_generator = valid_datagen.flow_from_dataframe(valid_df,
                                                    directory=train_dir,
                                                    x_col='filepath',
                                                    y_col='class',
                                                    classes=class_names,
                                                    target_size=(height,width),
                                                    batch_size=batch_size,
                                                    seed=7,
                                                    shuffle=True,
                                                    class_mode='sparse')
'''
valid_generator = valid_datagen.flow_from_directory(valid_dir,
target_size = (height,width), #目錄下的圖片會被resize的大小
batch_size = batch_size,
seed = 7,#隨機種子,用於洗牌和轉換,隨便給個數即可
shuffle = False,#不需要訓練所以不需要打亂數據
class_mode = "categorical", # 該參數決定了返回的標籤數組的形式
)
'''
# The generator's .class_indices attribute maps class names to label indices.
print(valid_generator.class_indices)
train_num = train_generator.samples
valid_num = valid_generator.samples
print(train_num, valid_num)
Found 45000 validated image filenames belonging to 10 classes.
{'airplane': 0, 'automobile': 1, 'bird': 2, 'cat': 3, 'deer': 4, 'dog': 5, 'frog': 6, 'horse': 7, 'ship': 8, 'truck': 9}
Found 5000 validated image filenames belonging to 10 classes.
{'airplane': 0, 'automobile': 1, 'bird': 2, 'cat': 3, 'deer': 4, 'dog': 5, 'frog': 6, 'horse': 7, 'ship': 8, 'truck': 9}
45000 5000
# Pull one batch from the training generator to inspect shapes.
for i in range(1):
    # Use the builtin next() instead of the iterator's .next() method.
    x, y = next(train_generator)
    print(x.shape, y.shape)
    print(y)
# NOTE: with class_mode='sparse' the labels come back as a 1-D array of
# integer class indices (shape (batch_size,)), NOT as 2-D one-hot vectors —
# the printed y.shape == (32,) confirms this.  The original comment claiming
# one-hot encoding was wrong.
(32, 32, 32, 3) (32,)
[2. 1. 4. 4. 4. 4. 6. 5. 2. 8. 4. 6. 6. 3. 7. 1. 7. 2. 8. 8. 3. 0. 5. 3.
9. 1. 4. 5. 6. 7. 9. 2.]
# Alternative approach: transfer learning with ResNet50 (kept for reference,
# disabled — the plain CNN below is used instead).
'''
#1.這裏ResNet50層當做一層,只有最後一層是可以被訓練的
resnet50_fine_tune = keras.models.Sequential([
keras.applications.ResNet50(include_top=False,#include_top:是否保留頂層的全連接網絡,這裏最後要定義自己的softmax選False
pooling='avg',#‘avg’代表全局平均池化,‘max’代表全局最大值池化
weights='imagenet',#None代表隨機初始化,即不加載預訓練權重;'imagenet’代表加載預訓練權重
),
keras.layers.Dense(num_classes, activation='softmax'),
])
resnet50_fine_tune.layers[0].trainable=False #設置ResNet50這一層的參數不可訓練,因爲 weights='imagenet'
#2.這裏ResNet50中最後幾層都是可以訓練,我們可以在模型架構裏面看到 Trainable params可訓練參數會大大增加
resnet50 = keras.applications.ResNet50(include_top=False, pooling='avg', weights='imagenet')
for layers in resnet50.layers[0:-5]: #這裏遍歷最後五層之前的layers並設置其權重相關參數不可遍歷
layers.trainable = False
resnet50_fine_tune = keras.models.Sequential([
resnet50,
keras.layers.Dense(num_classes, activation='softmax'),
])
'''
# Plain convolutional network: three (conv + BN, conv + BN, max-pool) stages
# with the filter count doubling each stage (128 -> 256 -> 512), followed by
# a dense head.  Built with Sequential.add for readability; the resulting
# model is identical to the literal-list construction.
model = keras.models.Sequential()
for stage, filters in enumerate((128, 256, 512)):
    for conv_idx in range(2):
        conv_kwargs = dict(filters=filters, kernel_size=3,
                           padding="same", activation="relu")
        if stage == 0 and conv_idx == 0:
            # Only the very first layer declares the input shape.
            conv_kwargs["input_shape"] = (width, height, channels)
        model.add(keras.layers.Conv2D(**conv_kwargs))
        model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.MaxPool2D(pool_size=2))
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(512, activation="relu"))
model.add(keras.layers.Dense(num_classes, activation="softmax"))
# The loss must match the label format chosen earlier:
#  - 1-D integer labels (class_mode='sparse')  -> sparse_categorical_crossentropy
#  - 2-D one-hot labels                        -> categorical_crossentropy
# metrics=["accuracy"] reports classification accuracy during training.
model.compile(loss="sparse_categorical_crossentropy",
              optimizer="adam",
              metrics=["accuracy"])
model.summary()
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 32, 32, 128) 3584
_________________________________________________________________
batch_normalization (BatchNo (None, 32, 32, 128) 512
_________________________________________________________________
conv2d_1 (Conv2D) (None, 32, 32, 128) 147584
_________________________________________________________________
batch_normalization_1 (Batch (None, 32, 32, 128) 512
。。。
max_pooling2d_2 (MaxPooling2 (None, 4, 4, 512) 0
_________________________________________________________________
flatten (Flatten) (None, 8192) 0
_________________________________________________________________
dense (Dense) (None, 512) 4194816
_________________________________________________________________
dense_1 (Dense) (None, 10) 5130
=================================================================
Total params: 8,783,498
Trainable params: 8,779,914
Non-trainable params: 3,584
import shutil
# Start each run with a fresh callback directory (TensorBoard logs and
# checkpoints from a previous run are removed).
callback_dir = "./callback_cifar-10"
if os.path.exists(callback_dir):
    shutil.rmtree(callback_dir)
os.mkdir(callback_dir)
output_model_file=os.path.join(callback_dir,"cifar10_model.h5")  # best-model checkpoint (.h5) inside the callback dir
callbacks = [
    keras.callbacks.TensorBoard(callback_dir),
    keras.callbacks.ModelCheckpoint(output_model_file, save_best_only=True),
    # Stop early when val_loss improves by less than 1e-3 for 5 epochs.
    keras.callbacks.EarlyStopping(patience=5,min_delta=1e-3),
]
epochs = 20  # with fine-tuning, few epochs suffice for a good result
# fit_generator is used because the data comes from an ImageDataGenerator.
history = model.fit_generator(train_generator,  # steps_per_epoch: batches consumed per epoch
                              steps_per_epoch = train_num // batch_size,
                              epochs = epochs,
                              validation_data = valid_generator,
                              validation_steps= valid_num // batch_size,
                              callbacks = callbacks,
                              )
'''
history = resnet50_fine_tune.fit_generator(train_generator,#steps_per_epoch: 一個epoch包含的步數(每一步是一個batch的數據送入)
steps_per_epoch = train_num // batch_size,
epochs = epochs,
validation_data = valid_generator,
validation_steps= valid_num // batch_size,
callbacks = callbacks,
)
'''
# NOTE(review): in earlier runs val_accuracy stayed flat instead of improving;
# that was caused by using selu activations — switching to relu (as the model
# above does) resolved it.
Epoch 1/20
1406/1406 [==============================] - 110s 79ms/step - loss: 2.2732 - accuracy: 0.2828 - val_loss: 1.8249 - val_accuracy: 0.3488
Epoch 2/20
1406/1406 [==============================] - 115s 82ms/step - loss: 1.5933 - accuracy: 0.4090 - val_loss: 1.6994 - val_accuracy: 0.4183
Epoch 3/20
1406/1406 [==============================] - 118s 84ms/step - loss: 1.3849 - accuracy: 0.4980 - val_loss: 1.6454 - val_accuracy: 0.4351
Epoch 4/20
1406/1406 [==============================] - 118s 84ms/step - loss: 1.2231 - accuracy: 0.5648 - val_loss: 1.1681 - val_accuracy: 0.6034
。。。
1406/1406 [==============================] - 107s 76ms/step - loss: 0.5279 - accuracy: 0.8201 - val_loss: 0.6024 - val_accuracy: 0.8271
Epoch 19/20
1406/1406 [==============================] - 107s 76ms/step - loss: 0.5139 - accuracy: 0.8255 - val_loss: 0.5964 - val_accuracy: 0.8165
Epoch 20/20
1406/1406 [==============================] - 107s 76ms/step - loss: 0.4912 - accuracy: 0.8327 - val_loss: 0.5760 - val_accuracy: 0.8251
# Test-time preprocessing: rescaling only, no augmentation.
test_datagen = keras.preprocessing.image.ImageDataGenerator(
    #preprocessing_function = keras.applications.resnet50.preprocess_input,  # resnet50 preprocessing (normalisation) would make rescale unnecessary
    rescale = 1./255,  # pixel values lie in [0, 255]; scale them to [0, 1]
)
# Read the test images listed in the DataFrame.  shuffle=False keeps the
# prediction order aligned with the submission row ids.
test_generator = test_datagen.flow_from_dataframe(test_df,
                                                  directory=test_dir,
                                                  x_col='filepath',
                                                  y_col='class',
                                                  classes=class_names,
                                                  target_size=(height,width),
                                                  batch_size=batch_size,
                                                  seed=7,
                                                  shuffle=False,
                                                  class_mode='sparse')
test_num = test_generator.samples
print(test_num)
Found 300000 validated image filenames belonging to 10 classes.
300000
# Run inference over the whole test set.  flow_from_dataframe returns a
# keras Sequence, so multiprocess workers are safe here.
test_predict = model.predict_generator(test_generator,
                                       workers=10,  # degree of parallelism
                                       use_multiprocessing=True,  # True: 10 processes; False: 10 threads
                                       )
# print(test_predict)
print(test_predict.shape)  # a 300000 x 10 matrix
print(test_predict[0:5])  # first 5 rows: per-sample probability distribution over the 10 classes
(300000, 10)
[[9.54611506e-03 1.80074260e-01 2.62391381e-03 5.42382039e-02
1.09696174e-02 3.63456691e-03 7.73755368e-03 8.62432702e-04
4.05505439e-03 7.26258218e-01]
[9.30483162e-01 4.34813369e-03 5.24184527e-03 3.60551313e-03
9.16590355e-03 2.50156707e-04 1.25574612e-03 2.26421675e-04
3.19052041e-02 1.35178575e-02]
[7.67244501e-07 9.99046147e-01 2.93465563e-09 1.32095090e-06
1.09673610e-10 1.53255339e-10 2.51321115e-08 2.06194639e-09
5.29228110e-08 9.51812486e-04]
[1.32097994e-04 5.31877231e-06 2.55758619e-06 3.46006732e-06
1.49765199e-07 8.87896690e-08 6.87345619e-07 2.77201764e-07
9.99815166e-01 4.02240330e-05]
[9.64022636e-01 3.00924119e-04 4.65835538e-03 1.33373905e-02
1.23353524e-03 5.36854519e-03 2.72046030e-03 9.04678775e-04
6.65733730e-03 7.96015956e-04]]
# Convert probability rows into class predictions by taking the index of the
# largest probability in each row (argmax over axis=1 operates row-wise).
test_predict_class_index = test_predict.argmax(axis=1)
print(test_predict_class_index[0:5])
# Translate the numeric indices back into human-readable class names.
test_predict_class = [class_names[idx] for idx in test_predict_class_index]
print(test_predict_class[0:5])
#print('This is a ', test_predict_class[0])
['truck', 'airplane', 'automobile', 'ship', 'airplane']
#生成 csv文件上傳到 Kaggle中
def generate_submissions(filename, predict_class):
    """Write a Kaggle submission CSV.

    The file starts with an ``id,label`` header followed by one row per
    prediction; ids start at 1 to match the test image filenames.

    Args:
        filename: output CSV path (overwritten if it already exists).
        predict_class: sequence of predicted class-name strings, ordered
            by test image id.
    """
    with open(filename, 'w') as f:
        f.write('id,label\n')
        # enumerate(start=1) replaces the index-based range(len(...)) loop.
        for row_id, label in enumerate(predict_class, start=1):
            f.write('%d,%s\n' % (row_id, label))
# Write the submission file for upload to Kaggle.
output_file = '/home/galaxy/DeepLearning/DATASETS/cifar-10/submission.csv'
generate_submissions(output_file,test_predict_class)