之前做過一些簡單的深度學習項目,在我看來主要是一些調包工程師的工作,即應用現有的模型對一些項目進行訓練。初入kaggle,打算以最簡單的項目爲切入點,提升自己的知識水平。
環境:autokeras 0.4.0;
torch 1.3.1;
cuda10.0;
cudnn 7.5.1;
gpu rtx2070
本文記錄了這一項目進行的主要邏輯流程,主要步驟如下:
1)數據預處理,比較簡單:將圖片resize到統一尺寸,並保存爲numpy的.npy格式
2)使用autokeras進行模型的初篩,通過短時間的預訓練搜索出較爲合適的模型
3)用pytorch加載現有的預訓練模型,進行進一步的訓練
4)對測試數據進行預測生成csv文件,上傳
代碼上傳至:
github/dogs_vs_cats
一、數據預處理
訓練數據:將數據中的圖片和標籤信息儲存爲.npy格式,避免每次加載圖像佔用大量時間;數據名字中包含了標籤信息,cat記爲0,dog記爲1。
測試數據:將圖片按照文件名中的id進行排序(1-12500),按照id順序進行圖片的加載,存儲爲npy格式。
import os
import cv2
import numpy as np
# Target size handed to cv2.resize for every image; its first entry also tags
# the names of the .npy files written at the end.
image_size = (64, 64)
size_tag = str(image_size[0])

# Location of the data set; change this to your own path.
data_dir = '/mnt/HDD/Datasets/kaggle/dogs-vs-cats/'
train_dir = data_dir + "train/"
test_dir = data_dir + 'test1/'
test_list = os.listdir(test_dir)
train_list = os.listdir(train_dir)

# Test set: the numeric id is the part of the file name before the first dot.
# Sort the ids numerically and rebuild the file names so that the images are
# stored in ascending id order.
test_whole_ids = sorted(int(file_name.split('.')[0]) for file_name in test_list)
test_list = [str(image_id) + '.jpg' for image_id in test_whole_ids]
test_whole_images = [
    cv2.resize(cv2.imread(test_dir + file_name), image_size)
    for file_name in test_list
]

# Training set: the label is taken from the file name, 0 when it contains
# 'cat' and 1 otherwise. The running index is printed as a progress indicator.
train_whole_images = []
train_whole_labels = []
for position, file_name in enumerate(train_list):
    print(position)
    picture = cv2.imread(train_dir + file_name)
    train_whole_images.append(cv2.resize(picture, image_size))
    train_whole_labels.append(0 if 'cat' in file_name else 1)

# Store the data sets as numpy files so later steps do not have to decode
# the images again.
np.save('test_whole_images_' + size_tag + '.npy', np.asarray(test_whole_images))
np.save('train_whole_images_' + size_tag + '.npy', np.asarray(train_whole_images))
np.save('train_whole_labels_' + size_tag + '.npy', np.asarray(train_whole_labels))
# reload the dataset stored in numpy file
#test_whole_images = np.load('test_whole_images_'+str(image_size[0])+'.npy')
#test_whole_labels = np.load('test_whole_labels_'+str(image_size[0])+'.npy')
二、Autokeras模型初篩
目前autokeras官網有兩個版本,0.4.0和1.0。根據實際使用,發現1.0版本的文檔很少,模型導出以及在其他框架(TF、pytorch等)中的進一步加載、訓練都無法實現,因此選擇了0.4.0版本。
0.4.0版本中的各種網絡搜索參數,比如backend選擇、最大搜索次數、模型最大規模等,可以在Autokeras安裝路徑下的constant.py文件中進行更改;之後加載已經生成的訓練npy文件,進行模型篩選。
import os
import os
import cv2
import numpy as np
# The first entry of image_size is the tag embedded in the .npy file names.
image_size = (64, 64)
size_tag = str(image_size[0])

# Names of the numpy data files holding the training images and labels.
train_images_npy = 'train_whole_images_' + size_tag + '.npy'
train_labels_npy = 'train_whole_labels_' + size_tag + '.npy'

# Read both arrays back, images first and labels second.
train_whole_images, train_whole_labels = (
    np.load(npy_path) for npy_path in (train_images_npy, train_labels_npy)
)
import autokeras as ak
import torch
import torchvision
# Run the AutoKeras architecture search on the stored training arrays; the
# search keeps its working files under `path`.
classifier = ak.ImageClassifier(verbose=True, path='autokeras_temp_train')
classifier.fit(
    x=np.asarray(train_whole_images, dtype=np.uint8),
    y=np.asarray(train_whole_labels, dtype=np.float16),
    time_limit=60 * 50 * 1,  # search budget: 50 minutes, expressed in seconds
)

# File that receives the exported model, structure and parameters together.
MODEL_DIR = 'model.h5'
# Different methods to save the model; which of them work may differ between
# AutoKeras versions.
# 1)
#classifier.export_keras_model(MODEL_DIR)
# 2)
#from autokeras.utils import pickle_to_file,pickle_from_file
#pickle_to_file(classifier,MODEL_DIR)
# 3) export the best network found as a PyTorch module
torch.save(classifier.cnn.best_model.produce_model(), MODEL_DIR)

# Sanity check: reload the exported network and classify a random handful of
# training images.
model = torch.load(MODEL_DIR)
model.eval()
with torch.no_grad():
    # Number of samples chosen to test the model.
    test_num = 100
    # randint's upper bound is exclusive, so len(...) makes every sample
    # eligible (len(...) - 1 could never draw the last one).
    rand_ind = np.random.randint(0, len(train_whole_images), test_num)
    # The stored images are NHWC, but the exported network begins with
    # Conv2d(3, 64, ...) and therefore needs the channel axis first (NCHW).
    sample_images = np.swapaxes(train_whole_images[rand_ind], 1, 3)
    sample_images = np.swapaxes(sample_images, 2, 3)
    predict = model(torch.Tensor(sample_images)).cpu()
    label = train_whole_labels[rand_ind]
    predicted_class = torch.argmax(predict, dim=1)
    hits = predicted_class.numpy() == label
    print(predicted_class)
    print('label:', label)
    print(hits)
    # Labels are 0 (cat) / 1 (dog), so their sum is the number of dogs.
    dog = np.sum(label)
    cat = test_num - dog
    right = np.sum(hits)
    # Divide by test_num rather than a literal 100 so changing the sample
    # size keeps the statistics correct.
    print('accuracy:', float(right) / test_num)
    print('cat:', cat)
    print('dog:', dog)
篩選得到的模型如下:
(origin_model): TorchModel(
(0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(3): ReLU()
(4): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(6): ReLU()
(7): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(8): ReLU()
(9): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
(10): TorchAdd()
(11): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(12): ReLU()
(13): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(14): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(15): ReLU()
(16): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(17): ReLU()
(18): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
(19): TorchAdd()
(20): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(21): ReLU()
(22): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
(23): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(24): ReLU()
(25): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(26): ReLU()
(27): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2))
(28): TorchAdd()
(29): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(30): ReLU()
(31): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(32): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(33): ReLU()
(34): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(35): ReLU()
(36): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
(37): TorchAdd()
(38): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(39): ReLU()
(40): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
(41): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(42): ReLU()
(43): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(44): ReLU()
(45): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2))
(46): TorchAdd()
(47): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(48): ReLU()
(49): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(50): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(51): ReLU()
(52): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(53): ReLU()
(54): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
(55): TorchAdd()
(56): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(57): ReLU()
(58): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
(59): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(60): ReLU()
(61): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(62): ReLU()
(63): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2))
(64): TorchAdd()
(65): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(66): ReLU()
(67): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(68): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(69): ReLU()
(70): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(71): ReLU()
(72): Conv2d(512, 512, kernel_size=(1, 1), stride=(1, 1))
(73): TorchAdd()
(74): GlobalAvgPool2d()
(75): Linear(in_features=512, out_features=2, bias=True)
)
三、pytorch進一步訓練
之前沒有接觸過pytorch,一直用的是tensorflow。之所以選擇pytorch,是因爲autokeras生成的h5文件在以TF爲backend的keras中無法加載,而在pytorch中可以正常加載。
在這之中有一個問題,autokeras生成模型的最終輸出結果爲一個長度爲2的向量,但向量並未經過softmax,因此通過構造pytorch的model,通過加載模型後增加一個softmax層實現輸出的分類。
class TestModel(nn.Module):
    """Wrap the AutoKeras-exported network and append a softmax layer.

    The wrapped network is unpickled with ``torch.load`` from the path held in
    the module-level ``MODEL_DIR`` at construction time.
    """

    def __init__(self):
        super(TestModel, self).__init__()
        # Load the model pretrained by autokeras.
        self.origin_model = torch.load(MODEL_DIR)
        # Name the class axis explicitly: Softmax() without `dim` relies on a
        # deprecated implicit choice. For a (batch, classes) output that
        # implicit choice is also axis 1, so results are unchanged.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return the softmax of the wrapped network's output for ``x``."""
        x = self.origin_model(x)
        x = self.softmax(x)
        return x
import os
# Set TensorFlow's C++ log threshold before the imports below run; '2' hides
# INFO and WARNING messages. (Presumably autokeras is what pulls TensorFlow
# in -- confirm.)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# Size of the preprocessed images; its first entry is used further down to
# build the names of the .npy files to load.
image_size = (64,64)
import os
import cv2
import numpy as np
import autokeras as ak
import torch
import torchvision
import torch.optim as optim
import torch.nn as nn
class TestModel(nn.Module):
    """Wrap the AutoKeras-exported network and append a softmax layer.

    The wrapped network is unpickled with ``torch.load`` from the path held in
    the module-level ``MODEL_DIR`` at construction time, so ``MODEL_DIR`` must
    be assigned before an instance is created.
    """

    def __init__(self):
        super(TestModel, self).__init__()
        # Load the model pretrained by autokeras.
        self.origin_model = torch.load(MODEL_DIR)
        # Name the class axis explicitly: Softmax() without `dim` relies on a
        # deprecated implicit choice. For a (batch, classes) output that
        # implicit choice is also axis 1, so results are unchanged.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return the softmax of the wrapped network's output for ``x``."""
        x = self.origin_model(x)
        x = self.softmax(x)
        return x
# Network exported by the AutoKeras search; TestModel reads this path when it
# is constructed.
MODEL_DIR = 'model.h5'
test_model = TestModel()
test_model.train()
test_model = nn.DataParallel(test_model, device_ids=[0]).cuda()
# Write an initial checkpoint before any parameter update.
torch.save(test_model, 'test_model')

# TestModel already ends in a softmax, so the loss has to consume
# log-probabilities: NLLLoss(log(softmax(z))) is the cross entropy of the raw
# scores z. Passing the softmax output to CrossEntropyLoss would apply softmax
# a second time and flatten the gradients.
criterion = nn.NLLLoss()
optimizer = optim.SGD(test_model.parameters(), lr=0.001, momentum=0.9)
# Not referenced anywhere below.
index = [0,2,3,1]

train_whole_images = np.load('train_whole_images_'+str(image_size[0])+'.npy')
train_whole_labels = np.load('train_whole_labels_'+str(image_size[0])+'.npy')
# Change the layout from NHWC to NCHW.
train_whole_images = np.swapaxes(train_whole_images, 1, 3)
train_whole_images = np.swapaxes(train_whole_images, 2, 3)

batchsize = 128
num_samples = len(train_whole_images)
# Number of training samples drawn for each accuracy spot check.
test_num = 100

for epoch in range(100):  # loop over the dataset multiple times
    # Stepping by batchsize visits every sample, including a final batch that
    # is smaller than batchsize when num_samples is not a multiple of it.
    for i, start in enumerate(range(0, num_samples, batchsize)):
        # Half-open slice [start, stop): exactly batchsize samples. (Ending at
        # stop - 1 would silently drop the last sample of every batch.)
        stop = start + batchsize
        inputs = torch.Tensor(train_whole_images[start:stop]).cuda()
        targets = torch.Tensor(np.asarray(train_whole_labels[start:stop])).long().cuda()

        # zero the parameter gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        outputs = test_model(inputs)
        # Clamp before the log so a probability that underflowed to 0 cannot
        # produce an infinite loss.
        loss = criterion(torch.log(outputs.clamp(min=1e-12)), targets)
        loss.backward()
        optimizer.step()

        if i % 20 == 0:
            print('epoch:',epoch+1,',batch_ind/total_batch:',i * batchsize / float(num_samples) * 100,'%')
            torch.save(test_model,'test_model')

            # Spot check on random training samples. eval() makes BatchNorm use
            # its running statistics and stops it from updating them during the
            # check; no_grad() avoids building the autograd graph.
            test_model.eval()
            with torch.no_grad():
                # randint's upper bound is exclusive, so num_samples makes
                # every sample eligible.
                rand_ind = np.random.randint(0, num_samples, test_num)
                predict = test_model(torch.Tensor(train_whole_images[rand_ind])).cpu()
                predicted_class = torch.argmax(predict, dim=1)
                print(predicted_class)
                label = train_whole_labels[rand_ind]
                print('label:', label)
                # Labels are 0 (cat) / 1 (dog), so their sum counts the dogs.
                dog = np.sum(label)
                cat = test_num - dog
                hits = predicted_class.numpy() == label
                right = np.sum(hits)
                print(hits)
                print(epoch+1,':', float(right)/test_num)
                print('cat:', cat)
                print('dog:', dog)
            # Back to training mode for the next optimisation step.
            test_model.train()
            # clear the cuda cache
            torch.cuda.empty_cache()
print('Finished Training')
四、對測試數據進行預測
按照比賽官方要求的格式進行文件的生成。
import os
import cv2
import numpy as np
import autokeras as ak
import torch
import torchvision
import torch.optim as optim
import torch.nn as nn
# create a new model to change the last 1 layer in the origin model generated from autokeras
class TestModel(nn.Module):
    """Wrap a stored network and append a softmax layer.

    The wrapped network is unpickled with ``torch.load`` from the path held in
    the module-level ``MODEL_DIR`` at construction time, so ``MODEL_DIR`` must
    be assigned before an instance is created.
    """

    def __init__(self):
        super(TestModel, self).__init__()
        # Load the origin model.
        self.origin_model = torch.load(MODEL_DIR)
        # Name the class axis explicitly: Softmax() without `dim` relies on a
        # deprecated implicit choice. For a (batch, classes) output that
        # implicit choice is also axis 1, so results are unchanged.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return the softmax of the wrapped network's output for ``x``."""
        x = self.origin_model(x)
        x = self.softmax(x)
        return x
image_size = (64,64)
# Checkpoint written by the training script. NOTE: that file holds the whole
# DataParallel-wrapped TestModel, so wrapping it in TestModel again applies
# softmax twice; the argmax taken below is not affected by that.
MODEL_DIR = 'test_model'
test_model = TestModel()
# Run the model on the GPU.
test_model = nn.DataParallel(test_model, device_ids=[0]).cuda()
test_model.eval()

# Load the test images saved by the preprocessing step, so this script does
# not depend on variables left over from an earlier script.
test_whole_images = np.load('test_whole_images_' + str(image_size[0]) + '.npy')
# The preprocessing step stores the test images in ascending id order, and the
# test ids run from 1 to the number of images (1-12500), so each id is the
# position plus one. NOTE(review): confirm there are no gaps in the ids if a
# different test set is used.
test_whole_ids = np.arange(1, len(test_whole_images) + 1)
# Change the layout from NHWC to NCHW.
test_whole_images = np.swapaxes(test_whole_images, 1, 3)
test_whole_images = np.swapaxes(test_whole_images, 2, 3)

batchsize = 100
# Open the file once in write mode: a rerun replaces the old file instead of
# appending duplicate rows, and the header row of the submission format is
# written first.
with open('submission.csv', 'w') as f:
    f.write('id,label\n')
    with torch.no_grad():
        # Stepping by batchsize also covers a final batch smaller than
        # batchsize when the image count is not a multiple of it.
        for start in range(0, len(test_whole_images), batchsize):
            stop = start + batchsize
            predict = test_model(torch.Tensor(test_whole_images[start:stop])).cpu()
            predict = torch.argmax(predict, dim=1).numpy()
            print(predict)
            ids = test_whole_ids[start:stop]
            # One "id,label" row per image.
            f.writelines(
                '{},{}\n'.format(image_id, output)
                for image_id, output in zip(ids, predict)
            )
print('Finished Training')