一个小目标:构建一个classifier超越 lenet5!(model1_cifar10)

新的模型结构还没想出来.....

想先试试cifar10上训练的结果和lenet5的差距,baseline主要参考这里

然而我没有1080Ti...... 我只有1060 6G版,哭

首先写个cifar10的数据读入和预处理的脚本(脚本在之前的 repo 里,文件名为 cifar10_loader.py),这里只讲一下大概的函数:

 

import pickle
import glob
import cv2
import tqdm
import os
import sys
import logging
import random
import numpy as np
import math
class Cifa10_data:
    """Hold the CIFAR-10 tensors and hand them out as mini-batches.

    This is the object the training script is meant to use.

    Constructor arguments:
        base_dir: directory containing the CIFAR-10 python batch files.
        batch_size: number of samples per mini-batch.
        rotate_ratio: fraction of training images that get a rotated copy.
        flip_ratio: fraction of training images that get a horizontally
            mirrored copy.
        cropSize: side length of the centre crop (the article crops the
            32*32 originals to 28*28 to stay consistent with MNIST).
        validate_batch_num: how many batches of test data
            ``get_validate_datas`` returns.
    """

    def __init__(self,base_dir,batch_size,rotate_ratio,flip_ratio,cropSize,validate_batch_num=3):
        (self.train_data_tensor,
         self.test_data_tensor,
         self.train_label_tensor,
         self.test_label_tensor) = load_cifar10(base_dir, rotate_ratio, flip_ratio, cropSize)
        self.batch_size = batch_size
        # Only full batches are served; a trailing partial batch is dropped.
        self.batchs_for_one_epoch_train = self.train_data_tensor.shape[0] // batch_size
        self.batchs_for_one_epoch_test = self.test_data_tensor.shape[0] // batch_size
        self.train_batch_counter = 0
        self.test_batch_counter = 0
        self.label_map = load_label_map(base_dir)
        self.valid_batches = validate_batch_num
        self.shuffle_train()

    def next_Batch_train(self):
        """Return the next ``(data, labels)`` training batch.

        Every full batch of the epoch is served exactly once. When the epoch
        is exhausted the training set is reshuffled and serving restarts from
        the first batch of the new order.
        """
        if self.train_batch_counter >= self.batchs_for_one_epoch_train:
            # Epoch finished: start a new one on freshly shuffled data.
            self.train_batch_counter = 0
            self.shuffle_train()

        start_idx = self.train_batch_counter * self.batch_size
        end_idx = start_idx + self.batch_size
        self.train_batch_counter += 1
        return self.train_data_tensor[start_idx:end_idx], self.train_label_tensor[start_idx:end_idx]

    def next_Batch_test(self):
        """Return the next ``(data, labels)`` test batch, or ``None`` at the end.

        ``None`` marks the end of one pass over the test set. The counter is
        rewound at that point so a later evaluation pass starts again from the
        first batch.
        """
        if self.test_batch_counter >= self.batchs_for_one_epoch_test:
            self.test_batch_counter = 0
            return None

        start_idx = self.test_batch_counter * self.batch_size
        end_idx = start_idx + self.batch_size
        self.test_batch_counter += 1
        return self.test_data_tensor[start_idx:end_idx], self.test_label_tensor[start_idx:end_idx]

    def get_validate_datas(self):
        """Return the first ``valid_batches * batch_size`` test samples and labels."""
        end_idx = self.valid_batches * self.batch_size
        return self.test_data_tensor[:end_idx], self.test_label_tensor[:end_idx]

    def shuffle_train(self):
        """Shuffle training data and labels with one shared permutation."""
        perm = np.random.permutation(self.train_data_tensor.shape[0])
        self.train_data_tensor = self.train_data_tensor[perm]
        self.train_label_tensor = self.train_label_tensor[perm]

def file_loader(file_path):
    """Load one CIFAR-10 python batch file.

    Each row of ``b'data'`` holds 3072 values: 1024 red, then 1024 green,
    then 1024 blue, each plane stored row by row. The row is reshaped to
    (channel, row, col), moved to (row, col, channel) and the channel axis is
    reversed so the result is a 32*32 BGR image in OpenCV's channel order.
    This decodes the pixels exactly, with no interpolation, mirroring or
    border fill.

    Args:
        file_path: path of a pickled CIFAR-10 batch file.

    Returns:
        ``(images, labels)`` where ``images`` is a list of 32*32*3 BGR arrays
        and ``labels`` is the ``b'labels'`` entry of the batch, unchanged.
    """
    # NOTE: pickle can execute arbitrary code; only load trusted dataset files.
    with open(file_path, 'rb') as fo:
        batch = pickle.load(fo, encoding='bytes')

    images = [
        np.ascontiguousarray(
            np.asarray(row).reshape(3, 32, 32).transpose(1, 2, 0)[:, :, ::-1]
        )
        for row in batch[b'data']
    ]
    labels = batch[b'labels']
    return images, labels

def load_cifar10(base_dir:str,rotate_ratio=0.1,flip_ratio=0.1,croppedSize=None):
    """Read every CIFAR-10 batch file under ``base_dir`` and preprocess it.

    Files matching ``data_batch_*`` form the training set and ``test_batch``
    forms the test set. Both sets go through ``preprocess``; the training set
    is passed with ``is_train=True`` and the test set with ``is_train=False``.

    Returns:
        ``(train_data, test_data, train_labels, test_labels)`` as returned by
        ``preprocess``.
    """
    def _read_all(paths):
        # Concatenate the images and labels of all given batch files.
        collected_images, collected_labels = [], []
        for path in tqdm.tqdm(paths):
            batch_images, batch_labels = file_loader(path)
            collected_images.extend(batch_images)
            collected_labels.extend(batch_labels)
        return collected_images, collected_labels

    train_paths = glob.glob(os.path.join(base_dir, "data_batch_*"))
    test_paths = glob.glob(os.path.join(base_dir, "test_batch"))

    logging.info("train data file loading....")
    train_images, train_labels = _read_all(train_paths)

    logging.info("test file loading....")
    test_images, test_labels = _read_all(test_paths)

    logging.info("data preprocessing")
    train_x, train_y = preprocess(
        train_images, train_labels, True, rotate_ratio, flip_ratio, croppedSize
    )
    test_x, test_y = preprocess(
        test_images, test_labels, False, rotate_ratio, flip_ratio, croppedSize
    )
    return train_x, test_x, train_y, test_y



def rotate_image(img,rotate,keep_size=False):
    """Rotate ``img`` about its centre by ``rotate`` degrees.

    Args:
        img: image array; its first two dimensions are height and width.
        rotate: angle in degrees, passed to ``cv2.getRotationMatrix2D``.
        keep_size: when true the output keeps the input's width and height;
            otherwise the canvas is enlarged to the bounding box of the
            rotated image.

    Returns:
        The rotated image. Pixels not covered by the source are white.
    """
    src_h, src_w = img.shape[:2]
    if keep_size:
        dst_h, dst_w = src_h, src_w
    else:
        theta = math.radians(rotate)
        abs_sin = math.fabs(math.sin(theta))
        abs_cos = math.fabs(math.cos(theta))
        dst_h = int(src_w * abs_sin + src_h * abs_cos)
        dst_w = int(src_h * abs_sin + src_w * abs_cos)

    affine = cv2.getRotationMatrix2D((src_w / 2, src_h / 2), rotate, 1)
    # Shift the result so it stays centred on the (possibly larger) canvas.
    affine[0, 2] += (dst_w - src_w) / 2
    affine[1, 2] += (dst_h - src_h) / 2

    return cv2.warpAffine(img, affine, (dst_w, dst_h), borderValue=(255, 255, 255))


def preprocess(images_list,label_list,is_train=True,rotate_ratio=0.1,flip_ratio=0.1,cropSzie=None):
    """Augment (training only), centre-crop and stack images; one-hot the labels.

    For training data, ``rotate_ratio`` of the images get an extra copy
    rotated by a random angle from 30/60/90 degrees, and ``flip_ratio`` of the
    images get an extra copy flipped with ``cv2.flip`` code 1. The extra
    copies are appended after the originals. The caller's lists are left
    untouched.

    Args:
        images_list: non-empty list of H*W*3 image arrays of equal size. The
            height of the first image is used as the side length on both axes.
        label_list: integer class labels aligned with ``images_list``.
        is_train: apply augmentation when true.
        rotate_ratio: fraction of images to duplicate with a rotation.
        flip_ratio: fraction of images to duplicate with a flip.
        cropSzie: side length of the centre crop; ``None`` disables cropping.

    Returns:
        ``(data, labels)`` as float32 arrays: ``data`` has shape
        (N, side, side, 3) and ``labels`` is the one-hot matrix for 10 classes.

    Raises:
        ValueError: if ``images_list`` is empty.
    """
    if not images_list:
        raise ValueError("preprocess() needs at least one image")

    rotate_angle = [30, 60, 90]
    flip_code = [1]

    side = images_list[0].shape[0]
    offset = 0 if cropSzie is None else (side - cropSzie) // 2
    crop_end = side - offset
    # Recomputed from the offset so an odd size difference still crops symmetrically.
    crop_side = side - 2 * offset

    # Work on copies so the augmentation does not grow the caller's lists.
    images = list(images_list)
    labels = list(label_list)

    if is_train:
        total = len(images_list)
        rotate_idx = random.sample(range(total), int(total * rotate_ratio))
        flip_idx = random.sample(range(total), int(total * flip_ratio))
        rotated_images = [
            rotate_image(images_list[i], np.random.choice(rotate_angle), True)
            for i in rotate_idx
        ]
        fliped_images = [
            cv2.flip(images_list[i], np.random.choice(flip_code))
            for i in flip_idx
        ]
        images.extend(rotated_images)
        labels.extend(label_list[i] for i in rotate_idx)
        images.extend(fliped_images)
        labels.extend(label_list[i] for i in flip_idx)

    cropped = [
        item[offset:crop_end, offset:crop_end, :].reshape(1, crop_side, crop_side, 3)
        for item in images
    ]
    data_tensor = np.concatenate(cropped, axis=0).astype(np.float32)
    return data_tensor, build_onehot(labels, 10).astype(np.float32)


def build_onehot(labels,label_num):
    """Return the one-hot encoding of ``labels``.

    Args:
        labels: sequence of integer class indices in ``[0, label_num)``.
        label_num: number of classes (width of the result).

    Returns:
        Integer array of shape ``(len(labels), label_num)`` with a single 1
        per row at the label's column.
    """
    # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is the same dtype.
    label_tensor = np.zeros((len(labels), label_num), dtype=int)
    # Explicit intp dtype keeps the fancy index valid for an empty label list.
    columns = np.asarray(labels, dtype=np.intp)
    label_tensor[np.arange(len(labels)), columns] = 1
    return label_tensor

def load_label_map(base_dir):
    """Return the class names stored in ``<base_dir>/batches.meta``.

    The file is unpickled with byte keys and every entry of
    ``b'label_names'`` is decoded from UTF-8 into ``str``.
    """
    meta_path = os.path.join(base_dir, "batches.meta")
    with open(meta_path, 'rb') as meta_file:
        meta = pickle.load(meta_file, encoding='bytes')

    names = []
    for raw_name in meta[b'label_names']:
        names.append(str(raw_name, encoding="utf-8"))
    return names

if __name__ == "__main__":
    # Route root-logger records (DEBUG and above) to stdout with a timestamp.
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.DEBUG)
    stdout_handler = logging.StreamHandler(stream=sys.stdout)
    stdout_handler.setFormatter(logging.Formatter("%(asctime)s - %(message)s"))
    root_logger.addHandler(stdout_handler)

    # Smoke test: load the dataset and print the resulting tensor shapes.
    data_loader = Cifa10_data(
        "C:\\Users\\rebel\\.keras\\datasets\\cifar-10-batches-py",
        128,
        0.25,
        0.25,
        28,
        3,
    )
    print(data_loader.test_data_tensor.shape)
    print(data_loader.train_data_tensor.shape)
    validate_data = data_loader.get_validate_datas()[0]
    print(validate_data.shape)

注意,这里用的是cifar10 python版本的数据。

 

然后在model1的基础上进行修改

值得一提的是上一篇文章中模型在对图片标准化的过程中有bug....本来应该除标准差,结果除成方差了.....(已经在repo中修复)

# Standardize the input: moments are taken over axes 1 and 2 and kept
# broadcastable (keep_dims=True), then the input is centred and scaled.
mean,var=tf.nn.moments(x_image,[1,2],keep_dims=True)
x_image=tf.subtract(x_image,mean)
x_image=tf.divide(x_image,tf.sqrt(var)) # bug fix: divide by the standard deviation (sqrt of the variance), not by the variance itself

1.先修改模型的输入,因为这里是彩色图像所以输入维度要改成

 

cropSize=28
# Colour images: the last dimension of the input is 3 instead of 1.
x=tf.placeholder(shape=[None,cropSize,cropSize,3],dtype=tf.float32)
# One-hot labels for the 10 classes.
y=tf.placeholder(shape=[None,10],dtype=tf.float32)
# presumably the dropout keep probability fed at run time — confirm against the model code
keep=tf.placeholder(tf.float32)
#change 1:normalize input
mean,var=tf.nn.moments(x,[1,2],keep_dims=True)
x_image__=tf.subtract(x,mean)
# The small epsilon keeps the division finite when a channel is constant
# (variance 0), which would otherwise produce NaN/Inf in the network input.
x_image1=tf.divide(x_image__,tf.sqrt(var+1e-8))

 

2.由于我们的数据读入使用  Cifa10_data 类,所以对训练和测试的数据读入也有小小的修改

 

然后总steps 设置为10000步

 

按照之前的方法,训练分两个阶段第一个阶段用adam 第二个阶段用sgd。直接train一把,果不其然,adam这玩意很难伺候,经常train到3000-6000步左右时梯度崩了,loss变成了nan值.....

经过不停的修改学习率,总算train下来了:

acc:74.2%,第一阶段train 的dropout keep 0.6 第二阶段 keep 0.9  flip_ratio 和 rotate ratio 均为0.05, 学习率 adam 4e-5 sgd:4e-6

在训练过程中发现 train acc 和 validation acc 差距比较大,考虑可能有点过拟合,所以我又修改了一下:

第一阶段train 的dropout keep 0.5 第二阶段 keep 1.0

acc:75.4%

第一阶段一共7000步第二阶段3000步,训练时长13min左右

emmmm...超不过

在调整第一阶段训练和第二阶段训练的过程中,突然想到可以加入 warm up 的过程:先取一个比较大的学习率训几步,然后再开始第一阶段、第二阶段,这样会不会比较好呢?

这样就变成了:

第一阶段:adam 学习率:4e-4 2000步 keep=0.3

第二阶段: adam 学习率:4e-5 5000步 keep=0.5

第三阶段:sgd 学习率:4e-6 3000步 keep=1.0

同时把flip ratio 改到了 0.1 引入更多的水平镜像

 

但是我手滑了一下.... 导致第二阶段和第一阶段连在了一起,也就是说前2000步在训练时train了两次.... 实际一共训了 12000步

最终 acc 达到了 77.48%,超过 baseline 1.2 个百分点,训练时间 16min。

改正手滑后最终 acc:77.23% 训练时间 13min。

 

最终证明了我的小模型(model1)超过了lenet5!

代码放在:https://github.com/lordrebel/beatLenet5 model1_cifar10

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章