Day04: Mask Classification with the Paddle Framework

Since this project is much the same as the earlier license plate recognition and gesture recognition ones, I won't go into too much detail. The difference is that this time we are asked to reproduce VGGNet for the classification task, but I personally feel the dataset has too few samples to justify such a deep network, so it's the same old network again, good old AlexNet, with the same fine-tuning (honestly, I was just too lazy to set up anything more involved 😁). Unexpectedly it took first place on that day's leaderboard, which shows that a deeper network is not necessarily a better one; the best network is the one that fits the problem. Enough chatter, let's get started.

As always, a deep learning project boils down to four steps:

1. Process the data and labels
2. Build the network model
3. Set the network hyperparameters
4. Train and evaluate the model

First, import the libraries we need:

import os
import zipfile
import random
import json
import paddle
import sys
import numpy as np
from PIL import Image
from PIL import ImageEnhance
import paddle.fluid as fluid
from multiprocessing import cpu_count
import matplotlib.pyplot as plt
import time
import paddle.fluid.layers as layers
from paddle.fluid.dygraph import Pool2D,Conv2D
from paddle.fluid.dygraph import Linear

Set the configuration parameters:

'''
Parameter configuration
'''
train_parameters = {
    "input_size": [3, 224, 224],                              # shape of the input images
    "class_dim": -1,                                          # number of classes
    "src_path":"maskDetect.zip",                              # path to the original dataset archive
    "target_path":"data/",                                    # extraction target path
    "train_list_path": "data/train.txt",                      # path to train.txt
    "eval_list_path": "data/eval.txt",                        # path to eval.txt
    "readme_path": "data/readme.json",                        # path to readme.json
    "label_dict":{},                                          # label dictionary
    "num_epochs": 50,                                         # number of training epochs
    "train_batch_size": 32,                                   # batch size during training
    "learning_strategy": {                                    # optimizer-related settings
        "lr": 0.001                                           # learning rate
    } 
}

1. Data and Label Processing

The mask classification dataset that Paddle provides contains 116 images of people wearing masks and 70 images without masks, crawled from the web (dataset link).
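
The list-generation code below assumes that, after extraction, data/maskDetect/ contains one sub-folder per class (maskimages and nomaskimages). A minimal sketch, not part of the original notebook, to check that layout and the per-class image counts; it should only be run once the archive has been extracted by the unzip_data helper defined below:

# Layout check (assumed paths): run after unzip_data() below has extracted the archive.
# data/maskDetect/ is expected to contain one folder per class, e.g. maskimages / nomaskimages.
for class_dir in sorted(os.listdir('data/maskDetect/')):
    if class_dir != '.DS_Store':
        num_images = len(os.listdir('data/maskDetect/' + class_dir))
        print('{}: {} images'.format(class_dir, num_images))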

def unzip_data(src_path,target_path):
    '''
    Extract the original dataset: unzip the archive at src_path into the data/ directory.
    '''
    if(not os.path.isdir(target_path + "maskDetect")):     
        z = zipfile.ZipFile(src_path, 'r')
        z.extractall(path=target_path)
        z.close()


def get_data_list(target_path,train_list_path,eval_list_path):
    '''
    Generate the data lists
    '''
    # information for every class
    class_detail = []
    # folder names, one per class
    data_list_path=target_path+"maskDetect/"
    class_dirs = os.listdir(data_list_path)  
    # total number of images
    all_class_images = 0
    # class label counter
    class_label=0
    # number of classes
    class_dim = 0
    # contents to be written to eval.txt and train.txt
    trainer_list=[]
    eval_list=[]
    # iterate over the classes, e.g. ['maskimages', 'nomaskimages']
    for class_dir in class_dirs:
        if class_dir != ".DS_Store":
            class_dim += 1
            # information for this class
            class_detail_list = {}
            eval_sum = 0
            trainer_sum = 0
            # number of images in this class
            class_sum = 0
            # path of this class
            path = data_list_path  + class_dir
            # all images of this class
            img_paths = os.listdir(path)
            for img_path in img_paths:                                  # iterate over the images in the folder
                name_path = path + '/' + img_path                       # full path of this image
                if class_sum % 10 == 0:                                 # every 10th image goes to the validation set
                    eval_sum += 1                                       # eval_sum: number of validation images
                    eval_list.append(name_path + "\t%d" % class_label + "\n")
                else:
                    trainer_sum += 1 
                    trainer_list.append(name_path + "\t%d" % class_label + "\n")  # trainer_sum: number of training images
                class_sum += 1                                          # number of images in this class
                all_class_images += 1                                   # number of images over all classes
             
            # class_detail entry for the readme json file
            class_detail_list['class_name'] = class_dir             # class name, e.g. maskimages
            class_detail_list['class_label'] = class_label          # class label
            class_detail_list['class_eval_images'] = eval_sum       # number of validation images for this class
            class_detail_list['class_trainer_images'] = trainer_sum # number of training images for this class
            class_detail.append(class_detail_list)  
            # fill in the label dictionary
            train_parameters['label_dict'][str(class_label)] = class_dir
            class_label += 1 
            
    # record the number of classes
    train_parameters['class_dim'] = class_dim

    # shuffle the lists before writing them out
    random.shuffle(eval_list)
    with open(eval_list_path, 'a') as f:
        for eval_image in eval_list:
            f.write(eval_image) 
            
    random.shuffle(trainer_list)
    with open(train_list_path, 'a') as f2:
        for train_image in trainer_list:
            f2.write(train_image) 

    # information for the readme json file
    readjson = {}
    readjson['all_class_name'] = data_list_path                  # parent directory of the classes
    readjson['all_class_images'] = all_class_images
    readjson['class_detail'] = class_detail
    jsons = json.dumps(readjson, sort_keys=True, indent=4, separators=(',', ': '))
    with open(train_parameters['readme_path'],'w') as f:
        f.write(jsons)
    print('Data lists generated!')

def custom_reader(file_list):
    '''
    Custom reader
    '''
    def reader():
        with open(file_list, 'r') as f:
            lines = [line.strip() for line in f]
            for line in lines:
                img_path, lab = line.strip().split('\t')
                img = Image.open(img_path) 
                if img.mode != 'RGB': 
                    img = img.convert('RGB') 
                img = img.resize((224, 224), Image.BILINEAR)
                img = np.array(img).astype('float32') 
                img = img.transpose((2, 0, 1))  # HWC to CHW 
                img = img/255                   # normalize pixel values to [0, 1]
                yield img, int(lab) 
    return reader


'''
Parameter initialization
'''
src_path=train_parameters['src_path']
target_path=train_parameters['target_path']
train_list_path=train_parameters['train_list_path']
eval_list_path=train_parameters['eval_list_path']
batch_size=train_parameters['train_batch_size']

'''
Extract the original dataset to the target path
'''
unzip_data(src_path,target_path)

'''
Split into training and validation sets, shuffle, and generate the data lists
'''
# clear train.txt and eval.txt before generating the lists
with open(train_list_path, 'w') as f: 
    f.seek(0)
    f.truncate() 
with open(eval_list_path, 'w') as f: 
    f.seek(0)
    f.truncate() 
# generate the data lists
get_data_list(target_path,train_list_path,eval_list_path)

'''
Build the data providers
'''
print(train_list_path)
print(eval_list_path)
train_reader = paddle.batch(custom_reader(train_list_path),
                            batch_size=batch_size,
                            drop_last=True)
eval_reader = paddle.batch(custom_reader(eval_list_path),
                            batch_size=19,    # the eval list holds 19 images (roughly one in ten per class)
                            drop_last=True)
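
Before building the network, a quick sanity check (a minimal sketch, not part of the original notebook) confirms that one training batch has the expected shape:

# Peek at the first training batch: each element is an (image, label) pair,
# so with train_batch_size = 32 we expect 32 images of shape (3, 224, 224).
first_batch = next(train_reader())
batch_imgs = np.array([item[0] for item in first_batch]).astype('float32')
batch_labels = np.array([item[1] for item in first_batch])
print(batch_imgs.shape, batch_labels.shape)   # expected: (32, 3, 224, 224) (32,)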

Building the Neural Network

Here we build our network on the classic AlexNet architecture, with a little fine-tuning.
The code is as follows:

class MyDNN(fluid.dygraph.Layer):
    def __init__(self, name_scope, num_classes=2):
        super(MyDNN, self).__init__(name_scope)
        name_scope = self.full_name()

        self.conv1 = Conv2D(num_channels=3, num_filters=96, filter_size=3, stride=2, padding=2, act='relu')
        self.pool1 = Pool2D(pool_size=2, pool_stride=2, pool_type='max')
        self.conv2 = Conv2D(num_channels=96, num_filters=256, filter_size=3, stride=2, padding=2, act='relu')
        self.pool2 = Pool2D(pool_size=2, pool_stride=2, pool_type='max')
        self.conv3 = Conv2D(num_channels=256, num_filters=384, filter_size=3, stride=2, padding=2, act='relu')
        self.conv4 = Conv2D(num_channels=384, num_filters=384, filter_size=3, stride=1, padding=1, act='relu')
        self.conv5 = Conv2D(num_channels=384, num_filters=256, filter_size=3, stride=1, padding=1, act='relu')
        self.pool5 = Pool2D(pool_size=2, pool_stride=2, pool_type='max')
        self.fc1 = Linear(input_dim=4096, output_dim=4096, act='relu')
        self.drop_ratio1 = 0.5
        self.fc2 = Linear(input_dim=4096, output_dim=4096, act='relu')
        self.drop_ratio2 = 0.5
        self.fc3 = Linear(input_dim=4096, output_dim=num_classes)

        
    def forward(self, x):
        x = self.conv1(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.pool2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = self.pool5(x)
        x = fluid.layers.reshape(x, [x.shape[0], -1])
        x = self.fc1(x)
        # apply dropout after the fully connected layer to reduce overfitting
        x = fluid.layers.dropout(x, self.drop_ratio1)
        x = self.fc2(x)
        # apply dropout after the fully connected layer to reduce overfitting
        x = fluid.layers.dropout(x, self.drop_ratio2)
        x = self.fc3(x)
        return x
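
The value input_dim=4096 for fc1 is not arbitrary: with a 224x224 input, the convolution and pooling stack above leaves a 4x4 feature map with 256 channels, and 256*4*4 = 4096. A small sketch (using a random dummy input, not part of the original code) to verify that flattened size:

# Check the flattened feature size that fc1 expects, using a random 224x224 input.
with fluid.dygraph.guard():
    net = MyDNN('shape_check')
    x = fluid.dygraph.to_variable(np.random.rand(1, 3, 224, 224).astype('float32'))
    x = net.pool1(net.conv1(x))
    x = net.pool2(net.conv2(x))
    x = net.pool5(net.conv5(net.conv4(net.conv3(x))))
    print(x.shape)   # expected: [1, 256, 4, 4], i.e. 256*4*4 = 4096 features feeding fc1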

Setting the Hyperparameters, Training and Evaluating the Model

Here our optimizer is MomentumOptimizer and the loss function is softmax_with_cross_entropy.
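
Note that softmax_with_cross_entropy expects raw logits together with int64 labels of shape [N, 1], which is why fc3 above has no activation. A minimal illustration on dummy values (not part of the original code), showing that it matches applying softmax followed by cross_entropy:

# Illustration: softmax_with_cross_entropy takes raw logits and int64 labels of shape [N, 1];
# it is numerically equivalent to softmax followed by cross_entropy.
with fluid.dygraph.guard():
    logits = fluid.dygraph.to_variable(np.array([[2.0, 0.5], [0.1, 1.5]], dtype='float32'))
    labels = fluid.dygraph.to_variable(np.array([[0], [1]], dtype='int64'))
    fused = fluid.layers.softmax_with_cross_entropy(logits, labels)
    manual = fluid.layers.cross_entropy(fluid.layers.softmax(logits), labels)
    print(fused.numpy(), manual.numpy())   # the two loss values should match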

'''
Model training
'''



def draw_train_process(title,iters,costs,accs,label_cost,label_acc):
    plt.title(title, fontsize=24)
    plt.xlabel("iter", fontsize=20)
    plt.ylabel("cost/acc", fontsize=20)
    plt.plot(iters, costs,color='red',label=label_cost) 
    plt.plot(iters, accs,color='green',label=label_acc) 
    plt.legend()
    plt.grid()
    plt.show()
def draw_process(title,color,iters,data,label):
    plt.title(title, fontsize=24)
    plt.xlabel("iter", fontsize=20)
    plt.ylabel(label, fontsize=20)
    plt.plot(iters, data,color=color,label=label) 
    plt.legend()
    plt.grid()
    plt.show()

all_train_iter=0
all_train_iters=[]
all_train_costs=[]
all_train_accs=[]
with fluid.dygraph.guard():
    print(train_parameters['class_dim'])
    print(train_parameters['label_dict'])
    alex = MyDNN('alexnet')
    alex.train()
    # note: the learning rate is hard-coded to 0.01 here rather than taken from train_parameters['learning_strategy']['lr']
    optimizer = fluid.optimizer.Momentum(learning_rate=0.01,momentum=0.9,parameter_list=alex.parameters())

    for epoch_num in range(train_parameters['num_epochs']):
        for batch_id, data in enumerate(train_reader()):
            dy_x_data = np.array([x[0] for x in data]).astype('float32')           
            y_data = np.array([x[1] for x in data]).astype('int64')      
            y_data = y_data[:, np.newaxis]
            # convert the NumPy arrays into variables that DyGraph accepts
            img = fluid.dygraph.to_variable(dy_x_data)
            label = fluid.dygraph.to_variable(y_data)

            out = alex(img)
            loss = fluid.layers.softmax_with_cross_entropy(out, label)
            avg_loss = fluid.layers.mean(loss)
            acc=fluid.layers.accuracy(out,label)
            # backward() runs the backward pass
            avg_loss.backward()
            optimizer.minimize(avg_loss)
             
            # clear the gradients so the next iteration is computed correctly
            alex.clear_gradients()
            

            all_train_iter=all_train_iter+train_parameters['train_batch_size']
            all_train_iters.append(all_train_iter)
            all_train_costs.append(loss.numpy()[0])
            all_train_accs.append(acc.numpy()[0])
                
            if batch_id>=1 and batch_id % 2 == 0:
                print("Loss at epoch {} step {}: avg_loss:{}, acc: {}".format(epoch_num, batch_id, avg_loss.numpy(), acc.numpy()))

    draw_train_process("training",all_train_iters,all_train_costs,all_train_accs,"trainning cost","trainning acc")  
    draw_process("trainning loss","red",all_train_iters,all_train_costs,"trainning loss")
    draw_process("trainning acc","green",all_train_iters,all_train_accs,"trainning acc")  
    
    # save the model parameters
    fluid.save_dygraph(alex.state_dict(), "alex")   
    print("Final loss: {}".format(avg_loss.numpy()))



'''
Model validation
'''
with fluid.dygraph.guard():
    model, _ = fluid.load_dygraph("alex")
    #vgg = VGGNet()
    alex = MyDNN('alexnet')
    alex.load_dict(model)
    alex.eval()
    accs = []
    for batch_id, data in enumerate(eval_reader()):
        dy_x_data = np.array([x[0] for x in data]).astype('float32')
        y_data = np.array([x[1] for x in data]).astype('int64')
        y_data = y_data[:, np.newaxis]
        img = fluid.dygraph.to_variable(dy_x_data)
        label = fluid.dygraph.to_variable(y_data)

        out = alex(img)
        
        acc=fluid.layers.accuracy(out,label)
        lab = np.argsort(out.numpy())
        accs.append(acc.numpy()[0])
print(np.mean(accs))

That wraps up the whole training process.
After 50 epochs my accuracy is close to 100%. You can try to push it further, for example by switching to a different optimizer (see the sketch below), training for more epochs, or increasing the image resolution.
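
For instance, swapping the Momentum optimizer for Adam only changes one line inside the training block above; a minimal sketch (assuming alex and train_parameters are in scope inside the fluid.dygraph.guard() block):

# Possible variation: use Adam instead of Momentum in the training loop above.
optimizer = fluid.optimizer.AdamOptimizer(
    learning_rate=train_parameters['learning_strategy']['lr'],
    parameter_list=alex.parameters())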

Image Classification Prediction

def load_image(img_path):
    '''
    Preprocess an image for prediction
    '''
    img = Image.open(img_path) 
    if img.mode != 'RGB': 
        img = img.convert('RGB') 
    img = img.resize((224, 224), Image.BILINEAR)
    img = np.array(img).astype('float32') 
    img = img.transpose((2, 0, 1))  # HWC to CHW 
    img = img/255                   # normalize pixel values to [0, 1]
    return img

label_dic = train_parameters['label_dict']

'''
Model prediction
'''
with fluid.dygraph.guard():
    model, _ = fluid.dygraph.load_dygraph("alex")
    #vgg = VGGNet()
    alex = MyDNN('alexnet')
    alex.load_dict(model)
    alex.eval()
    
    # show the images to be predicted
    infer_path_mask='/home/aistudio/data/data23615/infer_mask01.jpg'
    infer_path='/home/aistudio/nomask.jpg'
    img = Image.open(infer_path)
    img_mask = Image.open(infer_path_mask)
    plt.imshow(img)          # draw the image from the array
    plt.show()               # display the image
    plt.imshow(img_mask)
    plt.show()
    # preprocess the images for prediction
    infer_imgs = []
    infer_imgs.append(load_image(infer_path))
    infer_imgs.append(load_image(infer_path_mask))
    infer_imgs = np.array(infer_imgs)
   
    for  i in range(len(infer_imgs)):
        data = infer_imgs[i]
        dy_x_data = np.array(data).astype('float32')
        dy_x_data=dy_x_data[np.newaxis,:, : ,:]
        img = fluid.dygraph.to_variable(dy_x_data)
        out = alex(img)
        lab = np.argmax(out.numpy())  # argmax(): index of the highest score
        print("Sample {} is predicted as: {}".format(i+1, label_dic[str(lab)]))

print("Done")

The results are as follows:
[prediction result images]

Closing Thoughts

The main reason this run reaches such high accuracy is that the dataset is so small; with a larger dataset the accuracy would not be this high. Still, it depends on the situation: judged purely as a competition entry, this is a decent result.
