PaddlePaddle|CV疫情特輯(四):口罩識別

PaddlePaddle|CV疫情特輯(四):口罩識別

本節內容來自:百度AIstudio課程
做一個記錄。
 這就是一個圖像二分類問題:
 在這裏插入圖片描述
 在這裏插入圖片描述
任務描述:

  • 口罩識別,是指可以有效檢測在密集人流區域中佩戴和未佩戴口罩的所有人臉,同時判斷該人員是否佩戴口罩。通常由兩個功能單元組成,可以分別完成口罩人臉的檢測和口罩人臉的分類。

導入相關的包:

import os
import zipfile
import random
import json
import paddle
import sys
import numpy as np
from PIL import Image
from PIL import ImageEnhance
import paddle.fluid as fluid
from multiprocessing import cpu_count
import matplotlib.pyplot as plt
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
from paddle.fluid.layer_helper import LayerHelper

一、數據準備

  • (1)解壓原始數據集
  • (2)按照比例劃分訓練集與驗證集
  • (3)亂序,生成數據列表
  • (4)構造訓練數據集提供器和驗證數據集提供器

def unzip_data(src_path, target_path):
    '''
    Extract the raw dataset: unzip the archive at src_path into target_path.

    Skips extraction when the "maskDetect" folder already exists under
    target_path (i.e. the data was unpacked on a previous run).
    '''
    # NOTE: target_path is concatenated directly; it is expected to end with "/".
    if not os.path.isdir(target_path + "maskDetect"):
        # Context manager guarantees the zip handle is closed even if
        # extraction raises (the original leaked the handle on error).
        with zipfile.ZipFile(src_path, 'r') as z:
            z.extractall(path=target_path)

def get_data_list(target_path, train_list_path, eval_list_path):
    '''
    Build the train/eval list files and a JSON readme describing the dataset.

    Walks every class folder under <target_path>maskDetect/, assigns an
    integer label per class, sends every 10th image to the eval split and
    the rest to the train split, shuffles both splits and appends them to
    the given list files ("path\\tlabel" per line).  Also fills
    train_parameters['label_dict'] / ['class_dim'] and writes dataset
    statistics to train_parameters['readme_path'].
    '''
    data_list_path = target_path + "maskDetect/"
    class_detail = []        # per-class statistics for the readme json
    trainer_list = []        # lines destined for the train list file
    eval_list = []           # lines destined for the eval list file
    all_class_images = 0     # total image count over all classes
    class_label = 0          # next label id to assign
    class_dim = 0            # number of classes seen

    # Each sub-folder is one class, e.g. ['maskimages', 'nomaskimages'].
    for class_dir in os.listdir(data_list_path):
        if class_dir == ".DS_Store":   # macOS metadata entry, not a class
            continue
        class_dim += 1
        eval_sum = 0      # eval images in this class
        trainer_sum = 0   # train images in this class
        class_sum = 0     # total images in this class
        path = data_list_path + class_dir
        for img_path in os.listdir(path):
            name_path = path + '/' + img_path
            line = name_path + "\t%d" % class_label + "\n"
            # Every 10th image goes to the eval split.
            if class_sum % 10 == 0:
                eval_sum += 1
                eval_list.append(line)
            else:
                trainer_sum += 1
                trainer_list.append(line)
            class_sum += 1
            all_class_images += 1

        # Record this class in the readme metadata.
        class_detail.append({
            'class_name': class_dir,
            'class_label': class_label,
            'class_eval_images': eval_sum,
            'class_trainer_images': trainer_sum,
        })
        # Map label id -> class name for later prediction display.
        train_parameters['label_dict'][str(class_label)] = class_dir
        class_label += 1

    # Record the number of classes.
    train_parameters['class_dim'] = class_dim

    # Shuffle each split, then append to its list file (the caller is
    # expected to have truncated the files beforehand).
    random.shuffle(eval_list)
    with open(eval_list_path, 'a') as f:
        f.writelines(eval_list)

    random.shuffle(trainer_list)
    with open(train_list_path, 'a') as f2:
        f2.writelines(trainer_list)

    # Write dataset statistics as a human-readable json readme.
    readjson = {
        'all_class_name': data_list_path,   # parent directory of the classes
        'all_class_images': all_class_images,
        'class_detail': class_detail,
    }
    jsons = json.dumps(readjson, sort_keys=True, indent=4, separators=(',', ': '))
    with open(train_parameters['readme_path'], 'w') as f:
        f.write(jsons)
    print('生成數據列表完成!')
def custom_reader(file_list):
    '''
    Build a paddle-style sample reader over a "path\\tlabel" list file.

    Returns a zero-argument generator function; each yielded sample is
    (img, label) where img is a float32 CHW array resized to 224x224 and
    normalised to [0, 1], and label is an int.
    '''
    def reader():
        with open(file_list, 'r') as f:
            lines = [line.strip() for line in f]
        for line in lines:
            # Skip blank lines (e.g. a trailing newline in the list file);
            # the original crashed on split('\t') for such lines.
            if not line:
                continue
            img_path, lab = line.split('\t')
            img = Image.open(img_path)
            if img.mode != 'RGB':   # force 3 channels for grayscale/RGBA inputs
                img = img.convert('RGB')
            img = img.resize((224, 224), Image.BILINEAR)
            img = np.array(img).astype('float32')
            img = img.transpose((2, 0, 1))  # HWC to CHW
            img = img / 255                 # normalise pixel values to [0, 1]
            yield img, int(lab)
    return reader
'''
Parameter initialisation
'''
src_path = train_parameters['src_path']
target_path = train_parameters['target_path']
train_list_path = train_parameters['train_list_path']
eval_list_path = train_parameters['eval_list_path']
batch_size = train_parameters['train_batch_size']

'''
Unzip the raw data to the target path
'''
unzip_data(src_path, target_path)

'''
Split train/eval sets, shuffle, and generate the data lists
'''
# Truncate train.txt and eval.txt before regenerating them.  Opening in
# 'w' mode already truncates the file, so the original's explicit
# seek(0)/truncate() calls were redundant.
with open(train_list_path, 'w'):
    pass
with open(eval_list_path, 'w'):
    pass
# Generate the data lists
get_data_list(target_path, train_list_path, eval_list_path)
'''
Build the batched data providers
'''
train_reader = paddle.batch(custom_reader(train_list_path),
                            batch_size=batch_size,
                            drop_last=True)
eval_reader = paddle.batch(custom_reader(eval_list_path),
                           batch_size=batch_size,
                           drop_last=True)

二、模型配置

在這裏插入圖片描述
VGG的核心是五組卷積操作,每兩組之間做Max-Pooling空間降維。同一組內採用多次連續的3X3卷積,卷積核的數目由較淺組的64增多到最深組的512,同一組內的卷積核數目是一樣的。卷積之後接兩層全連接層,之後是分類層。由於每組內卷積層的不同,有11、13、16、19層這幾種模型,上圖展示一個16層的網絡結構。
關鍵點:

  • 1.五次下采樣,即池化層操作。
  • 2.根據五次下采樣,可以把VGG16由兩部分組成:卷積塊和全連接層。

所以可知:

class ConvPool(fluid.dygraph.Layer):
    '''
    VGG-style block: `groups` stacked Conv2D layers followed by one pooling
    layer.  All conv layers in the block share the same filter size and
    output channel count.
    '''
    def __init__(self,
                 num_channels,
                 num_filters,
                 filter_size,
                 pool_size,
                 pool_stride,
                 groups,
                 pool_padding=0,
                 pool_type='max',
                 conv_stride=1,
                 conv_padding=0,
                 act=None,
                 param_attr = fluid.initializer.Xavier(uniform=False)):
        super(ConvPool, self).__init__()

        self._conv2d_list = []
        in_channels = num_channels
        out_channels = num_filters
        for i in range(groups):
            # add_sublayer registers the conv so its parameters are tracked.
            conv2d = self.add_sublayer(
                'bb_%d' % i,
                fluid.dygraph.Conv2D(
                num_channels=in_channels,   # input channels
                num_filters=out_channels,   # number of kernels
                filter_size=filter_size,    # kernel size
                stride=conv_stride,         # conv stride
                padding=conv_padding,       # conv padding, default 0
                act=act,
                bias_attr=False,
                param_attr=param_attr)
            )
            self._conv2d_list.append(conv2d)
            # After the first conv, subsequent convs take out_channels as input.
            in_channels = out_channels

        self._pool2d = fluid.dygraph.Pool2D(
            pool_size=pool_size,           # pooling window size
            pool_type=pool_type,           # pooling type, default max
            pool_stride=pool_stride,       # pooling stride
            pool_padding=pool_padding      # pooling padding
            )

    def forward(self, inputs):
        """Apply each conv layer in order, then the pooling layer.

        BUGFIX: the original iterated `zip(self._conv2d_list)` — zip of a
        single iterable yields 1-tuples, so calling `conv(x)` raised a
        TypeError — and then called an undefined name `bn` (no BatchNorm
        layer is constructed in this class).  Iterate the list directly
        and drop the bogus bn call.
        """
        x = inputs
        for conv in self._conv2d_list:
            x = conv(x)
        x = self._pool2d(x)
        return x

完成VGG網絡的定義

class VGGNet(fluid.dygraph.Layer):
    '''
    VGG-16-style classifier: five ConvPool blocks followed by a
    three-layer fully-connected head with dropout.
    '''
    def __init__(self, output_dim = 1):
        super(VGGNet, self).__init__()
        # (number of conv layers, output channels) for each of the 5 blocks.
        conv_arch = ((2, 64), (2, 128), (3, 256), (3, 512), (3, 512))
        # Input channel count of each block: RGB input, then the previous
        # block's output channels.
        in_channels = [3, 64, 128, 256, 512, 512]

        self.vgg_blocks = []
        # Register each block as a sublayer so its parameters are tracked.
        for iter_id, (num_convs, num_channels) in enumerate(conv_arch):
            block = self.add_sublayer(
                'block_' + str(iter_id),
                ConvPool(num_channels=in_channels[iter_id],
                         num_filters=num_channels,
                         filter_size=3,
                         pool_size=2,
                         pool_stride=2,
                         groups=num_convs,
                         pool_padding=0,
                         conv_padding=1,
                         act="leaky_relu"))
            self.vgg_blocks.append(block)

        # After 5 poolings a 224x224 input is reduced to 512 x 7 x 7.
        self.fc1 = Linear(input_dim=512*7*7, output_dim=4096,
                          act='leaky_relu',
                          param_attr=fluid.initializer.Xavier(uniform=False))
        self.drop1_ratio = 0.5
        self.fc2 = Linear(input_dim=4096, output_dim=4096,
                          act='leaky_relu',
                          param_attr=fluid.initializer.Xavier(uniform=False))
        self.drop2_ratio = 0.5
        self.fc3 = Linear(input_dim=4096, output_dim=output_dim,
                          param_attr=fluid.initializer.Xavier(uniform=False))

    def forward(self, x, label=None):
        """Forward pass: conv blocks, flatten, then the FC head with dropout."""
        for block in self.vgg_blocks:
            x = block(x)
        x = fluid.layers.reshape(x, [x.shape[0], -1])
        x = fluid.layers.dropout(self.fc1(x), self.drop1_ratio)
        x = fluid.layers.dropout(self.fc2(x), self.drop2_ratio)
        return self.fc3(x)

提幾個關鍵參數:conv_arch=((2, 64), (2, 128), (3, 256), (3, 512), (3, 512)),每個元組的第一個數表示卷積層的group數,第二個數表示這個卷積塊的輸出通道數,如(2, 64)表示這個卷積塊有兩個卷積核大小爲3*3的卷積層。in_channels = [3, 64, 128, 256, 512, 512]則表示每個卷積塊的輸入通道數。

三、模型訓練 && 四、模型評估

'''
Model training
'''
# BUGFIX: these accumulators were read before ever being assigned in this
# file (NameError on the first batch); initialise them before training.
all_train_iter = 0
all_train_iters = []   # cumulative sample count per logged batch
all_train_costs = []   # loss per batch
all_train_accs = []    # accuracy per batch

# Use fluid.dygraph.guard(place=fluid.CUDAPlace(0)) to train on GPU.
with fluid.dygraph.guard():
    print(train_parameters['class_dim'])
    print(train_parameters['label_dict'])
    vgg = VGGNet(output_dim=2)
    optimizer = fluid.optimizer.AdamOptimizer(
        learning_rate=train_parameters['learning_strategy']['lr'],
        parameter_list=vgg.parameters())
    for epoch_num in range(train_parameters['num_epochs']):
        for batch_id, data in enumerate(train_reader()):
            # Stack the batch into numpy arrays; labels need shape (N, 1).
            dy_x_data = np.array([x[0] for x in data]).astype('float32')
            y_data = np.array([x[1] for x in data]).astype('int64')
            y_data = y_data[:, np.newaxis]

            # Convert numpy inputs to dygraph variables.
            img = fluid.dygraph.to_variable(dy_x_data)
            label = fluid.dygraph.to_variable(y_data)

            out = vgg(img)
            acc = fluid.layers.accuracy(out, label)

            # softmax_with_cross_entropy fuses softmax and cross entropy
            # for numerical stability (avoids NaN losses).
            loss = fluid.layers.softmax_with_cross_entropy(out, label)
            avg_loss = fluid.layers.mean(loss)

            # Backprop, update parameters, then clear gradients so the
            # next iteration starts fresh.
            avg_loss.backward()
            optimizer.minimize(avg_loss)
            vgg.clear_gradients()

            all_train_iter = all_train_iter + train_parameters['train_batch_size']
            all_train_iters.append(all_train_iter)
            all_train_costs.append(loss.numpy()[0])
            all_train_accs.append(acc.numpy()[0])

            if (batch_id+1) % 10 == 0:
                print("Loss at epoch {} step {}: {}, acc: {}".format(epoch_num, batch_id, avg_loss.numpy(), acc.numpy()))

    print(len(all_train_iters))
    print(len(all_train_costs))
    print(len(all_train_accs))
    draw_train_process("training",all_train_iters,all_train_costs,all_train_accs,"trainning cost","trainning acc")
    draw_process("trainning loss","red",all_train_iters,all_train_costs,"trainning loss")
    draw_process("trainning acc","green",all_train_iters,all_train_accs,"trainning acc")

    # Persist the trained parameters for later evaluation/prediction.
    fluid.save_dygraph(vgg.state_dict(), "vgg")
    print("Final loss: {}".format(avg_loss.numpy()))

和以前一樣的,使用損失函數:loss = fluid.layers.softmax_with_cross_entropy(out, label)避免nan。

'''
Model evaluation
'''
with fluid.dygraph.guard():
    # Restore the trained weights saved by the training step.
    model, _ = fluid.load_dygraph("vgg")
    vgg = VGGNet(output_dim=2)
    vgg.load_dict(model)
    vgg.eval()   # disable dropout for evaluation
    accs = []
    for batch_id, data in enumerate(eval_reader()):
        dy_x_data = np.array([x[0] for x in data]).astype('float32')
        # BUGFIX: accuracy() expects int64 labels; astype('int') is
        # platform-dependent (int32 on some platforms).
        y_data = np.array([x[1] for x in data]).astype('int64')
        y_data = y_data[:, np.newaxis]

        img = fluid.dygraph.to_variable(dy_x_data)
        label = fluid.dygraph.to_variable(y_data)

        out = vgg(img)
        acc = fluid.layers.accuracy(out, label)
        # (Removed the unused `lab = np.argsort(out.numpy())` dead store.)
        accs.append(acc.numpy()[0])
print(np.mean(accs))

五、模型預測

def load_image(img_path):
    '''
    Preprocess one image for inference.

    Returns a float32 CHW array, resized to 224x224, with pixel values
    scaled to [0, 1].
    '''
    img = Image.open(img_path)
    # Grayscale/RGBA inputs are converted so the net always sees 3 channels.
    if img.mode != 'RGB':
        img = img.convert('RGB')
    resized = img.resize((224, 224), Image.BILINEAR)
    arr = np.asarray(resized, dtype='float32')
    arr = arr.transpose((2, 0, 1))  # HWC -> CHW
    return arr / 255  # normalise pixel values to [0, 1]

label_dic = train_parameters['label_dict']

'''
Model prediction
'''
with fluid.dygraph.guard():
    model, _ = fluid.dygraph.load_dygraph("vgg")
    # BUGFIX: the head size must match the trained checkpoint (2 classes);
    # the default output_dim=1 made load_dict fail on the final FC layer.
    vgg = VGGNet(output_dim=2)
    vgg.load_dict(model)
    vgg.eval()   # disable dropout for inference

    # Display the image that will be classified.
    infer_path = '/home/aistudio/data/data23615/infer_mask01.jpg'
    img = Image.open(infer_path)
    plt.imshow(img)   # render the image array
    plt.show()        # show the figure

    # Preprocess every image to be predicted (here just one).
    infer_imgs = []
    infer_imgs.append(load_image(infer_path))
    infer_imgs = np.array(infer_imgs)

    for i in range(len(infer_imgs)):
        data = infer_imgs[i]
        # Add the batch dimension: (C, H, W) -> (1, C, H, W).
        dy_x_data = np.array(data).astype('float32')
        dy_x_data = dy_x_data[np.newaxis, :, :, :]
        img = fluid.dygraph.to_variable(dy_x_data)
        out = vgg(img)
        lab = np.argmax(out.numpy())  # index of the highest score = predicted class
        print("第{}個樣本,被預測爲:{}".format(i+1, label_dic[str(lab)]))

print("結束")

五、模型預測

def load_image(img_path):
    '''
    Preprocess a prediction image: convert to RGB, resize to 224x224,
    and return a float32 CHW array normalised to [0, 1].
    '''
    img = Image.open(img_path) 
    if img.mode != 'RGB': 
        img = img.convert('RGB') 
    img = img.resize((224, 224), Image.BILINEAR)
    img = np.array(img).astype('float32') 
    img = img.transpose((2, 0, 1))  # HWC to CHW 
    img = img/255                # normalise pixel values to [0, 1]
    return img

label_dic = train_parameters['label_dict']

'''
Model prediction
'''
with fluid.dygraph.guard():
    # Restore the trained weights; the head size matches training (2 classes).
    model, _ = fluid.dygraph.load_dygraph("vgg")
    vgg = VGGNet(output_dim=2)
    vgg.load_dict(model)
    vgg.eval()   # disable dropout for inference

    # Display the image that will be classified.
    infer_path = '/home/aistudio/data/data23615/infer_mask01.jpg'
    img = Image.open(infer_path)
    plt.imshow(img)   # render the image array
    plt.show()        # show the figure

    # Preprocess every image to be predicted (here just one).
    infer_imgs = np.array([load_image(infer_path)])

    for i, data in enumerate(infer_imgs):
        # Add the batch dimension: (C, H, W) -> (1, C, H, W).
        dy_x_data = np.array(data).astype('float32')[np.newaxis, :, :, :]
        img = fluid.dygraph.to_variable(dy_x_data)
        out = vgg(img)
        lab = np.argmax(out.numpy())  # argmax gives the predicted class index
        print("第{}個樣本,被預測爲:{}".format(i + 1, label_dic[str(lab)]))

print("結束")

在這裏插入圖片描述

第1個樣本,被預測爲:maskimages
結束
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章