Day02 - Hand Gesture Recognition with Baidu PaddlePaddle

Baidu's PaddlePaddle framework has been open source since 2016, which makes it about four years old now, and after four years of polishing it has grown into a genuinely capable framework. In this article I will show you how to use Paddle to implement hand gesture recognition. Why not the usual MNIST starter dataset? Because MNIST comes bundled with every framework and only takes a simple import, whereas real projects deal with actual image files and labels, so gesture recognition is a more realistic and instructive way to learn Paddle.

A deep learning workflow boils down to four steps:

1. Data and label preparation
2. Building the network model
3. Setting the network hyperparameters
4. Training and evaluating the model

I will walk through these four steps one by one. First, import the libraries:

import os
import time
import random
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import paddle
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from multiprocessing import cpu_count
from paddle.fluid.dygraph import Pool2D,Conv2D
from paddle.fluid.dygraph import Linear

1. Data and Label Preparation

The dataset Paddle provides consists of hand gestures for the digits 0-9, with 200+ color images per gesture at a resolution of 100×100. (Dataset link)
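
As a quick check that the download unpacked correctly, you can count the images per class. This is a small sketch of my own, assuming the dataset sits in a local Dataset folder with one sub-folder per digit:

# Count images per gesture class (illustrative check, not part of the original tutorial flow)
import os

data_path = 'Dataset'  # assumed location of the unpacked dataset
for folder in sorted(os.listdir(data_path)):
    if folder == '.DS_Store':
        continue
    imgs = [f for f in os.listdir(os.path.join(data_path, folder)) if f != '.DS_Store']
    print(folder, len(imgs))  # each digit folder should hold 200+ images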

First, the data processing:

# Generate the image lists
data_path = 'Dataset'  # path to your dataset
character_folders = os.listdir(data_path)
# print(character_folders)
if(os.path.exists('./train_data.list')):
    os.remove('./train_data.list')
if(os.path.exists('./test_data.list')):
    os.remove('./test_data.list')
    
for character_folder in character_folders:
    
    with open('./train_data.list', 'a') as f_train:
        with open('./test_data.list', 'a') as f_test:
            if character_folder == '.DS_Store':
                continue
            character_imgs = os.listdir(os.path.join(data_path,character_folder))
            count = 0 
            for img in character_imgs:
                if img =='.DS_Store':
                    continue
                if count%10 == 0:
                    f_test.write(os.path.join(data_path,character_folder,img) + '\t' + character_folder + '\n')
                else:
                    f_train.write(os.path.join(data_path,character_folder,img) + '\t' + character_folder + '\n')
                count +=1
print('Image lists generated')
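
To verify that the split came out at roughly 90% train / 10% test, a quick line count over the two list files is enough (an optional check I added, not part of the original code):

# Count the samples written to each list file
with open('./train_data.list') as f:
    print('train samples:', len(f.readlines()))
with open('./test_data.list') as f:
    print('test samples:', len(f.readlines()))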

This writes each image path, together with its folder name as the label, into a list file for easy reading. Next we use Paddle's reader module to build the training and test sets.

# Define the readers for the training and test sets
def data_mapper(sample):
    img, label = sample
    img = Image.open(img)
    img = img.resize((100, 100), Image.ANTIALIAS)
    img = np.array(img).astype('float32')
    img = img.transpose((2, 0, 1))
    img = img/255.0
    return img, label

def data_reader(data_list_path):
    def reader():
        with open(data_list_path, 'r') as f:
            lines = f.readlines()
            for line in lines:
                img, label = line.split('\t')
                yield img, int(label)
    return paddle.reader.xmap_readers(data_mapper, reader, cpu_count(), 512)

# Data provider for training
# buf_size controls shuffling: the larger it is, the more thoroughly the sample order is shuffled
train_reader = paddle.batch(reader=paddle.reader.shuffle(reader=data_reader('./train_data.list'), buf_size=1024), batch_size=32)
# Data provider for testing
test_reader = paddle.batch(reader=data_reader('./test_data.list'), batch_size=32) 
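
Before moving on, it is worth pulling a single batch from train_reader to confirm the shapes; this sanity check is my own addition:

# Fetch one batch and inspect it
batch = next(train_reader())
print('batch size:', len(batch))   # 32
img0, label0 = batch[0]
print(img0.shape, label0)          # expected: (3, 100, 100) and an integer label in 0-9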

Building the Network

Here we build our network following the classic AlexNet architecture. The code is as follows:

# Define the network (an AlexNet-style CNN)
class MyDNN(fluid.dygraph.Layer):
    def __init__(self, name_scope, num_classes=10):
        super(MyDNN, self).__init__(name_scope)
        name_scope = self.full_name()

        self.conv1 = Conv2D(num_channels=3, num_filters=96, filter_size=11, stride=4, padding=5, act='relu')
        self.pool1 = Pool2D(pool_size=2, pool_stride=2, pool_type='max')
        self.conv2 = Conv2D(num_channels=96, num_filters=256, filter_size=5, stride=1, padding=2, act='relu')
        self.pool2 = Pool2D(pool_size=2, pool_stride=2, pool_type='max')
        self.conv3 = Conv2D(num_channels=256, num_filters=384, filter_size=3, stride=1, padding=1, act='relu')
        self.conv4 = Conv2D(num_channels=384, num_filters=384, filter_size=3, stride=1, padding=1, act='relu')
        self.conv5 = Conv2D(num_channels=384, num_filters=256, filter_size=3, stride=1, padding=1, act='relu')
        self.pool5 = Pool2D(pool_size=2, pool_stride=2, pool_type='max')
        self.fc1 = Linear(input_dim=2304, output_dim=4096, act='relu')  # flattened feature size is 256*3*3 = 2304 for 100x100 input
        self.drop_ratio1 = 0.5
        self.fc2 = Linear(input_dim=4096, output_dim=4096, act='relu')
        self.drop_ratio2 = 0.5
        self.fc3 = Linear(input_dim=4096, output_dim=num_classes)

        
    def forward(self, x):

        x = self.conv1(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.pool2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = self.pool5(x)
        x = fluid.layers.reshape(x, [x.shape[0], -1])
        x = self.fc1(x)
        # apply dropout after the fully connected layer to curb overfitting
        x = fluid.layers.dropout(x, self.drop_ratio1)
        x = self.fc2(x)
        # apply dropout after the fully connected layer to curb overfitting
        x = fluid.layers.dropout(x, self.drop_ratio2)
        x = self.fc3(x)
        return x

Thanks to Paddle's high-level abstractions, our AlexNet implementation stays concise and easy to read.
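
To convince yourself that the layer sizes line up for 100×100 inputs (conv1 outputs 25×25 feature maps, each pooling layer halves the spatial size, so the flattened feature entering fc1 is 256×3×3 = 2304), you can run a dummy forward pass. This is a minimal sketch of my own, not part of the original post:

# Dummy forward pass to check the output shape
with fluid.dygraph.guard():
    net = MyDNN('Alexnet')
    fake_batch = np.random.rand(4, 3, 100, 100).astype('float32')
    out = net(fluid.dygraph.to_variable(fake_batch))
    print(out.shape)  # expected: [4, 10], one logit per gesture class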

Setting the Network Hyperparameters

Paddle trains in dynamic-graph (dygraph) mode, which is more flexible.

# Train with the dynamic graph

with fluid.dygraph.guard():
    model=MyDNN('Alexnet') # instantiate the model
    model.train() # training mode
    opt = fluid.optimizer.Momentum(learning_rate=0.001,momentum=0.9,parameter_list=model.parameters())
    epochs_num=50 # number of epochs
    
    for pass_num in range(epochs_num):
        
        for batch_id,data in enumerate(train_reader()):
            
            images=np.array([x[0].reshape(3,100,100) for x in data],np.float32)
            
            labels = np.array([x[1] for x in data]).astype('int64')
            labels = labels[:, np.newaxis]
            image=fluid.dygraph.to_variable(images)
            label=fluid.dygraph.to_variable(labels)
            predict=model(image) # forward pass
            loss=fluid.layers.softmax_with_cross_entropy(predict,label)
            avg_loss=fluid.layers.mean(loss) # mean loss over the batch
            
            acc=fluid.layers.accuracy(predict,label) # batch accuracy
            if batch_id!=0 and batch_id%50==0:
                print("train_pass:{},batch_id:{},train_loss:{},train_acc:{}".format(pass_num,batch_id,avg_loss.numpy(),acc.numpy()))
            
            avg_loss.backward()
            opt.minimize(avg_loss)
            model.clear_gradients()
            
    fluid.save_dygraph(model.state_dict(),'MyDNN') # save the model parameters

Simple, isn't it? Paddle models have two modes, train and eval: dropout is enabled in train mode and disabled in eval mode. Next comes evaluating the model.

with fluid.dygraph.guard():
    accs = []
    model_dict, _ = fluid.load_dygraph('MyDNN')
    model = MyDNN('Alexnet')
    model.load_dict(model_dict) # load the trained parameters
    model.eval() # evaluation mode
    for batch_id,data in enumerate(test_reader()): # iterate over the test set
        images=np.array([x[0].reshape(3,100,100) for x in data],np.float32)
        labels = np.array([x[1] for x in data]).astype('int64')
        labels = labels[:, np.newaxis]

        image=fluid.dygraph.to_variable(images)
        label=fluid.dygraph.to_variable(labels)
        
        predict=model(image)       
        acc=fluid.layers.accuracy(predict,label)
        accs.append(acc.numpy()[0])
        avg_acc = np.mean(accs)
    print(avg_acc)
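
With the model saved, using it on a single image works the same way; below is an illustrative sketch where 'Dataset/1/some_image.jpg' is just a placeholder path and the preprocessing mirrors data_mapper above:

# Predict the gesture in a single image (illustrative example)
with fluid.dygraph.guard():
    model_dict, _ = fluid.load_dygraph('MyDNN')
    model = MyDNN('Alexnet')
    model.load_dict(model_dict)
    model.eval()

    img = Image.open('Dataset/1/some_image.jpg')   # placeholder path
    img = img.resize((100, 100), Image.ANTIALIAS)
    img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255.0
    img = img[np.newaxis, :]                        # add the batch dimension

    logits = model(fluid.dygraph.to_variable(img))
    print('predicted gesture:', np.argmax(logits.numpy()))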

That wraps up the whole training process.
After fifty epochs of training I reached 96% accuracy. You can try for higher accuracy, for example by switching to a different optimizer, training for more epochs, or increasing the image resolution.
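
For instance, switching the optimizer only means changing one line in the training block; a sketch using Adam, under the same dygraph setup:

# Drop-in replacement for the Momentum optimizer line above
opt = fluid.optimizer.AdamOptimizer(learning_rate=0.001, parameter_list=model.parameters())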

Takeaways

The Paddle deep learning framework is a solid choice for newcomers. As the first open-source deep learning framework from China, Paddle enjoys a stronger domestic ecosystem: models, code, and the framework itself can all be downloaded conveniently inside China, and the AI Studio cloud platform spares beginners the pain of setting up an environment, even throwing in free GPU resources. Baidu really lives up to its reputation as one of the big three Chinese IT companies, generous to a fault.
There are drawbacks too: the framework's error messages are unfriendly and sometimes baffling, and some APIs are wrapped at such a "high level" that it is hard to see what a given step actually does.
I hope Paddle keeps strengthening itself and continues polishing this homegrown framework!
