PaddlePaddle | CV Pandemic Special (5): Crowd Density Detection

This section is based on the Baidu AIStudio course; these are my notes.

Problem Statement
In recent years, pedestrian-analysis vision technology for surveillance scenes has attracted wide attention. Techniques such as human detection, human attribute recognition, and crowd density estimation are now widely deployed in homes, security, new retail, and other important settings. Among them, crowd density estimation for crowded scenes, with accuracy and speed far beyond counting by eye, is widely used in airports, train stations, operating vehicles, art exhibition halls, and similar venues; it helps prevent hazards such as crowd crushes and overloading, and also helps retailers measure foot traffic. This problem takes crowd density estimation as its subject: with this technique as the core, participants must develop a general crowd density estimation algorithm that works across dense, sparse, aerial, vehicle-mounted, and other complex scenes, and accurately estimates the total number of people in an input image.

Task Description
Participants must provide an algorithm or model that, given an image, counts the total number of people in it. Training data is provided; contestants train a model on it and predict the most accurate head count for each test image.

Data Description
The training and test images in this competition all come from ordinary surveillance scenes, but cover multiple viewpoints (low-angle, aerial, fisheye, etc.), and the relative size of pedestrians varies considerably. Part of the training data draws on public datasets (e.g., ShanghaiTech [1], UCF-CC-50 [2], WorldExpo’10 [3], Mall [4]).

The annotations for this competition are stored in the corresponding JSON file. Each training image is annotated in one of two ways:

  • (1) some images provide a bounding box for each pedestrian, in the format [x, y, w, h];
  • (2) some images provide a head-point annotation for each pedestrian, in the format [x, y].

In addition, some images also carry ignore-region (ignore_region) annotations: polygons given as [x_0, y_0, x_1, y_1, …, x_n, y_n] (note that one image may contain several polygonal ignore regions). The parts of an image inside an ignore region take no part in training or testing.
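As a minimal sketch of what consuming one of these entries looks like in Python (the field names follow the train.json structure explored in section 2 below):

import json

with open('/home/aistudio/data/data1917/train.json', encoding='utf-8') as f:
    content = json.load(f)

entry = content['annotations'][0]
for person in entry['annotation']:
    if 'w' in person:                        # box annotation: [x, y, w, h]
        x, y, w, h = person['x'], person['y'], person['w'], person['h']
    else:                                    # point annotation: [x, y]
        x, y = person['x'], person['y']
print('people in this image:', entry['num'])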

Submission
Submissions consist of the model code project version and a results file. The results file is a CSV with any file name, but its fields must follow the specified format: id is the image file name, and predicted is the number of pedestrians in the image.
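For reference, the first lines of a valid results file would look like this (the first row reuses the sample image shown in section 2; the second count is made up):

id,predicted
61a4091324d1983534ca23b6f007f841.jpg,28
71e5bc76196e91f26426b7facbcc0843.jpg,13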

1. Unzip the datasets


# Unzip the training and test sets
!unzip -q -o data/data1917/train_new.zip
!unzip -q -o data/data1917/test_new.zip

2. Explore the data

# Imports
import zipfile
import json
import sys
import time

import numpy as np
import cv2
import scipy
from scipy.ndimage.filters import gaussian_filter
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.image as mping
from matplotlib import cm as CM

import paddle
import paddle.fluid as fluid
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear, Conv2DTranspose
from paddle.fluid.dygraph.base import to_variable
from paddle.utils.plot import Ploter
start = time.time()
# Load the image annotations into a dict
f = open('/home/aistudio/data/data1917/train.json',encoding='utf-8')
content = json.load(f)

print(content.keys())
print('info:',content['info'])
print('stage:',content['stage'])
print('split:',content['split'])
print(content['annotations'][0].keys())
print(content['annotations'][0]['type'])
print(content['annotations'][0][ 'id'])
print(content['annotations'][0]['ignore_region'])
print(content['annotations'][0]['name'])
print(content['annotations'][0]['num'])

Output:

dict_keys(['info', 'split', 'annotations', 'stage'])
info: Baidu Star AI Competition 2018
stage: 1
split: train
dict_keys(['name', 'id', 'num', 'ignore_region', 'type', 'annotation'])
bbox
625
[]
stage1/train/61a4091324d1983534ca23b6f007f841.jpg
28
# Strip the 'stage1/' prefix from every image name:
for j in range(len(content['annotations'])):
    content['annotations'][j]['name'] = content['annotations'][j]['name'].lstrip('stage1').lstrip('/')
print(content['annotations'][1]['name'])

train/71e5bc76196e91f26426b7facbcc0843.jpg
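One caveat about the cell above: str.lstrip strips a character set, not a literal prefix, so .lstrip('stage1') removes any leading run of the characters s, t, a, g, e, 1. It works here because every name continues with 'train/', but running it a second time would eat the leading 't' of 'train'. A prefix check is the safer sketch:

prefix = 'stage1/'
for j in range(len(content['annotations'])):
    name = content['annotations'][j]['name']
    if name.startswith(prefix):
        content['annotations'][j]['name'] = name[len(prefix):]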

# Read the file list from the training zip
zfile = zipfile.ZipFile("data/data1917/train_new.zip")
l = []  # l holds the paths of all images in train
for fname in zfile.namelist()[1:]:
    # print(fname)
    l.append(fname)
print(l[3])
name = l[3]
im = Image.open(name)
plt.imshow(im)
Output:

train/002be7f228f584630bde7582c9dbaffb.jpg
<matplotlib.image.AxesImage at 0x7fc4ad838e90>

(figure: the sample training image)

# Inspect the annotations for this image

for j in range(len(content['annotations'])):
    if content['annotations'][j]['name'] == name:
        print('id = ',content['annotations'][j]['id'])   # image id
        ann = content['annotations'][j]['annotation']
print(ann)       # annotations are x, y, w, h; some images only have x, y
print('number of annotations:',len(ann))
# Crop out and visualize one annotation (index 1)
lab = 1
box = (ann[lab]['x'],ann[lab]['y'],ann[lab]['x']+ann[lab]['w'],ann[lab]['y']+ann[lab]['h'])
new_img = im.crop(box=box)
plt.imshow(new_img)
id =  668
[{'y': 693, 'x': 1108, 'w': 196, 'h': 373}, {'y': 424, 'x': 1009, 'w': 118, 'h': 448}, {'y': 361, 'x': 864, 'w': 250, 'h': 249}, {'y': 300, 'x': 882, 'w': 104, 'h': 342}, {'y': 128, 'x': 846, 'w': 28, 'h': 99}, {'y': 131, 'x': 870, 'w': 48, 'h': 86}, {'y': 94, 'x': 899, 'w': 22, 'h': 90}, {'y': 97, 'x': 878, 'w': 19, 'h': 74}, {'y': 60, 'x': 827, 'w': 23, 'h': 62}, {'y': 44, 'x': 792, 'w': 16, 'h': 48}, {'y': 46, 'x': 799, 'w': 22, 'h': 59}, {'y': 67, 'x': 778, 'w': 26, 'h': 84}, {'y': 98, 'x': 788, 'w': 38, 'h': 86}, {'y': 148, 'x': 653, 'w': 103, 'h': 114}, {'y': 97, 'x': 712, 'w': 35, 'h': 114}, {'y': 90, 'x': 704, 'w': 26, 'h': 108}, {'y': 89, 'x': 733, 'w': 28, 'h': 130}, {'y': 177, 'x': 637, 'w': 76, 'h': 130}, {'y': 378, 'x': 460, 'w': 240, 'h': 281}, {'y': 527, 'x': 361, 'w': 256, 'h': 332}, {'y': 498, 'x': 182, 'w': 242, 'h': 557}, {'y': 906, 'x': 164, 'w': 410, 'h': 173}, {'y': 861, 'x': 1286, 'w': 213, 'h': 218}]
number of annotations: 23

(figure: the cropped pedestrian from annotation index 1)

# Visualize all box annotations on the image
width = im.size[0]    # image width
height = im.size[1]   # image height
print(width,height)
for a in range(len(ann)):        # iterate over all annotations
    for x in range(width):
        for y in range(height):             
            # r,g,b = im.getpixel((x,y))	
            if(x > (ann[a]['x']-5) and x < (ann[a]['x']+5) and y > ann[a]['y'] and y < (ann[a]['y']+ann[a]['h'])):  
                im.putpixel((x,y),(255,0,0))        # red line from (x, y) to (x, y+h), 5 px to either side
            if(x > (ann[a]['x']+ann[a]['w']-5) and x < (ann[a]['x']+ann[a]['w']+5) and y > ann[a]['y'] and y < (ann[a]['y']+ann[a]['h'])):
                im.putpixel((x,y),(255,0,0))       # red line from (x+w, y) to (x+w, y+h)
            if(y > (ann[a]['y']-5) and y < (ann[a]['y']+5) and x > ann[a]['x'] and x < (ann[a]['x']+ann[a]['w'])):
                im.putpixel((x,y),(255,0,0))        # red line from (x, y) to (x+w, y)
            if(y > (ann[a]['y']+ann[a]['h']-5) and y < (ann[a]['y']+ann[a]['h']+5) and x > ann[a]['x'] and x < (ann[a]['x']+ann[a]['w'])):
                im.putpixel((x,y),(255,0,0))        # red line from (x, y+h) to (x+w, y+h)
plt.imshow(im)
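As an aside, the triple loop above visits every pixel once per annotation, i.e. O(width × height × boxes). With a reasonably recent Pillow, ImageDraw draws the same rectangles in one call per box (a sketch, working on a fresh copy of the image):

from PIL import ImageDraw

im2 = Image.open(name)                 # fresh copy of the same image
draw = ImageDraw.Draw(im2)
for a in ann:
    draw.rectangle([a['x'], a['y'], a['x'] + a['w'], a['y'] + a['h']],
                   outline=(255, 0, 0), width=10)   # width=10 matches the ±5 px lines above
plt.imshow(im2)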

(figure: the image with every bounding box outlined in red)

# Group the images by size, which correlates with the capture scenario
l_set = []
s_2560_1920 = []   # boxes   fisheye elevator   63 images
s_928_576 = []     # points  vending machine    248
s_1024_768 = []    # points  street             302
s_640_480 = []     # points  home               92
s_2048_2048 =[]    # boxes   fisheye elevator   41
s_1080_1618 =[]    # filtered out               1
s_1920_1080 = []   # boxes   supermarket        1240
s_1440_1080 =[]    # filtered out               1
s_1920_1200 =[]    # boxes   street             12
for inde in range(2000):
    imm = Image.open(content['annotations'][inde]['name'])
    l_set.append(imm.size)
    if imm.size == (2560, 1920):s_2560_1920.append(content['annotations'][inde]['name'])
    elif imm.size == (928, 576):s_928_576.append(content['annotations'][inde]['name'])
    elif imm.size == (1024, 768):s_1024_768.append(content['annotations'][inde]['name'])
    elif imm.size == (640, 480):s_640_480.append(content['annotations'][inde]['name'])
    elif imm.size == (2048, 2048):s_2048_2048.append(content['annotations'][inde]['name'])
    elif imm.size == (1080, 1618):s_1080_1618.append(content['annotations'][inde]['name'])
    elif imm.size == (1920, 1080):s_1920_1080.append(content['annotations'][inde]['name'])
    elif imm.size == (1440, 1080):s_1440_1080.append(content['annotations'][inde]['name'])
    elif imm.size == (1920, 1200):s_1920_1200.append(content['annotations'][inde]['name'])
print(len(l_set))
sett = set(l_set)
print(sett)
print(len(s_2560_1920),len(s_928_576),len(s_1024_768),len(s_640_480),len(s_2048_2048),len(s_1080_1618),len(s_1920_1080),len(s_1440_1080),len(s_1920_1200))
print(s_1440_1080)
print(s_1080_1618)
# print(s_1024_768)

Output:

2000
{(928, 576), (1024, 768), (640, 480), (2560, 1920), (2048, 2048), (1080, 1618), (1920, 1080), (1440, 1080), (1920, 1200)}
63 248 302 92 41 1 1240 1 12
['train/8538edb45aaf7df78336aa5b49001be6.jpg']
['train/377df0a7a9abc44e840e938521df3b54.jpg']
# Collect all samples where each person is annotated with a point
point_l = []
for f in range(2000):
    if 'w' not in content['annotations'][f]['annotation'][0]:
        point_l.append(content['annotations'][f]['name'])
# for p_name in point_l:
#     print(p_name)
print(len(point_l))
# For point annotations (a coordinate rather than a box), show how a single point marks a person in one image
# name1 = 'train/b179764112252559b76a59db9fa18021.jpg'
name1 = point_l[1]
im1 = Image.open(name1)
for j in range(len(content['annotations'])):
    if content['annotations'][j]['name'] == name1:
        print('id = ',content['annotations'][j]['id'])
        ann1 = content['annotations'][j]['annotation']
# print(ann1)
print('number of annotations:',len(ann1))
for a in range(len(ann1)):
    for x in range(im1.size[0]):
        for y in range(im1.size[1]):
            if(x > (ann1[a]['x']-10) and x < (ann1[a]['x']+10) and y > ann1[a]['y']-10 and y < (ann1[a]['y']+10)):  # pixels within ±10 of the point
                im1.putpixel((x,y),(255,0,0))           # paint that patch red
plt.imshow(im1)

Output:

id =  628
number of annotations: 7

(figure: the image with each head point marked by a red square)

# Collect the ground-truth points (gt) annotated in the block above
gt = []
for a in range(len(ann1)):
    gt.append([ann1[a]['x'],ann1[a]['y']])
print(gt)
gt = np.array(gt)
print(gt.shape)
[[43, 257], [98, 206], [333, 247], [102, 236], [247, 1032], [660, 919], [1414, 1057]]
(7, 2)
# Generate a density map via Gaussian filtering
def gaussian_filter_density(gt):
    # Generates a density map using Gaussian filter transformation
    # initialize the density map
    density = np.zeros(gt.shape, dtype=np.float32)
    
    # number of nonzero elements (annotated points) in gt
    gt_count = np.count_nonzero(gt)
    
    # if gt is all zeros, return an all-zero density map
    if gt_count == 0:
        return density

    # Find the K nearest neighbours using a KDTree (disabled below; a fixed sigma is used instead)
    
    pts = np.array(list(zip(np.nonzero(gt)[1].ravel(), np.nonzero(gt)[0].ravel())))
    
    # if gt_count > 0 and gt_count < 20: 
    
    # leafsize = 2048

    # # build kdtree
    # tree = scipy.spatial.KDTree(pts.copy(), leafsize=leafsize)

    # query kdtree
    # distances, locations = tree.query(pts, k=4)

    for i, pt in enumerate(pts):
        pt2d = np.zeros(gt.shape, dtype=np.float32)
        pt2d[pt[1],pt[0]] = 1.
        if gt_count > 1:
            # sigma = (distances[i][1]+distances[i][2]+distances[i][3])*0.1
            sigma = 25
        else:
            sigma = np.average(np.array(gt.shape))/2./2. #case: 1 point
        
        #Convolve with the gaussian filter
        
        density += scipy.ndimage.filters.gaussian_filter(pt2d, sigma, mode='constant')

    return density
print(gt.shape)  

img= plt.imread(name1)
k = np.zeros((img.shape[0],img.shape[1]))

for i in range(0,len(gt)):
    if int(gt[i][1])<img.shape[0] and int(gt[i][0])<img.shape[1]:
        k[int(gt[i][1]),int(gt[i][0])]=1
    
    # generate density map
k = gaussian_filter_density(k)
# Visualize the density map
print(k.shape)
# groundtruth = np.asarray(k)
groundtruth = k
# groundtruth = groundtruth.resize((80,60))
print(groundtruth.shape)
plt.imshow(groundtruth,cmap=CM.jet)
print("Sum = " ,np.sum(groundtruth))
# print(groundtruth[0][59:100])

Output:

(1080, 1920)
(1080, 1920)
Sum =  6.7463903
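Why 6.75 rather than exactly 7? Each head point is convolved with a Gaussian kernel that integrates to 1, so the density map should sum to roughly the head count; but mode='constant' clips whatever falls outside the image, so points near the border lose some mass. A quick sanity check (a sketch reusing the gaussian_filter import from above):

toy = np.zeros((200, 200), dtype=np.float32)
toy[100, 100] = 1.0                                      # one head, away from the border
print(gaussian_filter(toy, 25, mode='constant').sum())   # ~1.0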
# Image preprocessing
# 1. resize to a fixed size, 448x448 in this example
# 2. normalize pixel values to [0, 1]
def picture_opt(img,ann):
    size_x,size_y = img.size
    # print("size_x:",size_x)
    # print("size_y:",size_y)
    train_img_size = (448,448)
    img = img.resize(train_img_size,Image.ANTIALIAS)
    img = np.array(img)                  
    img = img / 255.0

    gt = []
    for b_l in range(len(ann)):
        # print("ann[b_l]:",ann[b_l])
        # if the person is annotated with a box, reduce the box to a point: x at the box center, y 20 px below the top (roughly the head)
        if 'w' in ann[b_l].keys(): 
            x = (ann[b_l]['x']+(ann[b_l]['x']+ann[b_l]['w']))/2
            y = ann[b_l]['y']+20
            x = (x*448/size_x)/4
            y = (y*448/size_y)/4
            gt.append((x,y))   
        else:
            x = ann[b_l]['x']
            y = ann[b_l]['y']
            x = (x*448/size_x)/4
            y = (y*448/size_y)/4
            gt.append((x,y)) 
    # return the resized image and gt (coordinates scaled to the 112x112 output grid)
    # print("img.shape:",img.shape)
    # print("gt:",gt)
    return img,gt
# Build the density map at 1/4 of the input resolution
def ground(img,gt):
    imgs = img
    x = imgs.shape[0]/4
    y = imgs.shape[1]/4
    k = np.zeros((int(x),int(y)))

    for i in range(0,len(gt)):
        if int(gt[i][1]) < int(x) and int(gt[i][0]) < int(y):
            k[int(gt[i][1]),int(gt[i][0])]=1

        # generate density map
    k = gaussian_filter_density(k)
    return k
# Check the box-to-point conversion
qt = []
img = Image.open(content['annotations'][2]['name'])   
ann = content['annotations'][2]['annotation']
print(img.size)
temp = img.resize((112, 112),Image.ANTIALIAS)
im,qt = picture_opt(img,ann)
print(im.shape)
print(qt)
plt.imshow(im)
for a in range(len(qt)):
    for x in range(temp.size[0]):
        for y in range(temp.size[1]):
            if(x > (qt[a][0]-1) and x < (qt[a][0]+1) and y > qt[a][1]-1 and y < (qt[a][1]+1)):  # pixels within ±1 of the point
                temp.putpixel((x,y),(255,0,0))           # paint that patch red
# plt.imshow(temp)
k = ground(im,qt)
print(type(k))
# plt.imshow(k)
print(np.sum(k))
print(len(ann))

Output:

(928, 576)
(448, 448, 3)
[(40.43103448275862, 48.416666666666664), (34.63793103448276, 36.94444444444444)]
<class 'numpy.ndarray'>
1.7635766
2

(figure: the resized 448×448 sample)

# Define the data generator
def train_set():
    def inner():
        for ig_index in range(2000):                  # iterate over all images
            if len(content['annotations'][ig_index]['annotation']) == 2:continue    # skip images with only 2 annotations
            if len(content['annotations'][ig_index]['annotation']) == 3:continue    # skip images with only 3 annotations
            if content['annotations'][ig_index]['name'] == 'train/8538edb45aaf7df78336aa5b49001be6.jpg':continue    # the two odd-sized images found above
            if content['annotations'][ig_index]['name'] == 'train/377df0a7a9abc44e840e938521df3b54.jpg':continue
            if content['annotations'][ig_index]['ignore_region']:                   # fill ignore regions with zero pixels
                ig_list = []                                                        # vertices of ignore region 1
                ig_list1 = []                                                       # vertices of ignore region 2
                if len(content['annotations'][ig_index]['ignore_region'])==1:       # an image has at most 2 ignore regions; this is the 1-region case
                    ign_rge = content['annotations'][ig_index]['ignore_region'][0]      # the first (only) ignore region
                    for ig_len in range(len(ign_rge)):                                  # collect the polygon vertices
                        ig_list.append([ign_rge[ig_len]['x'],ign_rge[ig_len]['y']])     # each vertex as [x, y]
                    ig_cv_img = cv2.imread(content['annotations'][ig_index]['name'])    # read the image with cv2
                    pts = np.array(ig_list,np.int32)                                    # to ndarray, as cv2.fillPoly requires
                    cv2.fillPoly(ig_cv_img,[pts],(0,0,0),cv2.LINE_AA)                   # fill the ignore region with zeros
                
                    ig_img = Image.fromarray(cv2.cvtColor(ig_cv_img,cv2.COLOR_BGR2RGB))   # cv2 (BGR) to PIL (RGB)
                    
                    ann = content['annotations'][ig_index]['annotation']          # all annotations for this image
                    
                    ig_im,gt = picture_opt(ig_img,ann)
                    k = ground(ig_im,gt)
                    
                    groundtruth = np.asarray(k)
                    groundtruth = groundtruth.T.astype('float32')
                    ig_im = ig_im.transpose().astype('float32')
                    yield ig_im,groundtruth
                    
                if len(content['annotations'][ig_index]['ignore_region'])==2:       # the 2-region case
                    ign_rge = content['annotations'][ig_index]['ignore_region'][0]
                    ign_rge1 = content['annotations'][ig_index]['ignore_region'][1]
                    for ig_len in range(len(ign_rge)):
                        ig_list.append([ign_rge[ig_len]['x'],ign_rge[ig_len]['y']])
                    for ig_len1 in range(len(ign_rge1)):
                        ig_list1.append([ign_rge1[ig_len1]['x'],ign_rge1[ig_len1]['y']])  
                    ig_cv_img2 = cv2.imread(content['annotations'][ig_index]['name'])
                    pts = np.array(ig_list,np.int32)
                    pts1 = np.array(ig_list1,np.int32)
                    cv2.fillPoly(ig_cv_img2,[pts],(0,0,0),cv2.LINE_AA)                
                    cv2.fillPoly(ig_cv_img2,[pts1],(0,0,0),cv2.LINE_AA)
                    
                    ig_img2 = Image.fromarray(cv2.cvtColor(ig_cv_img2,cv2.COLOR_BGR2RGB))   # cv2 (BGR) to PIL (RGB)
                    
                    ann = content['annotations'][ig_index]['annotation']          # all annotations for this image
                    
                    ig_im,gt = picture_opt(ig_img2,ann)
                    k = ground(ig_im,gt)
                    
                    groundtruth = np.asarray(k)
                    groundtruth = groundtruth.T.astype('float32')
                    ig_im = ig_im.transpose().astype('float32')
                    yield ig_im,groundtruth
                    
            else:
                img = Image.open(content['annotations'][ig_index]['name'])
                ann = content['annotations'][ig_index]['annotation']          # all annotations for this image
                
                im,gt = picture_opt(img,ann)
                k = ground(im,gt)
                
                groundtruth = np.asarray(k)
                groundtruth = groundtruth.T.astype('float32')
                im = im.transpose().astype('float32')
                yield im,groundtruth
    return inner
BATCH_SIZE= 16     # 16 images per batch
# Set up the training reader
train_reader = paddle.batch(
    paddle.reader.shuffle(
        train_set(), buf_size=5),   # note: a buffer of only 5 gives very weak shuffling
    batch_size=BATCH_SIZE)

3. Define the model

ResNet with the final fully connected layer removed, plus four transposed-convolution layers for upsampling, so that the output matches the 112×112 density map generated from the annotations.
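Tracing the spatial sizes for a 448×448 input (with the 18-layer configuration used below) shows why exactly four upsampling steps are needed and how the skip connections line up:

448×448 → 7×7 conv, stride 2 → 224×224 → 3×3 max pool, stride 2 → 112×112
c2: 112×112 (256 ch) → c3: 56×56 (512 ch) → c4: 28×28 (1024 ch) → c5: 14×14 (2048 ch)
extra 3×3 max pool, stride 2 → 7×7
upConv1: 7→14 (concat c5) → upConv2: 14→28 (concat c4) → upConv3: 28→56 (concat c3) → upConv4: 56→112 (concat c2) → 1×112×112 density map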

Model:

# Convolution + batch-norm block
class ConvBNLayer(fluid.dygraph.Layer):
    def __init__(self,
                 num_channels,
                 num_filters,
                 filter_size,
                 stride=1,
                 groups=1,
                 act=None,
                 param_attr = fluid.initializer.Xavier(uniform=False)):
        """
        name_scope, 模塊的名字
        num_channels, 卷積層的輸入通道數
        num_filters, 卷積層的輸出通道數
        stride, 卷積層的步幅
        groups, 分組卷積的組數,默認groups=1不使用分組卷積
        act, 激活函數類型,默認act=None不使用激活函數
        """
        super(ConvBNLayer, self).__init__()

        # convolution layer
        self._conv = Conv2D(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=groups,
            act=None,
            bias_attr=False,
            param_attr=param_attr)

        # BatchNorm layer (the activation is applied here)
        self._batch_norm = BatchNorm(num_filters, act=act)

    def forward(self, inputs):
        y = self._conv(inputs)
        y = self._batch_norm(y)
        return y

# Residual (bottleneck) block
# Each block applies three convolutions to its input and then adds a shortcut from the input
# If the third convolution's output shape differs from the input, a 1x1 convolution adjusts the input to match
class BottleneckBlock(fluid.dygraph.Layer):
    def __init__(self,
                 name_scope,
                 num_channels,
                 num_filters,
                 stride,
                 shortcut=True):
        super(BottleneckBlock, self).__init__(name_scope)
        # first convolution, 1x1
        self.conv0 = ConvBNLayer(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=1,
            act='leaky_relu')
        # second convolution, 3x3
        self.conv1 = ConvBNLayer(
            num_channels=num_filters,
            num_filters=num_filters,
            filter_size=3,
            stride=stride,
            act='leaky_relu')
        # third convolution, 1x1, with 4x the output channels
        self.conv2 = ConvBNLayer(
            num_channels=num_filters,
            num_filters=num_filters * 4,
            filter_size=1,
            act=None)

        # shortcut=True if conv2's output shape matches this block's input
        # otherwise shortcut=False and a 1x1 convolution reshapes the input to match conv2
        if not shortcut:
            self.short = ConvBNLayer(
                num_channels=num_channels,
                num_filters=num_filters * 4,
                filter_size=1,
                stride=stride)

        self.shortcut = shortcut

        self._num_channels_out = num_filters * 4

    def forward(self, inputs):
        y = self.conv0(inputs)
        conv1 = self.conv1(y)
        conv2 = self.conv2(conv1)

        # if shortcut=True, add the input directly to conv2's output
        # otherwise pass the input through the 1x1 convolution first
        if self.shortcut:
            short = inputs
        else:
            short = self.short(inputs)

        y = fluid.layers.elementwise_add(x=short, y=conv2)
        layer_helper = LayerHelper(self.full_name(), act='relu')
        return layer_helper.append_activation(y)

# ResNet-based density-map model
class ResNet(fluid.dygraph.Layer):
    def __init__(self, name_scope, layers=50, class_dim=1):
        """
        name_scope,模塊名稱
        layers, 網絡層數,可以是50, 101或者152
        class_dim,分類標籤的類別數
        """
        super(ResNet, self).__init__(name_scope)
        self.layers = layers
        supported_layers = [18, 50, 101, 152]
        assert layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(supported_layers, layers)

        if layers == 50:
            # ResNet50: stages 2-5 contain 3, 4, 6, 3 residual blocks
            depth = [3, 4, 6, 3]
        elif layers == 101:
            # ResNet101: stages 2-5 contain 3, 4, 23, 3 residual blocks
            depth = [3, 4, 23, 3]
        elif layers == 152:
            # ResNet152: stages 2-5 contain 3, 8, 36, 3 residual blocks
            depth = [3, 8, 36, 3]
        elif layers == 18:
            # ResNet18-style depth: 2, 2, 2, 2 blocks per stage (bottleneck blocks are still used here)
            depth = [2, 2, 2, 2]
        # base output channel counts for the four stages
        num_filters = [64, 128, 256, 512]

        # stage 1: a 7x7 convolution followed by a max pool
        self.conv = ConvBNLayer(
            num_channels=3,
            num_filters=64,
            filter_size=7,
            stride=2,
            act='relu')
        self.pool2d_max = Pool2D(
            pool_size=3,
            pool_stride=2,
            pool_padding=1,
            pool_type='max')

        # stages 2 to 5: c2, c3, c4, c5
        self.bottleneck_block_list = []
        num_channels = 64
        for block in range(len(depth)):
            shortcut = False
            for i in range(depth[block]):
                bottleneck_block = self.add_sublayer(
                    'bb_%d_%d' % (block, i),
                    BottleneckBlock(
                        self.full_name(),
                        num_channels=num_channels,
                        num_filters=num_filters[block],
                        stride=2 if i == 0 and block != 0 else 1, # c3, c4, c5 use stride=2 in their first residual block; all other blocks use stride=1
                        shortcut=shortcut))
                num_channels = bottleneck_block._num_channels_out
                self.bottleneck_block_list.append(bottleneck_block)
                shortcut = True

        # an extra pooling layer on the c5 output (a stride-2 max pool, not global average pooling)
        # self.pool2d_avg = Pool2D(pool_size=7, pool_type='avg', global_pooling=False)
        self.pool2d_avg =  Pool2D(
                            pool_size=3,
                            pool_stride=2,
                            pool_padding=1,
                            pool_type='max')
        
        self.upConv1 = Conv2DTranspose(
                        num_channels=2048,
                        num_filters=1024,
                        filter_size=2,
                        stride=2,
                        padding=0,
                        dilation=1,
                        bias_attr=None,
                        act="relu")
        self.exConv1 = ConvBNLayer(
                        num_channels=3072,
                        num_filters=1024,
                        filter_size=3,
                        stride=1,
                        act='relu')
        self.upConv2 = Conv2DTranspose(
                        num_channels=1024,
                        num_filters=512,
                        filter_size=2,
                        stride=2,
                        padding=0,
                        dilation=1,
                        bias_attr=None,
                        act="relu")
        self.exConv2 =  ConvBNLayer(
                        num_channels=1536,
                        num_filters=512,
                        filter_size=3,
                        stride=1,
                        act='relu')
        self.upConv3 = Conv2DTranspose(
                        num_channels=512,
                        num_filters=256,
                        filter_size=2,
                        stride=2,
                        padding=0,
                        dilation=1,
                        bias_attr=None,
                        act="relu")
        self.exConv3 = ConvBNLayer(
                        num_channels=768,
                        num_filters=256,
                        filter_size=3,
                        stride=1,
                        act='relu')
        self.upConv4 = Conv2DTranspose(
                        num_channels=256,
                        num_filters=128,
                        filter_size=2,
                        stride=2,
                        padding=0,
                        dilation=1,
                        bias_attr=None,
                        act="relu")
        self.exConv4 = ConvBNLayer(
                        num_channels=384,
                        num_filters=128,
                        filter_size=3,
                        stride=1,
                        act='relu')
        self.feature1 = Conv2D(num_channels=128, 
                              num_filters=128, 
                              filter_size=3,
                              padding=1,
                              act="relu")
        self.feature2 = Conv2D(num_channels=128, 
                        num_filters=1, 
                        filter_size=3,
                        padding=1,
                        act="relu")
        # stdv sets the scale of the uniform initializer for the fully connected layer below
        import math
        stdv = 1.0 / math.sqrt(2048 * 1.0)
        
        # fully connected layer with class_dim outputs (defined but never called in forward; a leftover from the classification ResNet)
        self.out = Linear(input_dim=2048, output_dim=class_dim,
                      param_attr=fluid.param_attr.ParamAttr(
                          initializer=fluid.initializer.Uniform(-stdv, stdv)))

    def forward(self, inputs):
        # print("inputs:",inputs.shape)
        contact_list = list()
        depth = [1, 3, 5, 7]   # indices of the last block in each stage (valid for the 18-layer configuration used here)
        y = self.conv(inputs)
        # print("y:",y.shape)
        y = self.pool2d_max(y)
        # print(len(self.bottleneck_block_list))
        # print("y:",y.shape)
        count = 0
        for bottleneck_block in self.bottleneck_block_list:
            y = bottleneck_block(y)
            if count in depth:
                contact_list.append(y)
            #     print(y.shape)
            # print(count)
            count += 1
            # print("y:",y.shape)
        # print(len(contact_list))
        y = self.pool2d_avg(y)
        # print("y:",y.shape)
        y = self.upConv1(y) # 1024*14*14
        y = fluid.layers.concat((y, contact_list[3]), axis=1)
        y = self.exConv1(y)
        y = self.upConv2(y)
        y = fluid.layers.concat((y, contact_list[2]), axis=1)
        y = self.exConv2(y)
        y = self.upConv3(y)
        y = fluid.layers.concat((y, contact_list[1]), axis=1)
        y = self.exConv3(y)
        y = self.upConv4(y)
        y = fluid.layers.concat((y, contact_list[0]), axis=1)
        y = self.exConv4(y)
        y = self.feature1(y)
        y = self.feature2(y)
        return y

4. Train the model

with fluid.dygraph.guard():
    model = ResNet("ResNet", layers = 18)
    # # model = ResNet("ResNet", layers = 50, class_dim = 65) # try resnet50
    model.train() # training mode
    opt=fluid.optimizer.AdamOptimizer(learning_rate=fluid.layers.cosine_decay( learning_rate = 1e-5, step_each_epoch=120, epochs=10), parameter_list=model.parameters())
    epochs_num= 10   # number of training epochs
    print("start")
    train_loss = list()
    for pass_num in range(epochs_num):
        
        for batch_id,datas in enumerate(train_reader()):
            
            for i in range(len(datas)):
                if i == 0:
                    imgs, labels = datas[i] 
                    imgs = imgs[np.newaxis,:]
                    labels = labels[np.newaxis,:] # add the channel dim
                    labels = labels[np.newaxis,:] # add the batch dim
                else:
                    img, label = datas[i]
                    img = img[np.newaxis, :]
                    label = label[np.newaxis, :]
                    label = label[np.newaxis, :]
                    imgs = np.concatenate((imgs, img), axis=0)
                    labels = np.concatenate((labels, label), axis=0)
            
            imgs = imgs.astype(np.float32)
            images = fluid.dygraph.to_variable(imgs)
            targets = fluid.dygraph.to_variable(labels)
            predict = model(images)  
           
            # print(predict.shape)
            # print(targets.shape)
            cost = fluid.layers.square_error_cost(predict, targets)
            # cost = fluid.layers.sqrt(cost)
            avg_loss = fluid.layers.mean(cost)
            
            train_loss.append(avg_loss.numpy())
            if batch_id!=0 and batch_id%5==0:
                print("train_pass:{},batch_id:{},train_loss:{}".format(pass_num,batch_id,avg_loss.numpy()))
                 
            
            avg_loss.backward()
            opt.minimize(avg_loss)
            model.clear_gradients()            
        if (pass_num + 1) % 2 == 0: 
            fluid.save_dygraph(model.state_dict(),'MyLeNet_{}'.format(pass_num)) # save a checkpoint every 2 epochs
      
    fluid.save_dygraph(model.state_dict(),'MyLeNet_final') # save the final model
    print("finished")
    plt.figure(dpi = 120)
    x = range(len(train_loss))
    y = train_loss
    plt.plot(x, y, label='train')
    plt.legend(loc='upper right')
    plt.ylabel('loss')
    plt.xlabel('iteration')
    plt.show()

(figure: training loss curve)
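A side note on the batching: the manual np.newaxis/np.concatenate loop above can be collapsed with np.stack; a sketch, where datas is the list of (image, density map) pairs delivered by the reader:

imgs = np.stack([d[0] for d in datas]).astype(np.float32)                     # (N, 3, 448, 448)
labels = np.stack([d[1][np.newaxis, :] for d in datas]).astype(np.float32)    # (N, 1, 112, 112)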

5. Test the model

# Test on a single image
import numpy as np
from PIL import Image
import paddle.fluid as fluid
import matplotlib.pyplot as plt
import zipfile

test_zfile = zipfile.ZipFile("/home/aistudio/data/data1917/test_new.zip")
l_test = []
for test_fname in test_zfile.namelist()[1:]:
    l_test.append(test_fname)
    
test_img = Image.open(l_test[2])

plt.imshow(test_img)
test_img = test_img.resize((448,448))
test_im = np.array(test_img)
test_im = test_im / 255.0
test_im = test_im.transpose().reshape(1,3,448,448).astype('float32')
with fluid.dygraph.guard():
    model, _ = fluid.load_dygraph("MyLeNet_final")
    resnet = ResNet("ResNet", layers = 18)
    resnet.load_dict(model)
    resnet.eval()
    images = fluid.dygraph.to_variable(test_im)
    predict = resnet(images)

    print(predict.numpy().sum())

Output: 8.57991
(figure: the test image)

6. Run the full test set and save the CSV

import numpy as np
from PIL import Image
import paddle.fluid as fluid
import matplotlib.pyplot as plt
import zipfile

test_zfile = zipfile.ZipFile("/home/aistudio/data/data1917/test_new.zip")
l_test = []
for test_fname in test_zfile.namelist()[1:]:
    # print(fname)
    l_test.append(test_fname)

data_dict = {}
with fluid.dygraph.guard():
    # load the trained model
    model, _ = fluid.load_dygraph("MyLeNet_final")
    resnet = ResNet("ResNet", layers = 18)
    resnet.load_dict(model)
    resnet.eval()
    for index in range(len(l_test)):
        test_img = Image.open(l_test[index])
        test_img = test_img.resize((448,448))
        test_im = np.array(test_img)
        test_im = test_im / 255.0
        test_im = test_im.transpose().reshape(1,3,448,448).astype('float32')
        images = fluid.dygraph.to_variable(test_im)
        predict = resnet(images)

        temp=predict
        temp=temp.numpy()
        people =np.sum(temp)
        # print(people)
        name = l_test[index].split("/")[1]
        # print(name)
        
        data_dict[name]=int(people)
        
import csv
with open('results2.csv', 'w') as csvfile:

    fieldnames = ['id', 'predicted']

    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    writer.writeheader()

    for k,v in data_dict.items():
        writer.writerow({'id': k, 'predicted':v})        
    print("finished")

Note: the density map has one hyperparameter, sigma, and its value is closely tied to the final crowd-density estimate. The smaller sigma is, the smaller the discrepancy between the density map and the point annotations, but the fainter the density map becomes, which makes the network hard to train. The density map used in this example is therefore quite limited; switching to an object-detection network could improve accuracy considerably.
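To make the trade-off concrete, here is a small sketch (reusing numpy and gaussian_filter from section 2) that renders a single point at two different sigmas; the peak value shows how concentrated the regression target is:

toy = np.zeros((112, 112), dtype=np.float32)
toy[56, 56] = 1.0
for sigma in (5, 25):
    dm = gaussian_filter(toy, sigma, mode='constant')
    print('sigma=%d  peak=%.5f  sum=%.3f' % (sigma, dm.max(), dm.sum()))
# smaller sigma -> sharp, hard-to-regress peaks; larger sigma -> smooth but diffuse targets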
