這是根據CVPR2017的論文代碼改寫的Pytorch版,也可以看做是OpenPose的代碼。
今天對這個repo裏的一個demo代碼進行了閱讀,把其中比較難懂的部分做了註釋。
後續可能會根據論文做一個代碼對比流程。
- Source Code:
https://github.com/tensorboy/pytorch_Realtime_Multi-Person_Pose_Estimation/blob/master/picture_demo.py - Require:
- Pytorch 0.3.1
- Python
參考文章:
https://www.cnblogs.com/demian/p/8988396.html
Code:
import os
import re
import sys
import cv2
import math
import time
import scipy
import argparse
import matplotlib
from torch import np # Pytorch裏可以直接加載numpy,但是官網文檔裏查不到
import pylab as plt
from joblib import Parallel, delayed
import util
import torch
import torch as T
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from collections import OrderedDict
from config_reader import config_reader
from scipy.ndimage.filters import gaussian_filter
#parser = argparse.ArgumentParser()
#parser.add_argument('--t7_file', required=True)
#parser.add_argument('--pth_file', required=True)
#args = parser.parse_args()
torch.set_num_threads(torch.get_num_threads())
weight_name = './model/pose_model.pth'
blocks = {}
'''
18個關節,pt19爲背景
part_str = [nose, neck, Rsho, Relb, Rwri, Lsho, Lelb, Lwri, Rhip, Rkne, Rank, Lhip, Lkne, Lank, Leye, Reye, Lear, Rear, pt19]
'''
# find connection in the specified sequence, center 29 is in the position 15
# 18個關節對應19個關節鏈接(也就是肢體)
limbSeq = [[2,3], [2,6], [3,4], [4,5], [6,7], [7,8], [2,9], [9,10], \
[10,11], [2,12], [12,13], [13,14], [2,1], [1,15], [15,17], \
[1,16], [16,18], [3,17], [6,18]]
# the middle joints heatmap correpondence
# 肢體對應的PAF特徵圖(19-56共38張,每兩張表示一個二維方向向量)
mapIdx = [[31,32], [39,40], [33,34], [35,36], [41,42], [43,44], [19,20], [21,22], \
[23,24], [25,26], [27,28], [29,30], [47,48], [49,50], [53,54], [51,52], \
[55,56], [37,38], [45,46]]
# visualize
colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0], \
[0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], \
[170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]]
# 以下爲構造網絡部分,很好理解
block0 = [{'conv1_1':[3,64,3,1,1]},{'conv1_2':[64,64,3,1,1]},{'pool1_stage1':[2,2,0]},{'conv2_1':[64,128,3,1,1]},{'conv2_2':[128,128,3,1,1]},{'pool2_stage1':[2,2,0]},{'conv3_1':[128,256,3,1,1]},{'conv3_2':[256,256,3,1,1]},{'conv3_3':[256,256,3,1,1]},{'conv3_4':[256,256,3,1,1]},{'pool3_stage1':[2,2,0]},{'conv4_1':[256,512,3,1,1]},{'conv4_2':[512,512,3,1,1]},{'conv4_3_CPM':[512,256,3,1,1]},{'conv4_4_CPM':[256,128,3,1,1]}]
blocks['block1_1'] = [{'conv5_1_CPM_L1':[128,128,3,1,1]},{'conv5_2_CPM_L1':[128,128,3,1,1]},{'conv5_3_CPM_L1':[128,128,3,1,1]},{'conv5_4_CPM_L1':[128,512,1,1,0]},{'conv5_5_CPM_L1':[512,38,1,1,0]}]
blocks['block1_2'] = [{'conv5_1_CPM_L2':[128,128,3,1,1]},{'conv5_2_CPM_L2':[128,128,3,1,1]},{'conv5_3_CPM_L2':[128,128,3,1,1]},{'conv5_4_CPM_L2':[128,512,1,1,0]},{'conv5_5_CPM_L2':[512,19,1,1,0]}]
# 185 = 128 + 19 + 38
for i in range(2,7):
blocks['block%d_1'%i] = [{'Mconv1_stage%d_L1'%i:[185,128,7,1,3]},{'Mconv2_stage%d_L1'%i:[128,128,7,1,3]},{'Mconv3_stage%d_L1'%i:[128,128,7,1,3]},{'Mconv4_stage%d_L1'%i:[128,128,7,1,3]},
{'Mconv5_stage%d_L1'%i:[128,128,7,1,3]},{'Mconv6_stage%d_L1'%i:[128,128,1,1,0]},{'Mconv7_stage%d_L1'%i:[128,38,1,1,0]}]
blocks['block%d_2'%i] = [{'Mconv1_stage%d_L2'%i:[185,128,7,1,3]},{'Mconv2_stage%d_L2'%i:[128,128,7,1,3]},{'Mconv3_stage%d_L2'%i:[128,128,7,1,3]},{'Mconv4_stage%d_L2'%i:[128,128,7,1,3]},
{'Mconv5_stage%d_L2'%i:[128,128,7,1,3]},{'Mconv6_stage%d_L2'%i:[128,128,1,1,0]},{'Mconv7_stage%d_L2'%i:[128,19,1,1,0]}]
def make_layers(cfg_dict):
layers = []
for i in range(len(cfg_dict)-1):
one_ = cfg_dict[i]
for k,v in one_.iteritems():
if 'pool' in k:
layers += [nn.MaxPool2d(kernel_size=v[0], stride=v[1], padding=v[2] )]
else:
conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1], kernel_size=v[2], stride = v[3], padding=v[4])
layers += [conv2d, nn.ReLU(inplace=True)]
one_ = cfg_dict[-1].keys()
k = one_[0]
v = cfg_dict[-1][k]
conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1], kernel_size=v[2], stride = v[3], padding=v[4])
layers += [conv2d]
return nn.Sequential(*layers)
layers = []
for i in range(len(block0)):
one_ = block0[i]
for k,v in one_.iteritems():
if 'pool' in k:
layers += [nn.MaxPool2d(kernel_size=v[0], stride=v[1], padding=v[2] )]
else:
conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1], kernel_size=v[2], stride = v[3], padding=v[4])
layers += [conv2d, nn.ReLU(inplace=True)]
models = {}
models['block0']=nn.Sequential(*layers)
for k,v in blocks.iteritems():
models[k] = make_layers(v)
class pose_model(nn.Module):
def __init__(self,model_dict,transform_input=False):
super(pose_model, self).__init__()
self.model0 = model_dict['block0']
self.model1_1 = model_dict['block1_1']
self.model2_1 = model_dict['block2_1']
self.model3_1 = model_dict['block3_1']
self.model4_1 = model_dict['block4_1']
self.model5_1 = model_dict['block5_1']
self.model6_1 = model_dict['block6_1']
self.model1_2 = model_dict['block1_2']
self.model2_2 = model_dict['block2_2']
self.model3_2 = model_dict['block3_2']
self.model4_2 = model_dict['block4_2']
self.model5_2 = model_dict['block5_2']
self.model6_2 = model_dict['block6_2']
def forward(self, x):
out1 = self.model0(x)
out1_1 = self.model1_1(out1)
out1_2 = self.model1_2(out1)
out2 = torch.cat([out1_1,out1_2,out1],1)
out2_1 = self.model2_1(out2)
out2_2 = self.model2_2(out2)
out3 = torch.cat([out2_1,out2_2,out1],1)
out3_1 = self.model3_1(out3)
out3_2 = self.model3_2(out3)
out4 = torch.cat([out3_1,out3_2,out1],1)
out4_1 = self.model4_1(out4)
out4_2 = self.model4_2(out4)
out5 = torch.cat([out4_1,out4_2,out1],1)
out5_1 = self.model5_1(out5)
out5_2 = self.model5_2(out5)
out6 = torch.cat([out5_1,out5_2,out1],1)
out6_1 = self.model6_1(out6)
out6_2 = self.model6_2(out6)
return out6_1,out6_2 #分別輸入38(PAF),19(關節+背景)維置信圖
model = pose_model(models)
model.load_state_dict(torch.load(weight_name))
model.cuda()
model.float()
# 因爲這是demo代碼,所以是直接用訓練好的模型,把Model調整爲eval模式
model.eval()
param_, model_ = config_reader()
#torch.nn.functional.pad(img pad, mode='constant', value=model_['padValue'])
tic = time.time()
test_image = './sample_image/ski.jpg'
#test_image = 'a.jpg'
oriImg = cv2.imread(test_image) # B,G,R order
imageToTest = Variable(T.transpose(T.transpose(T.unsqueeze(torch.from_numpy(oriImg).float(),0),2,3),1,2),volatile=True).cuda()
#multiplier是用四種不同尺度的圖像去作爲輸入,有利於學習關節的空間關係
multiplier = [x * model_['boxsize'] / oriImg.shape[0] for x in param_['scale_search']]
heatmap_avg = torch.zeros((len(multiplier),19,oriImg.shape[0], oriImg.shape[1])).cuda()
paf_avg = torch.zeros((len(multiplier),38,oriImg.shape[0], oriImg.shape[1])).cuda()
#print heatmap_avg.size()
toc =time.time()
print 'time is %.5f'%(toc-tic)
tic = time.time()
#對不同尺度的圖進行處理
for m in range(len(multiplier)):
scale = multiplier[m]
h = int(oriImg.shape[0]*scale)
w = int(oriImg.shape[1]*scale)
pad_h = 0 if (h%model_['stride']==0) else model_['stride'] - (h % model_['stride'])
pad_w = 0 if (w%model_['stride']==0) else model_['stride'] - (w % model_['stride'])
new_h = h+pad_h
new_w = w+pad_w
imageToTest = cv2.resize(oriImg, (0,0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC) # 原圖進行scale變換
imageToTest_padded, pad = util.padRightDownCorner(imageToTest, model_['stride'], model_['padValue'])
imageToTest_padded = np.transpose(np.float32(imageToTest_padded[:,:,:,np.newaxis]), (3,2,0,1))/256 - 0.5
feed = Variable(T.from_numpy(imageToTest_padded)).cuda()
output1,output2 = model(feed)
print output1.size()
print output2.size()
#利用上採樣將特徵圖變換成原圖大小
heatmap = nn.UpsamplingBilinear2d((oriImg.shape[0], oriImg.shape[1])).cuda()(output2)
paf = nn.UpsamplingBilinear2d((oriImg.shape[0], oriImg.shape[1])).cuda()(output1)
heatmap_avg[m] = heatmap[0].data
paf_avg[m] = paf[0].data
toc =time.time()
print 'time is %.5f'%(toc-tic)
tic = time.time()
heatmap_avg = T.transpose(T.transpose(T.squeeze(T.mean(heatmap_avg, 0)),0,1),1,2).cuda()
paf_avg = T.transpose(T.transpose(T.squeeze(T.mean(paf_avg, 0)),0,1),1,2).cuda()
heatmap_avg=heatmap_avg.cpu().numpy()
paf_avg = paf_avg.cpu().numpy()
toc =time.time()
print 'time is %.5f'%(toc-tic)
tic = time.time()
#以下是根據預測的19張關節特徵圖尋找關節點對應的位置(類似於圖像分割)
all_peaks = []
peak_counter = 0
for part in range(18):
map_ori = heatmap_avg[:,:,part]
map = gaussian_filter(map_ori, sigma=3)
map_left = np.zeros(map.shape)
map_left[1:,:] = map[:-1,:]
map_right = np.zeros(map.shape)
map_right[:-1,:] = map[1:,:]
map_up = np.zeros(map.shape)
map_up[:,1:] = map[:,:-1]
map_down = np.zeros(map.shape)
map_down[:,:-1] = map[:,1:]
#尋找局部極值
peaks_binary = np.logical_and.reduce((map>=map_left, map>=map_right, map>=map_up, map>=map_down, map > param_['thre1']))
# peaks_binary = T.eq(
# peaks = zip(T.nonzero(peaks_binary)[0],T.nonzero(peaks_binary)[0])
peaks = zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0]) # note reverse
peaks_with_score = [x + (map_ori[x[1],x[0]],) for x in peaks]
id = range(peak_counter, peak_counter + len(peaks))
peaks_with_score_and_id = [peaks_with_score[i] + (id[i],) for i in range(len(id))]
all_peaks.append(peaks_with_score_and_id) # [[y, x, peak_score, id)],...]
peak_counter += len(peaks)
#以下是根據預測出的38張paf特徵圖來預測關節鏈接(肢體)
connection_all = []
special_k = []
mid_num = 10
#計算線性積分(對應論文part assosiation部分和Fig 6)
for k in range(len(mapIdx)):
score_mid = paf_avg[:,:,[x-19 for x in mapIdx[k]]]# channel爲2的paf_avg,表示PAF向量
candA = all_peaks[limbSeq[k][0]-1]#第k個limb中左關節點的候選集合A(不同人的關節點)
candB = all_peaks[limbSeq[k][1]-1]#第k個limb中右關節點的候選集合B(不同人的關節點)
nA = len(candA)
nB = len(candB)
indexA, indexB = limbSeq[k]
if(nA != 0 and nB != 0):#有候選開始連接
connection_candidate = []
#連接所有檢測出的關節點(nA * nB)
for i in range(nA):
for j in range(nB):
#計算單位向量
vec = np.subtract(candB[j][:2], candA[i][:2])
norm = math.sqrt(vec[0]*vec[0] + vec[1]*vec[1])
vec = np.divide(vec, norm)
#在A[i],B[j]連線上取mid_num個採樣點
startend = zip(np.linspace(candA[i][0], candB[j][0], num=mid_num), \
np.linspace(candA[i][1], candB[j][1], num=mid_num))
#根據特徵圖取採樣點的paf向量
vec_x = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 0] \
for I in range(len(startend))])
vec_y = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 1] \
for I in range(len(startend))])
#計算餘弦值,用來衡量相似度
score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(vec_y, vec[1])
score_with_dist_prior = sum(score_midpts)/len(score_midpts) + min(0.5*oriImg.shape[0]/norm-1, 0)
#評判連接有效的兩個標準
criterion1 = len(np.nonzero(score_midpts > param_['thre2'])[0]) > 0.8 * len(score_midpts)
criterion2 = score_with_dist_prior > 0
if criterion1 and criterion2:
connection_candidate.append([i, j, score_with_dist_prior, score_with_dist_prior+candA[i][2]+candB[j][2]])
#對所有連接進行排序
connection_candidate = sorted(connection_candidate, key=lambda x: x[2], reverse=True)
connection = np.zeros((0,5))
#留下對於每個關節點得分最高的連接,連接數保證不大於nA,nB的最小值
for c in range(len(connection_candidate)):
i,j,s = connection_candidate[c][0:3]
if(i not in connection[:,3] and j not in connection[:,4]):
connection = np.vstack([connection, [candA[i][3], candB[j][3], s, i, j]]) # A_id, B_id, score, i, j
if(len(connection) >= min(nA, nB)):
break
connection_all.append(connection)
else:
special_k.append(k)
connection_all.append([])
'''
function: 將檢測的關節點連接拼成人
subset: last number in each row is the total parts number of that person
subset: the second last number in each row is the score of the overall configuration
candidate: 候選關節點
connection_all: 候選limb
ps: 這段代碼要先看not found的狀態,生成subset
'''
subset = -1 * np.ones((0, 20))
candidate = np.array([item for sublist in all_peaks for item in sublist]# 一個id的(y,x,score,id)(關節點)
for k in range(len(mapIdx)):
if k not in special_k:
partAs = connection_all[k][:,0]# 第k個limb,左端點的候選id集合
partBs = connection_all[k][:,1]# 第k個limb,右端點的候選id集合
indexA, indexB = np.array(limbSeq[k]) - 1# 關節點index
for i in range(len(connection_all[k])): #= 1:size(temp,1)
found = 0
subset_idx = [-1, -1]
for j in range(len(subset)): #1:size(subset,1):遍歷每個人(subset)
if subset[j][indexA] == partAs[i] or subset[j][indexB] == partBs[i]:
subset_idx[found] = j
found += 1
# 關節點在subset裏只出現一次(比如人的肩肘已經連接,此時要連接肘腕,而肘就是公共點),這構造新連接,此subset的關節數+1
if found == 1:
j = subset_idx[0]
if(subset[j][indexB] != partBs[i]):
subset[j][indexB] = partBs[i]
subset[j][-1] += 1
subset[j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
# 對一個新連接來說,左右端點都出現,說明左右端點間還沒有連接起來。
elif found == 2: # if found 2 and disjoint, merge them
j1, j2 = subset_idx
print "found = 2"
membership = ((subset[j1]>=0).astype(int) + (subset[j2]>=0).astype(int))[:-2]
# 如果兩個人的相同關節點沒有在各自的subset中都連成limb,那麼合併兩個subset構成一個人,關節數爲兩人各自關節數相加。
if len(np.nonzero(membership == 2)[0]) == 0: #merge
subset[j1][:-2] += (subset[j2][:-2] + 1)#+1的原因是初始值爲-1
subset[j1][-2:] += subset[j2][-2:]
subset[j1][-2] += connection_all[k][i][2]
subset = np.delete(subset, j2, 0)
# 以下這段沒看出具體的作用
else: # as like found == 1
subset[j1][indexB] = partBs[i]
subset[j1][-1] += 1
subset[j1][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
# 每出現新的關節連接組合,則說明多了一個人,於是加一個subset,且關節數+2
# if find no partA in the subset, create a new subset
elif not found and k < 17:
row = -1 * np.ones(20)
row[indexA] = partAs[i]
row[indexB] = partBs[i]
row[-1] = 2
row[-2] = sum(candidate[connection_all[k][i,:2].astype(int), 2]) + connection_all[k][i][2]
subset = np.vstack([subset, row])
# 設置評判條件,不滿足條件則不可稱爲人(刪除subset)
# delete some rows of subset which has few parts occur
deleteIdx = [];
for i in range(len(subset)):
if subset[i][-1] < 4 or subset[i][-2]/subset[i][-1] < 0.4:
deleteIdx.append(i)
subset = np.delete(subset, deleteIdx, axis=0)
canvas = cv2.imread(test_image) # B,G,R order
for i in range(18):
for j in range(len(all_peaks[i])):
cv2.circle(canvas, all_peaks[i][j][0:2], 4, colors[i], thickness=-1)
stickwidth = 4
# 關節及肢體顯示
for i in range(17):
for n in range(len(subset)):
index = subset[n][np.array(limbSeq[i])-1]
if -1 in index:
continue
cur_canvas = canvas.copy()
Y = candidate[index.astype(int), 0]
X = candidate[index.astype(int), 1]
mX = np.mean(X)
mY = np.mean(Y)
length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
polygon = cv2.ellipse2Poly((int(mY),int(mX)), (int(length/2), stickwidth), int(angle), 0, 360, 1)
cv2.fillConvexPoly(cur_canvas, polygon, colors[i])
canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0)
#Parallel(n_jobs=1)(delayed(handle_one)(i) for i in range(18))
toc =time.time()
print 'time is %.5f'%(toc-tic)
cv2.imwrite('result.png',canvas)