姿態識別過程需要先對人體進行檢測,然後再對姿態進行識別。下面是基於SSD進行的單目標檢測:
# -*- coding: utf-8 -*-
# @FileName: ssd_body_detect.py
#### 主要想引入之前訓練的單人體檢測模型
import numpy as np
import os
import tensorflow as tf
import cv2
# Restrict CUDA to device 0 before any TensorFlow session is created.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# Frozen detection graph that the __main__ block deserializes.
PATH_TO_CKPT ='./frozen_inference_graph.pb'
# Label map file that parse_label_files() reads in the __main__ block.
PATH_TO_LABELS = './pascal_label_map.pbtxt'
# Lower-case extensions (without the dot) accepted by is_image().
image_ext = ['jpg', 'jpeg', 'png']
# Lower-case extensions (without the dot) accepted by is_video().
video_ext = ['avi', 'mp4']
def is_image(file_name):
    """Return True when *file_name* ends in one of the ``image_ext`` extensions.

    The comparison is case-insensitive. A name that contains no '.' has no
    extension and is never reported as an image.
    """
    dot = file_name.rfind('.')
    if dot < 0:
        # rfind() returns -1 when there is no dot; slicing from -1 + 1 == 0
        # would treat the whole name (e.g. "png") as the extension.
        return False
    return file_name[dot + 1:].lower() in image_ext
def is_video(file_name):
    """Return True when *file_name* ends in one of the ``video_ext`` extensions.

    The comparison is case-insensitive. A name that contains no '.' has no
    extension and is never reported as a video.
    """
    dot = file_name.rfind('.')
    if dot < 0:
        # rfind() returns -1 when there is no dot; slicing from -1 + 1 == 0
        # would treat the whole name (e.g. "mp4") as the extension.
        return False
    return file_name[dot + 1:].lower() in video_ext
def is_cap(file_name=None):
    """Return the capture-device index used for live input, which is always 0.

    Args:
        file_name: Ignored. Kept (now optional) so existing callers that pass
            a value keep working and a bare ``is_cap()`` call is also valid.

    Returns:
        The integer 0.
    """
    # The argument never influenced the result; the original merely rebound
    # the parameter to 0 before returning it.
    return 0
def parse_label_files(file_path):
    """Parse a label-map text file into a category index.

    Only lines of the form ``id: <value>`` and ``name: <value>`` are used;
    every other line (``item {``, ``}``, ``display_name: ...``, blank lines)
    is ignored. The n-th ``id`` line is paired with the n-th ``name`` line;
    surplus ids or names without a partner are dropped.

    Args:
        file_path: Path of the label-map file.

    Returns:
        dict mapping each integer class id to
        ``{"id": <id as str>, "name": <name as str>}``. When an id is not an
        integer literal, the 1-based position of the entry is used as the key.
    """
    ids = []
    names = []
    # The context manager closes the file even if reading fails part-way.
    with open(file_path) as label_file:
        for raw_line in label_file:
            field, colon, value = raw_line.strip().partition(":")
            if not colon:
                continue
            field = field.strip()
            value = value.strip()
            if field == "id":
                ids.append(value)
            elif field == "name":
                # Label maps quote names with either ' or "; keep the whole
                # value so multi-word names such as 'traffic light' survive.
                names.append(value.strip("'\""))

    result = {}
    for position, (class_id, class_name) in enumerate(zip(ids, names), start=1):
        # Key by the declared id so lookups by detected class id stay correct
        # even when ids are not the contiguous sequence 1..N.
        try:
            key = int(class_id)
        except ValueError:
            key = position
        result[key] = {"id": class_id, "name": class_name}
    return result
def bbox_result(img_width, img_height, boxes, classes, scores, category_index,
                use_normalized_coordinates=False, max_boxes_to_draw=20,
                min_score_thresh=.7, agnostic_mode=False):
    """Collect the detections whose score exceeds a threshold.

    Args:
        img_width: Factor applied to x coordinates when
            ``use_normalized_coordinates`` is true.
        img_height: Factor applied to y coordinates when
            ``use_normalized_coordinates`` is true.
        boxes: Array whose rows unpack as ``(y_min, x_min, y_max, x_max)``.
        classes: Per-box class ids, looked up in ``category_index``.
        scores: Per-box scores, or None.
        category_index: Mapping ``class id -> {"name": ...}``.
        use_normalized_coordinates: Scale box coordinates by the image size.
        max_boxes_to_draw: Only the first this-many boxes are examined; a
            falsy value means all boxes.
        min_score_thresh: A box is kept only when its score is strictly
            greater than this value.
        agnostic_mode: When true, the label of every box is ``'score'``.

    Returns:
        A list of ``[x_min, y_min, x_max, y_max, class_name, confidence]``
        with ``confidence`` an integer percentage. Identical boxes are
        reported once, using the first occurrence. When ``scores`` is None
        the historical behaviour is kept: None if there is at least one box
        to examine, otherwise an empty list.
    """
    if not max_boxes_to_draw:
        max_boxes_to_draw = boxes.shape[0]
    box_count = min(max_boxes_to_draw, boxes.shape[0])

    if scores is None:
        return None if box_count > 0 else []

    # Keep (class_name, confidence) as data instead of formatting it into a
    # "name: NN%" string and splitting on ':' again, which broke for any
    # class name that itself contains a colon.
    detections = {}
    for i in range(box_count):
        if not scores[i] > min_score_thresh:
            continue
        box = tuple(boxes[i].tolist())
        if box in detections:
            continue
        if agnostic_mode:
            class_name = 'score'
        elif classes[i] in category_index:
            class_name = category_index[classes[i]]['name']
        else:
            class_name = 'N/A'
        detections[box] = (class_name, int(100 * scores[i]))

    results = []
    for (y_min, x_min, y_max, x_max), (class_name, confidence) in detections.items():
        if use_normalized_coordinates:
            x_min, x_max = x_min * img_width, x_max * img_width
            y_min, y_max = y_min * img_height, y_max * img_height
        results.append([x_min, y_min, x_max, y_max, class_name, confidence])
    return results
def body_detection_frame(sess, image_np, category_index, image_tensor, detection_boxes,
                         detection_scores, detection_classes, num_detections):
    """Detect bodies in one video frame and draw the results onto it.

    This used to be a line-for-line copy of ``body_detection``; it now
    delegates to that function so the two cannot drift apart.

    Args:
        sess: Session used to run the detection graph.
        image_np: Frame to run detection on; boxes and labels are drawn on it.
        category_index: Mapping ``class id -> {"name": ...}``.
        image_tensor: Input placeholder of the detection graph.
        detection_boxes: Output tensor holding the boxes.
        detection_scores: Output tensor holding the scores.
        detection_classes: Output tensor holding the class ids.
        num_detections: Output tensor holding the detection count.

    Returns:
        ``(image_np, final_result)`` exactly as returned by ``body_detection``.
    """
    return body_detection(sess, image_np, category_index, image_tensor, detection_boxes,
                          detection_scores, detection_classes, num_detections)
def body_detection(sess, image_np, category_index, image_tensor, detection_boxes,
                   detection_scores, detection_classes, num_detections):
    """Run the detector on one image and draw every accepted box onto it.

    Args:
        sess: Session used to run the detection graph.
        image_np: Image array; ``shape[0]`` is used as the height and
            ``shape[1]`` as the width. Boxes and labels are drawn on it.
        category_index: Mapping ``class id -> {"name": ...}``.
        image_tensor: Input placeholder of the detection graph.
        detection_boxes: Output tensor holding the boxes.
        detection_scores: Output tensor holding the scores.
        detection_classes: Output tensor holding the class ids.
        num_detections: Output tensor holding the detection count (fetched
            but not used).

    Returns:
        ``(image_np, final_result)`` where ``final_result`` is the list built
        by ``bbox_result`` with coordinates scaled to pixels.
    """
    img_height = image_np.shape[0]
    img_width = image_np.shape[1]
    # The graph is fed a batch containing just this image.
    image_np_expanded = np.expand_dims(image_np, axis=0)
    boxes, scores, classes, _ = sess.run(
        [detection_boxes, detection_scores, detection_classes, num_detections],
        feed_dict={image_tensor: image_np_expanded})

    # Index the batch axis instead of np.squeeze(): squeeze also removes the
    # detection axis when the model returns exactly one detection, leaving
    # 1-D boxes and 0-D scores that bbox_result cannot index.
    # NOTE(review): assumes every output carries a leading batch axis of 1.
    final_result = bbox_result(img_width, img_height, boxes[0],
                               classes[0].astype(np.int32), scores[0],
                               category_index, use_normalized_coordinates=True)

    for x_min, y_min, x_max, y_max, class_name, _confidence in final_result:
        top_left = (int(x_min), int(y_min))
        bottom_right = (int(x_max), int(y_max))
        cv2.rectangle(image_np, top_left, bottom_right, (0, 0, 255), 8)
        cv2.putText(image_np, class_name, top_left, cv2.FONT_HERSHEY_SIMPLEX, 1,
                    (255, 255, 255), 2)
    return image_np, final_result
def body_detection_main(sess, category_index, image_tensor, detection_boxes,
                        detection_scores, detection_classes, num_detections):
    """Run live body detection on capture device 0 and display the result.

    Frames are read until the capture stops delivering them or the user
    presses 'q' in the preview window. Only the camera path is implemented;
    the earlier directory / image / video-file dispatch was disabled in the
    original and referenced an undefined path variable, so it was removed.

    Args:
        sess: Session used to run the detection graph.
        category_index: Mapping ``class id -> {"name": ...}``.
        image_tensor: Input placeholder of the detection graph.
        detection_boxes: Output tensor holding the boxes.
        detection_scores: Output tensor holding the scores.
        detection_classes: Output tensor holding the class ids.
        num_detections: Output tensor holding the detection count.
    """
    cam = cv2.VideoCapture(0)
    try:
        while True:
            frame_got, frame = cam.read()
            if not frame_got:
                break
            start = cv2.getTickCount()
            image_np, _ = body_detection_frame(sess, frame, category_index, image_tensor,
                                               detection_boxes, detection_scores,
                                               detection_classes, num_detections)
            elapsed = (cv2.getTickCount() - start) / cv2.getTickFrequency()
            # Skip the rate report rather than divide by a zero interval.
            if elapsed > 0:
                print('body detect %.1ffps fps' % (1 / elapsed))
            cv2.imshow("body_detect_video", image_np)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the device and close the window even if detection raised.
        cam.release()
        cv2.destroyAllWindows()
if __name__ == '__main__':
    filepath = 0  # not read anywhere below
    # Parse the label map, then load the frozen model into its own graph.
    category_index = parse_label_files(PATH_TO_LABELS)
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            od_graph_def.ParseFromString(fid.read())
        tf.import_graph_def(od_graph_def, name='')

    with detection_graph.as_default(), tf.Session(graph=detection_graph) as sess:
        # Look up the graph's input placeholder and its four outputs by name.
        tensor_names = ('image_tensor:0', 'detection_boxes:0', 'detection_scores:0',
                        'detection_classes:0', 'num_detections:0')
        (image_tensor, detection_boxes, detection_scores,
         detection_classes, num_detections) = [
            detection_graph.get_tensor_by_name(tensor_name)
            for tensor_name in tensor_names
        ]
        # Start the camera detection loop.
        body_detection_main(sess, category_index, image_tensor, detection_boxes,
                            detection_scores, detection_classes, num_detections)
權重下載地址:
鏈接:https://pan.baidu.com/s/1nBe1tsDqHdetBxgGkOcYQg
提取碼:t0je