人臉檢測之CenterFace

論文:CenterFace: Joint Face Detection and Alignment Using Face as Point

Github:https://github.com/Star-Clouds/CenterFace

 

論文基於centerNet進行改進,提出了anchor free形式的人臉檢測框架,可以同時實現人臉檢測+關鍵點檢測。精度和速度都優於主流的MTCNN,Face Box等框架。

 

主要貢獻:

  1. 提出了anchor free的人臉檢測設計,將人臉檢測問題轉化爲關鍵點估計問題。相比之前的檢測算法,該方法的模型輸出的下采樣率只爲4。
  2. 基於多任務學習策略,同時學習人臉檢測+關鍵點定位
  3. 網絡結構採用了FPN結構
  4. 大量的實驗表明,速度和精度都空前的好

 

級聯檢測器的缺點:

  1. 推理速度受圖片中人臉數目的影響,當人臉數目增多的時候,推理速度也會大大降低。
  2. 每一個模型都單獨訓練,訓練過程繁瑣。非端到端的訓練模式,整體精度有限。

 

網絡結構:

網絡主幹採用MobileNetV2結構,MobileNetV2共進行了5次下采樣(輸出爲原圖的1/32);在MobileNetV2的最後一層之後,增加了3個上採樣層(共8倍上採樣)。因此最終輸出相當於只進行了2次下采樣,輸出特徵圖的寬高爲原圖的1/4。

 

Loss函數:

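人臉分類loss(focal loss的變體),

$$L_c = -\frac{1}{N}\sum_{xy}\begin{cases}(1-\hat{Y}_{xy})^{\alpha}\log(\hat{Y}_{xy}), & Y_{xy}=1\\(1-Y_{xy})^{\beta}(\hat{Y}_{xy})^{\alpha}\log(1-\hat{Y}_{xy}), & \text{otherwise}\end{cases}$$

其中,$Y$ 爲真值熱力圖,$\hat{Y}$ 爲預測熱力圖,$N$ 爲人臉中心點的數目,α = 2 ,β = 4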

 

人臉框中心點偏移loss,

 

人臉框寬,高的loss,

關鍵點的loss,

 

整體loss,爲以上各項loss的加權和,

$$L = L_c + \lambda_{off}L_{off} + \lambda_{box}L_{box} + \lambda_{lm}L_{lm}$$

 

實驗結果:

推理速度,

FDDB精度,

WIDER FACE 精度,

 

Onnx推理:

Onnx模型格式,可以方便的使用程序進行op的增刪改查操作。包括節點的增加,去除,輸入輸出維度的修改等。同時,基於onnx runtime的推理可以獲得比基於pytorch推理略快的速度。缺點就是整個graph已經固定,不支持動態輸入大小。

首先使用change_onnx.py修改作者提供的onnx模型的輸入維度,

import onnx

model = onnx.load("../models/onnx/centerface.onnx")

# Sizes written into the first graph input, keyed by axis index.  Axis 1 is
# deliberately absent and keeps whatever value the model already has.
# NOTE(review): presumably an NCHW layout (batch, channel, height, width),
# i.e. a fixed 1 x C x 480 x 640 input -- confirm against the model.
FIXED_DIMS = {0: 1, 2: 480, 3: 640}

# The model is a protobuf message, so it is edited through the regular
# python-protobuf accessors (HasField / attribute assignment).
for graph_input in model.graph.input:
    print(graph_input.name, end=": ")
    tensor_type = graph_input.type.tensor_type
    if not tensor_type.HasField("shape"):
        print("unknown rank", end="")
    else:
        for axis, dim in enumerate(tensor_type.shape.dim):
            if dim.HasField("dim_value"):
                # Concrete size: overwrite it when this axis is listed, then
                # echo the (possibly updated) value.
                if axis in FIXED_DIMS:
                    dim.dim_value = FIXED_DIMS[axis]
                print(dim.dim_value, end=", ")
            elif dim.HasField("dim_param"):
                # Symbolic size: printed by name and left unchanged.
                print(dim.dim_param, end=", ")
            else:
                # Neither a value nor a symbolic name is set.
                print("?", end=", ")
    print()
    # Only the first graph input is inspected and rewritten.
    break

# Validate the edited graph before writing it next to the script.
onnx.checker.check_model(model)
onnx.save(model, 'out.onnx')

模型另存在out.onnx,和原始的centerface.onnx的輸入維度進行對比,

推理代碼,centerface.py,

import numpy as np
import cv2
import datetime
import torch
import onnxruntime
import onnx

class CenterFace(object):
    """CenterFace face detector (optionally with 5-point landmarks).

    The model is loaded from ``out.onnx`` and executed with ONNX Runtime.
    Calling an instance resizes the image to the fixed network input size,
    runs the network, decodes the output maps into boxes/landmarks and maps
    the coordinates back to the caller's ``height`` x ``width`` frame.
    """

    # Fixed network input size; the image is always resized to this.
    # NOTE(review): presumably must equal the shape stored in out.onnx.
    INPUT_HEIGHT = 480
    INPUT_WIDTH = 640
    # Ratio between input pixels and output-map cells used when decoding.
    STRIDE = 4
    # IoU threshold used by the non-maximum suppression in decode().
    NMS_IOU = 0.3

    def __init__(self, landmarks=True):
        """Create the inference session.

        Args:
            landmarks: when true, calls return ``(dets, lms)``; otherwise
                only ``dets``.
        """
        self.landmarks = landmarks
        self.session = onnxruntime.InferenceSession("out.onnx")
        self.inputs = self.session.get_inputs()[0].name
        # Kept for compatibility; inference below requests all outputs
        # (``session.run(None, ...)``) and does not read this list.
        self.outputs = ["537", "538", "539", '540']

        self.img_h_new, self.img_w_new, self.scale_h, self.scale_w = 0, 0, 0, 0

    def __call__(self, img, height=None, width=None, threshold=0.5):
        """Detect faces in ``img``.

        Args:
            img: image array passed to OpenCV (converted with
                ``COLOR_BGR2RGB``, so BGR channel order is expected).
            height: height of the coordinate frame the results are mapped
                to. Defaults to ``img.shape[0]``.
            width: width of that coordinate frame. Defaults to
                ``img.shape[1]``.
            threshold: minimum heatmap score for a detection.

        Returns:
            ``(dets, lms)`` when landmarks are enabled, else ``dets``.
            ``dets`` rows are ``[x1, y1, x2, y2, score]``; ``lms`` rows are
            ten values ``x0, y0, ..., x4, y4``.
        """
        # Allow callers to omit the size; fall back to the image itself.
        if height is None:
            height = img.shape[0]
        if width is None:
            width = img.shape[1]

        # The network input size is fixed, so the image is always resized to
        # it; transform() is kept for variable-size models but unused here.
        self.img_h_new, self.img_w_new = self.INPUT_HEIGHT, self.INPUT_WIDTH
        self.scale_h = self.INPUT_HEIGHT / height
        self.scale_w = self.INPUT_WIDTH / width
        return self.inference_opencv(img, threshold)

    def inference_opencv(self, img, threshold):
        """Preprocess ``img`` with OpenCV, run the session and post-process.

        Prints the elapsed wall-clock time of preprocessing plus inference.
        """
        begin = datetime.datetime.now()
        image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, (self.img_w_new, self.img_h_new))
        # HWC -> CHW, then add a leading batch axis: (1, C, H, W) float32.
        input_image = image.transpose(2, 0, 1)[np.newaxis].astype(np.float32)

        heatmap, scale, offset, lms = self.session.run(None, {self.inputs: input_image})

        end = datetime.datetime.now()
        print("cpu times = ", end - begin)
        return self.postprocess(heatmap, lms, offset, scale, threshold)

    def transform(self, h, w):
        """Round ``h`` and ``w`` up to multiples of 32.

        Returns:
            ``(new_h, new_w, new_h / h, new_w / w)``.
        """
        img_h_new, img_w_new = int(np.ceil(h / 32) * 32), int(np.ceil(w / 32) * 32)
        scale_h, scale_w = img_h_new / h, img_w_new / w
        return img_h_new, img_w_new, scale_h, scale_w

    def postprocess(self, heatmap, lms, offset, scale, threshold):
        """Decode the network outputs and undo the resize scaling.

        Returns:
            ``(dets, lms)`` when landmarks are enabled, else ``dets``. With
            no detections the arrays are empty with shapes ``(0, 5)`` and
            ``(0, 10)``.
        """
        size = (self.img_h_new, self.img_w_new)
        if self.landmarks:
            dets, lms = self.decode(heatmap, scale, offset, lms, size, threshold=threshold)
        else:
            dets = self.decode(heatmap, scale, offset, None, size, threshold=threshold)

        if len(dets) > 0:
            # Even columns are x coordinates, odd columns are y coordinates.
            dets[:, 0:4:2] /= self.scale_w
            dets[:, 1:4:2] /= self.scale_h
            if self.landmarks:
                lms[:, 0:10:2] /= self.scale_w
                lms[:, 1:10:2] /= self.scale_h
        else:
            dets = np.empty(shape=[0, 5], dtype=np.float32)
            if self.landmarks:
                lms = np.empty(shape=[0, 10], dtype=np.float32)

        if self.landmarks:
            return dets, lms
        return dets

    def decode(self, heatmap, scale, offset, landmark, size, threshold=0.1):
        """Turn the raw output maps into boxes (and landmarks) in input pixels.

        Args:
            heatmap: score map; squeezed and thresholded as a 2-D array.
            scale: indexed as ``[0, 0]`` (log height) and ``[0, 1]``
                (log width), both in output-map cells.
            offset: indexed as ``[0, 0]`` (y offset) and ``[0, 1]``
                (x offset) of the centre within its cell.
            landmark: indexed as ``[0, 2*j]`` (y) and ``[0, 2*j + 1]`` (x)
                for point ``j``, relative to the box origin and size. Only
                read when landmarks are enabled.
            size: ``(height, width)`` used to clip the boxes.
            threshold: minimum heatmap score.

        Returns:
            ``(boxes, lms)`` or ``boxes``. They are float32 arrays after NMS,
            or empty lists when nothing exceeds ``threshold``.
        """
        heatmap = np.squeeze(heatmap)
        scale0, scale1 = scale[0, 0, :, :], scale[0, 1, :, :]
        offset0, offset1 = offset[0, 0, :, :], offset[0, 1, :, :]
        rows, cols = np.where(heatmap > threshold)

        boxes, lms = [], []
        stride = self.STRIDE
        for r, c in zip(rows, cols):
            box_h = np.exp(scale0[r, c]) * stride
            box_w = np.exp(scale1[r, c]) * stride
            off_y, off_x = offset0[r, c], offset1[r, c]
            score = heatmap[r, c]
            # The centre is (cell + offset + 0.5) * stride; clip the top-left
            # corner to the image before deriving the bottom-right corner.
            x1 = max(0, (c + off_x + 0.5) * stride - box_w / 2)
            y1 = max(0, (r + off_y + 0.5) * stride - box_h / 2)
            x1, y1 = min(x1, size[1]), min(y1, size[0])
            boxes.append([x1, y1, min(x1 + box_w, size[1]), min(y1 + box_h, size[0]), score])
            if self.landmarks:
                lm = []
                for j in range(5):
                    lm.append(landmark[0, j * 2 + 1, r, c] * box_w + x1)
                    lm.append(landmark[0, j * 2, r, c] * box_h + y1)
                lms.append(lm)

        if boxes:
            boxes = np.asarray(boxes, dtype=np.float32)
            keep = self.nms(boxes[:, :4], boxes[:, 4], self.NMS_IOU)
            boxes = boxes[keep, :]
            if self.landmarks:
                lms = np.asarray(lms, dtype=np.float32)
                lms = lms[keep, :]

        if self.landmarks:
            return boxes, lms
        return boxes

    def nms(self, boxes, scores, nms_thresh):
        """Greedy non-maximum suppression.

        Args:
            boxes: ``(n, 4)`` array of ``[x1, y1, x2, y2]``.
            scores: ``(n,)`` array of scores.
            nms_thresh: a box is dropped when its IoU with an already kept,
                higher-scoring box is ``>= nms_thresh``.

        Returns:
            List of kept row indices, highest score first.
        """
        x1 = boxes[:, 0]
        y1 = boxes[:, 1]
        x2 = boxes[:, 2]
        y2 = boxes[:, 3]
        # The +1 treats coordinates as inclusive pixel indices.
        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
        order = np.argsort(scores)[::-1]
        num_detections = boxes.shape[0]
        # Builtin bool: the np.bool alias was removed in NumPy 1.24.
        suppressed = np.zeros((num_detections,), dtype=bool)

        keep = []
        for _i in range(num_detections):
            i = order[_i]
            if suppressed[i]:
                continue
            keep.append(i)

            ix1 = x1[i]
            iy1 = y1[i]
            ix2 = x2[i]
            iy2 = y2[i]
            iarea = areas[i]

            for _j in range(_i + 1, num_detections):
                j = order[_j]
                if suppressed[j]:
                    continue

                xx1 = max(ix1, x1[j])
                yy1 = max(iy1, y1[j])
                xx2 = min(ix2, x2[j])
                yy2 = min(iy2, y2[j])
                w = max(0, xx2 - xx1 + 1)
                h = max(0, yy2 - yy1 + 1)

                inter = w * h
                ovr = inter / (iarea + areas[j] - inter)
                if ovr >= nms_thresh:
                    suppressed[j] = True

        return keep

推理代碼,demo.py

import cv2
import scipy.io as sio
import os
from centerface import CenterFace


def camera():
    """Run live face detection on camera 0 until 'q' is pressed.

    Each frame is shown in a window named 'out' with detected boxes (green)
    and the five landmarks per face (red). The frame size is taken from the
    first captured frame and reused for every later frame.

    Raises:
        RuntimeError: if no initial frame can be read from the camera.
    """
    cap = cv2.VideoCapture(0)
    try:
        ret, frame = cap.read()
        if not ret:
            raise RuntimeError("cannot read a frame from camera 0")
        h, w = frame.shape[:2]
        centerface = CenterFace()
        while True:
            ret, frame = cap.read()
            if not ret:
                # The stream ended or the grab failed; stop instead of
                # passing an invalid frame to the detector.
                break
            dets, lms = centerface(frame, h, w, threshold=0.35)
            for det in dets:
                boxes, score = det[:4], det[4]
                cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (2, 255, 0), 1)
            for lm in lms:
                for i in range(0, 5):
                    cv2.circle(frame, (int(lm[i * 2]), int(lm[i * 2 + 1])), 2, (0, 0, 255), -1)
            cv2.imshow('out', frame)
            # Press Q on keyboard to stop recording
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the device even when detection or drawing raises.
        cap.release()


def test_image():
    """Detect faces in 'test.jpg' and write the annotated image to 'out.jpg'.

    Boxes are drawn in green and, when landmarks are enabled, the five
    landmark points of every face in red.

    Raises:
        FileNotFoundError: if 'test.jpg' cannot be read.
    """
    image_path = 'test.jpg'
    frame = cv2.imread(image_path)
    if frame is None:
        # cv2.imread signals an unreadable file by returning None.
        raise FileNotFoundError(image_path)
    h, w = frame.shape[:2]
    landmarks = True
    centerface = CenterFace(landmarks=landmarks)
    if landmarks:
        dets, lms = centerface(frame, h, w, threshold=0.35)
    else:
        # The detector needs the frame size in both modes.
        dets = centerface(frame, h, w, threshold=0.35)

    for det in dets:
        boxes, score = det[:4], det[4]
        cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (2, 255, 0), 1)
    if landmarks:
        for lm in lms:
            for i in range(0, 5):
                cv2.circle(frame, (int(lm[i * 2]), int(lm[i * 2 + 1])), 2, (0, 0, 255), -1)
    # The result is saved rather than shown, so no display is required.
    cv2.imwrite('out.jpg', frame)
    #cv2.waitKey(0)


def test_image_tensorrt():
    """Detect faces in '000388.jpg' with the TensorRT backend and show them.

    The annotated image is displayed in a window named 'out' until a key is
    pressed.

    Raises:
        FileNotFoundError: if '000388.jpg' cannot be read.
    """
    image_path = '000388.jpg'
    frame = cv2.imread(image_path)
    if frame is None:
        # cv2.imread signals an unreadable file by returning None.
        raise FileNotFoundError(image_path)
    h, w = 480, 640  # must be 480* 640
    landmarks = True
    # NOTE(review): this requires a CenterFace whose constructor accepts a
    # `backend` keyword -- confirm the imported centerface module provides
    # one, otherwise this call raises TypeError.
    centerface = CenterFace(landmarks=landmarks, backend="tensorrt")
    if landmarks:
        dets, lms = centerface(frame, h, w, threshold=0.35)
    else:
        # The detector needs the frame size in both modes.
        dets = centerface(frame, h, w, threshold=0.35)

    for det in dets:
        boxes, score = det[:4], det[4]
        cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (2, 255, 0), 1)
    if landmarks:
        for lm in lms:
            for i in range(0, 5):
                cv2.circle(frame, (int(lm[i * 2]), int(lm[i * 2 + 1])), 2, (0, 0, 255), -1)
    cv2.imshow('out', frame)
    cv2.waitKey(0)


def test_widerface():
    """Run the detector over the WIDER FACE validation split.

    Reads the event/file lists from
    'widerface/wider_face_split/wider_face_val.mat', loads every image from
    'widerface/WIDER_val/images/<event>/<name>.jpg' and writes one text file
    per image to 'save_out/<event>/<name>.txt' containing the image name,
    the number of detections and one 'x y w h score' line per detection.

    Raises:
        FileNotFoundError: if an image listed in the split cannot be read.
    """
    Path = 'widerface/WIDER_val/images/'
    wider_face_mat = sio.loadmat('widerface/wider_face_split/wider_face_val.mat')
    event_list = wider_face_mat['event_list']
    file_list = wider_face_mat['file_list']
    save_path = 'save_out/'

    landmarks = True
    # One detector (and one inference session) is enough for the whole run.
    centerface = CenterFace(landmarks=landmarks)

    for index, event in enumerate(event_list):
        file_list_item = file_list[index][0]
        im_dir = event[0][0]
        os.makedirs(save_path + im_dir, exist_ok=True)
        for num, file in enumerate(file_list_item):
            im_name = file[0][0]
            zip_name = '%s/%s.jpg' % (im_dir, im_name)
            img_path = os.path.join(Path, zip_name)
            print(img_path)
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread signals an unreadable file by returning None.
                raise FileNotFoundError(img_path)
            h, w = img.shape[:2]
            if landmarks:
                dets, lms = centerface(img, h, w, threshold=0.05)
            else:
                # The detector needs the frame size in both modes.
                dets = centerface(img, h, w, threshold=0.05)
            # `with` closes the result file even if a write fails.
            with open(save_path + im_dir + '/' + im_name + '.txt', 'w') as f:
                f.write('{:s}\n'.format('%s/%s.jpg' % (im_dir, im_name)))
                f.write('{:d}\n'.format(len(dets)))
                for b in dets:
                    x1, y1, x2, y2, s = b
                    # Boxes are written as x, y, width, height with
                    # inclusive pixel coordinates (hence the +1).
                    f.write('{:.1f} {:.1f} {:.1f} {:.1f} {:.3f}\n'.format(x1, y1, (x2 - x1 + 1), (y2 - y1 + 1), s))
            print('event:%d num:%d' % (index + 1, num + 1))


if __name__ == "__main__":
    # Script entry point: the single-image demo runs by default.
    # Call camera() instead for a live webcam preview, or test_widerface()
    # to dump detections for the WIDER FACE validation split.
    test_image()

最終效果,

 

 

 

 

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章