Preprocessing the WFLW facial-landmark dataset

References:

Computing head pose from 6-, 14-, and 68-point facial landmarks

dlib: https://www.zhihu.com/question/34524316

Model: https://astuteinternet.dl.sourceforge.net/project/dclib/dlib/v18.10/shape_predictor_68_face_landmarks.dat.bz2

https://blog.csdn.net/ChuiGeDaQiQiu/article/details/88623267

3D data:

https://github.com/yinguobing/head-pose-estimation

https://www.learnopencv.com/facial-landmark-detection/

Head pose estimation, detailed theory plus hands-on code:

https://zhuanlan.zhihu.com/p/51208197

Head pose estimation with OpenCV and dlib (Python):

https://blog.csdn.net/yuanlulu/article/details/82763170

head-pose-estimation (provides a 3D model):

https://github.com/YadiraF/face3d

https://github.com/lincolnhard/head-pose-estimation

 

For some issues with the visualization, see: https://github.com/lincolnhard/head-pose-estimation


I've recently been working with this code:

PFLD-pytorch (on GitHub)

It uses the WFLW dataset, which can be downloaded here:

https://wywu.github.io/projects/LAB/WFLW.html

At first I didn't know an official preprocessing script existed, so I wrote my own. I then found that some landmarks lie outside the detection box, and patched it using the minimum enclosing rectangle of the landmarks and the box. First up,

the original version:

import os
import cv2
import numpy as np

# self.img = cv2.imread(self.line[0])
# self.landmark = np.asarray(self.line[1:197], dtype=np.float32)
# self.attribute = np.asarray(self.line[197:203], dtype=np.int32)
# self.euler_angle = np.asarray(self.line[203:206], dtype=np.float32)

def groberect(points, ww, hh):
    # Expand the box [x1, y1, x2, y2] by a factor of 1.3 around its center,
    # clipping the result to the image bounds (ww x hh).
    x1 = points[0]
    y1 = points[1]
    x2 = points[2]
    y2 = points[3]

    w = x2 - x1 + 1
    h = y2 - y1 + 1
    px = float(x1 + x2) / 2
    py = float(y1 + y2) / 2

    w = w * 1.3
    h = h * 1.3

    l = max(0, px - w / 2)
    r = min(ww - 1, px + w / 2)
    t = max(0, py - h / 2)
    b = min(hh - 1, py + h / 2)

    # x1 y1 x2 y2
    return [int(l), int(t), int(r), int(b)]

os.makedirs("./result", exist_ok=True)  # cv2.imwrite will not create the directory
file_ynh = open("./WFLW_annotations/list_98pt_rect_attr_train_test/list_98pt_rect_attr_train.txt",'r')
lines = file_ynh.readlines()
count = len(lines)
num = 1
for line in lines:
    print(num, "/", count)
    num += 1
    line = line.strip().split()
    landmark = line[0:196]
    detection = line[196:200]
    attributes = line[200:206]
    name = line[206:207]
    img = cv2.imread("./WFLW_images/" + str(name[0]))
    if img is None:
        print("failed to read image:", name[0])
        continue
    h,w = img.shape[0:2]

    # cv2.rectangle(img, (int(detection[0]),int(detection[1])), (int(detection[2]),int(detection[3])), (0, 255, 0), 2, 8)
    # for index in range(0, len(landmark), 2):
    #     cv2.circle(img, (int(float(landmark[index])), int(float(landmark[index+1]))), 1, (255, 0, 0), -1)
    # cv2.imshow("img.jpg", img)
    # cv2.waitKey(0)

    detection = list(map(int, detection))
    rect = groberect(detection, w, h)
    rectimg = img[rect[1]:rect[3],rect[0]:rect[2],:]

    landmark = list(map(float, landmark))
    for index in range(0, len(landmark), 2):
        if( (landmark[index]-rect[0])<0 or (landmark[index+1]-rect[1])<0 ):
            print("特徵點超出擴展框,應該改變策略,使用特徵點和原始框的最小外接矩形做擴展框")
            print(landmark[index]-rect[0],"\n")
            print(landmark[index+1]-rect[1], "\n")
        cv2.circle(rectimg, (int(landmark[index]-rect[0]), int(landmark[index+1]-rect[1])), 1, (255, 0, 0), -1)
    cv2.imwrite("./result/"+"img_%s.jpg"%(str(num)), rectimg)
    # cv2.imshow("rectimg.jpg", rectimg)
    # cv2.waitKey(0)

file_ynh.close()

Only after finishing my own code did I discover the official script exists. Sigh... The official version is at the end of this post.

The improved version using the minimum enclosing rectangle:

import os
import cv2
import numpy as np

# self.img = cv2.imread(self.line[0])
# self.landmark = np.asarray(self.line[1:197], dtype=np.float32)
# self.attribute = np.asarray(self.line[197:203], dtype=np.int32)
# self.euler_angle = np.asarray(self.line[203:206], dtype=np.float32)

def groberect(points, ww, hh):
    # Expand the box [x1, y1, x2, y2] by a factor of 1.2 around its center,
    # clipping the result to the image bounds (ww x hh).
    x1 = points[0]
    y1 = points[1]
    x2 = points[2]
    y2 = points[3]

    w = x2 - x1 + 1
    h = y2 - y1 + 1
    px = float(x1 + x2) / 2
    py = float(y1 + y2) / 2

    w = w * 1.2
    h = h * 1.2

    l = max(0, px - w / 2)
    r = min(ww - 1, px + w / 2)
    t = max(0, py - h / 2)
    b = min(hh - 1, py + h / 2)

    # x1 y1 x2 y2
    return [int(l), int(t), int(r), int(b)]

os.makedirs("./result", exist_ok=True)  # cv2.imwrite will not create the directory
file_ynh = open("./WFLW_annotations/list_98pt_rect_attr_train_test/list_98pt_rect_attr_train.txt",'r')
lines = file_ynh.readlines()
count = len(lines)
num = 1
for line in lines:
    print(num, "/", count)
    num += 1
    line = line.strip().split()
    landmark = line[0:196]
    detection = line[196:200]
    attributes = line[200:206]
    name = line[206:207]
    img = cv2.imread("./WFLW_images/" + str(name[0]))
    if img is None:
        print("failed to read image:", name[0])
        continue
    h,w = img.shape[0:2]

    # cv2.rectangle(img, (int(detection[0]),int(detection[1])), (int(detection[2]),int(detection[3])), (0, 255, 0), 2, 8)
    # for index in range(0, len(landmark), 2):
    #     cv2.circle(img, (int(float(landmark[index])), int(float(landmark[index+1]))), 1, (255, 0, 0), -1)
    # cv2.imshow("img.jpg", img)
    # cv2.waitKey(0)

    landmark = list(map(float, landmark))
    new_landmark = []
    for index in range(0, len(landmark), 2):
        new_landmark.append([landmark[index], landmark[index+1]])

    landmark_array = np.asarray(new_landmark)
    xmax, ymax = landmark_array.max(axis=0)
    xmin, ymin = landmark_array.min(axis=0)
    # coordinates can be negative; clamp them so the crop stays inside the image
    xmin = xmin if xmin > 0 else 0
    ymin = ymin if ymin > 0 else 0
    xmax = xmax if xmax < w-1 else w-1
    ymax = ymax if ymax < h-1 else h-1

    detection = list(map(int, detection))
    new_detection = []
    new_detection.append(detection[0] if detection[0] < int(xmin+0.5) else int(xmin+0.5))
    new_detection.append(detection[1] if detection[1] < int(ymin+0.5) else int(ymin+0.5))
    new_detection.append(detection[2] if detection[2] > int(xmax+0.5) else int(xmax+0.5))
    new_detection.append(detection[3] if detection[3] > int(ymax+0.5) else int(ymax+0.5))
    #new_detection = [detection[x] if detection[x] > landList[x] else landList[x] for x in range(len(detection))]

    rect = groberect(new_detection, w, h)
    rectimg = img[rect[1]:rect[3],rect[0]:rect[2],:]

    for index in range(0, len(landmark), 2):
        x,y = landmark[index], landmark[index+1]
        x = x if x > 0 else 0
        y = y if y > 0 else 0
        x = x if x < w - 1 else w - 1
        y = y if y < h - 1 else h - 1

        if( (x-rect[0])<0 or (y-rect[1])<0 ):
            print("特徵點超出擴展框,應該改變策略,使用特徵點和原始框的最小外接矩形做擴展框")
            print(x-rect[0],"\n")
            print(y-rect[1], "\n")

        cv2.circle(rectimg, (int(x-rect[0]), int(y-rect[1])), 1, (255, 0, 0), -1)
    cv2.imwrite("./result/"+"img_%s.jpg"%(str(num)), rectimg)
    # cv2.imshow("rectimg.jpg", rectimg)
    # cv2.waitKey(0)

file_ynh.close()
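
Stripped of the loop bookkeeping, the fix itself is just taking the union of the detection box and the landmarks' bounding box before passing it to groberect. A standalone sketch of that step (union_box is my own helper name, not from the scripts above):

import numpy as np

def union_box(detection, landmarks):
    # Union of a detection box [x1, y1, x2, y2] and the landmarks'
    # axis-aligned bounding box; landmarks is an (N, 2) float array.
    lx1, ly1 = landmarks.min(axis=0)
    lx2, ly2 = landmarks.max(axis=0)
    return [min(detection[0], int(lx1)),
            min(detection[1], int(ly1)),
            max(detection[2], int(lx2 + 0.5)),
            max(detection[3], int(ly2 + 0.5))]

groberect(union_box(detection, landmark_array), w, h) then yields an expanded box that contains every landmark, up to the final clipping at the image border.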

The official preprocessing code:

#-*- coding: utf-8 -*-
import os
import numpy as np
import cv2
import shutil
import sys
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))
#from pfld.utils import calculate_pitch_yaw_roll
debug = False

import math
def calculate_pitch_yaw_roll(landmarks_2D, cam_w=256, cam_h=256,
                             radians=False):
    """ Return the the pitch  yaw and roll angles associated with the input image.
    @param radians When True it returns the angle in radians, otherwise in degrees.
    """

    assert landmarks_2D is not None, 'landmarks_2D is None'

    # Estimated camera matrix values.
    c_x = cam_w / 2
    c_y = cam_h / 2
    f_x = c_x / np.tan(60 / 2 * np.pi / 180)
    f_y = f_x
    camera_matrix = np.float32([[f_x, 0.0, c_x], [0.0, f_y, c_y],
                                [0.0, 0.0, 1.0]])
    camera_distortion = np.float32([0.0, 0.0, 0.0, 0.0, 0.0])

    # dlib (68 landmarks) tracked points
    # TRACKED_POINTS = [17, 21, 22, 26, 36, 39, 42, 45, 31, 35, 48, 54, 57, 8]
    # wflw (98 landmarks) tracked points
    # TRACKED_POINTS = [33, 38, 50, 46, 60, 64, 68, 72, 55, 59, 76, 82, 85, 16]
    # X-Y-Z with X pointing forward and Y on the left and Z up.
    # The X-Y-Z coordinates used are like the standard coordinates of ROS (robotic operative system)
    # OpenCV uses the reference usually used in computer vision:
    # X points to the right, Y down, Z to the front
    landmarks_3D = np.float32([
        [6.825897, 6.760612, 4.402142],  # LEFT_EYEBROW_LEFT,
        [1.330353, 7.122144, 6.903745],  # LEFT_EYEBROW_RIGHT,
        [-1.330353, 7.122144, 6.903745],  # RIGHT_EYEBROW_LEFT,
        [-6.825897, 6.760612, 4.402142],  # RIGHT_EYEBROW_RIGHT,
        [5.311432, 5.485328, 3.987654],  # LEFT_EYE_LEFT,
        [1.789930, 5.393625, 4.413414],  # LEFT_EYE_RIGHT,
        [-1.789930, 5.393625, 4.413414],  # RIGHT_EYE_LEFT,
        [-5.311432, 5.485328, 3.987654],  # RIGHT_EYE_RIGHT,
        [2.005628, 1.409845, 6.165652],  # NOSE_LEFT,
        [-2.005628, 1.409845, 6.165652],  # NOSE_RIGHT,
        [2.774015, -2.080775, 5.048531],  # MOUTH_LEFT,
        [-2.774015, -2.080775, 5.048531],  # MOUTH_RIGHT,
        [0.000000, -3.116408, 6.097667],  # LOWER_LIP,
        [0.000000, -7.415691, 4.070434],  # CHIN
    ])
    landmarks_2D = np.asarray(landmarks_2D, dtype=np.float32).reshape(-1, 2)

    # Applying the PnP solver to find the 3D pose of the head from the 2D position of the landmarks.
    # retval - bool
    # rvec - Output rotation vector that, together with tvec, brings points from the world coordinate system to the camera coordinate system.
    # tvec - Output translation vector. It is the position of the world origin (SELLION) in camera co-ords
    _, rvec, tvec = cv2.solvePnP(landmarks_3D, landmarks_2D,
                                 camera_matrix, camera_distortion)
    # cv2.Rodrigues takes a rotation vector and returns a rotation matrix

    # const double PI = 3.141592653;
    # double thetaz = atan2(r21, r11) / PI * 180;
    # double thetay = atan2(-1 * r31, sqrt(r32*r32 + r33*r33)) / PI * 180;
    # double thetax = atan2(r32, r33) / PI * 180;

    rmat, _ = cv2.Rodrigues(rvec)
    pose_mat = cv2.hconcat((rmat, tvec))
    _, _, _, _, _, _, euler_angles = cv2.decomposeProjectionMatrix(pose_mat)
    return map(lambda k: k[0], euler_angles)  # euler_angles contain (pitch, yaw, roll)

def rotate(angle, center, landmark):
    rad = angle * np.pi / 180.0
    alpha = np.cos(rad)
    beta = np.sin(rad)
    M = np.zeros((2,3), dtype=np.float32)
    M[0, 0] = alpha
    M[0, 1] = beta
    M[0, 2] = (1-alpha)*center[0] - beta*center[1]
    M[1, 0] = -beta
    M[1, 1] = alpha
    M[1, 2] = beta*center[0] + (1-alpha)*center[1]

    landmark_ = np.asarray([(M[0,0]*x+M[0,1]*y+M[0,2],
                             M[1,0]*x+M[1,1]*y+M[1,2]) for (x,y) in landmark])
    return M, landmark_

class ImageDate():
    def __init__(self, line, imgDir, image_size=112):
        self.image_size = image_size
        line = line.strip().split()
        #0-195: landmark coordinates; 196-199: bbox coordinates
        #200: pose          0 -> normal pose           1 -> large pose
        #201: expression    0 -> normal expression     1 -> exaggerated expression
        #202: illumination  0 -> normal illumination   1 -> extreme illumination
        #203: make-up       0 -> no make-up            1 -> make-up
        #204: occlusion     0 -> no occlusion          1 -> occlusion
        #205: blur          0 -> clear                 1 -> blurred
        #206: image name
        assert(len(line) == 207)
        self.list = line
        self.landmark = np.asarray(list(map(float, line[:196])), dtype=np.float32).reshape(-1, 2)
        self.box = np.asarray(list(map(int, line[196:200])),dtype=np.int32)
        flag = list(map(int, line[200:206]))
        flag = list(map(bool, flag))
        self.pose = flag[0]
        self.expression = flag[1]
        self.illumination = flag[2]
        self.make_up = flag[3]
        self.occlusion = flag[4]
        self.blur = flag[5]
        self.path = os.path.join(imgDir, line[206])
        self.img = None

        self.imgs = []
        self.landmarks = []
        self.boxes = []

    def load_data(self, is_train, repeat, mirror=None):
        if (mirror is not None):
            with open(mirror, 'r') as f:
                lines = f.readlines()
                assert len(lines) == 1
                mirror_idx = lines[0].strip().split(',')
                mirror_idx = list(map(int, mirror_idx))
        xy = np.min(self.landmark, axis=0).astype(np.int32) 
        zz = np.max(self.landmark, axis=0).astype(np.int32)
        wh = zz - xy + 1

        center = (xy + wh/2).astype(np.int32)
        img = cv2.imread(self.path)
        boxsize = int(np.max(wh)*1.2)
        xy = center - boxsize//2
        x1, y1 = xy
        x2, y2 = xy + boxsize
        height, width, _ = img.shape
        dx = max(0, -x1)
        dy = max(0, -y1)
        x1 = max(0, x1)
        y1 = max(0, y1)

        edx = max(0, x2 - width)
        edy = max(0, y2 - height)
        x2 = min(width, x2)
        y2 = min(height, y2)

        imgT = img[y1:y2, x1:x2]
        if (dx > 0 or dy > 0 or edx > 0 or edy > 0):
            imgT = cv2.copyMakeBorder(imgT, dy, edy, dx, edx, cv2.BORDER_CONSTANT, 0)
        if imgT.shape[0] == 0 or imgT.shape[1] == 0:
            imgTT = img.copy()  # img is already BGR; keep a copy for visualization
            for x, y in (self.landmark+0.5).astype(np.int32):
                cv2.circle(imgTT, (x, y), 1, (0, 0, 255))
            cv2.imshow('0', imgTT)
            if cv2.waitKey(0) == 27:
                exit()
        imgT = cv2.resize(imgT, (self.image_size, self.image_size))
        landmark = (self.landmark - xy)/boxsize
        assert (landmark >= 0).all(), str(landmark) + str([dx, dy])
        assert (landmark <= 1).all(), str(landmark) + str([dx, dy])
        self.imgs.append(imgT)
        self.landmarks.append(landmark)

        if is_train:
            while len(self.imgs) < repeat:
                angle = np.random.randint(-30, 30)
                cx, cy = center
                cx = cx + int(np.random.randint(-boxsize*0.1, boxsize*0.1))
                cy = cy + int(np.random.randint(-boxsize * 0.1, boxsize * 0.1))
                M, landmark = rotate(angle, (cx,cy), self.landmark)

                imgT = cv2.warpAffine(img, M, (int(img.shape[1]*1.1), int(img.shape[0]*1.1)))

                
                wh = np.ptp(landmark, axis=0).astype(np.int32) + 1
                size = np.random.randint(int(np.min(wh)), np.ceil(np.max(wh) * 1.25))
                xy = np.asarray((cx - size // 2, cy - size//2), dtype=np.int32)
                landmark = (landmark - xy) / size
                if (landmark < 0).any() or (landmark > 1).any():
                    continue

                x1, y1 = xy
                x2, y2 = xy + size
                height, width, _ = imgT.shape
                dx = max(0, -x1)
                dy = max(0, -y1)
                x1 = max(0, x1)
                y1 = max(0, y1)

                edx = max(0, x2 - width)
                edy = max(0, y2 - height)
                x2 = min(width, x2)
                y2 = min(height, y2)

                imgT = imgT[y1:y2, x1:x2]
                if (dx > 0 or dy > 0 or edx >0 or edy > 0):
                    imgT = cv2.copyMakeBorder(imgT, dy, edy, dx, edx, cv2.BORDER_CONSTANT, 0)

                imgT = cv2.resize(imgT, (self.image_size, self.image_size))

                if mirror is not None and np.random.choice((True, False)):
                    landmark[:,0] = 1 - landmark[:,0]
                    landmark = landmark[mirror_idx]
                    imgT = cv2.flip(imgT, 1)
                self.imgs.append(imgT)
                self.landmarks.append(landmark)

    def save_data(self, path, prefix):
        attributes = [self.pose, self.expression, self.illumination, self.make_up, self.occlusion, self.blur]
        attributes = np.asarray(attributes, dtype=np.int32)
        attributes_str = ' '.join(list(map(str, attributes)))
        labels = []
        TRACKED_POINTS = [33, 38, 50, 46, 60, 64, 68, 72, 55, 59, 76, 82, 85, 16]
        for i, (img, landmark) in enumerate(zip(self.imgs, self.landmarks)):
            assert landmark.shape == (98, 2)
            save_path = os.path.join(path, prefix+'_'+str(i)+'.png')
            assert not os.path.exists(save_path), save_path
            cv2.imwrite(save_path, img)

            euler_angles_landmark = []
            for index in TRACKED_POINTS:
                euler_angles_landmark.append(landmark[index])
            euler_angles_landmark = np.asarray(euler_angles_landmark).reshape((-1, 28))
            pitch, yaw, roll = calculate_pitch_yaw_roll(euler_angles_landmark[0])
            euler_angles = np.asarray((pitch, yaw, roll), dtype=np.float32)
            euler_angles_str = ' '.join(list(map(str, euler_angles)))

            landmark_str = ' '.join(list(map(str, landmark.reshape(-1).tolist())))

            label = '{} {} {} {}\n'.format(save_path, landmark_str, attributes_str, euler_angles_str)

            labels.append(label)
        return labels

def get_dataset_list(imgDir, outDir, landmarkDir, is_train):
    with open(landmarkDir,'r') as f:
        lines = f.readlines()
        labels = []
        save_img = os.path.join(outDir, 'imgs')
        if not os.path.exists(save_img):
            os.mkdir(save_img)

        if debug:
            lines = lines[:100]
        for i, line in enumerate(lines):
            Img = ImageDate(line, imgDir)
            img_name = Img.path
            Img.load_data(is_train, 10, Mirror_file)
            _, filename = os.path.split(img_name)
            filename, _ = os.path.splitext(filename)
            label_txt = Img.save_data(save_img, str(i)+'_' + filename)
            labels.append(label_txt)
            if ((i + 1) % 100) == 0:
                print('file: {}/{}'.format(i+1, len(lines)))

    with open(os.path.join(outDir, 'list.txt'),'w') as f:
        for label in labels:
            f.writelines(label)

if __name__ == '__main__':
    root_dir = os.path.dirname(os.path.realpath(__file__))
    imageDirs = 'WFLW/WFLW_images'
    Mirror_file = 'WFLW/WFLW_annotations/Mirror98.txt'

    landmarkDirs = ['WFLW/WFLW_annotations/list_98pt_rect_attr_train_test/list_98pt_rect_attr_test.txt',
                    'WFLW/WFLW_annotations/list_98pt_rect_attr_train_test/list_98pt_rect_attr_train.txt']

    outDirs = ['test_data', 'train_data']
    for landmarkDir, outDir in zip(landmarkDirs, outDirs):
        outDir = os.path.join(root_dir, outDir)
        print(outDir)
        if os.path.exists(outDir):
            shutil.rmtree(outDir)
        os.mkdir(outDir)
        if 'list_98pt_rect_attr_test.txt' in landmarkDir:
            is_train = False
        else:
            is_train = True
        imgs = get_dataset_list(imageDirs, outDir, landmarkDir, is_train)
    print('end')
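
For reference, each line the script writes to list.txt is: the saved image path, 196 normalized landmark values, 6 attribute flags, and 3 Euler angles, matching the commented-out loading code at the top of my scripts above. A minimal parsing sketch (parse_line is my own helper, not part of the official code):

import numpy as np

def parse_line(line):
    # One line of list.txt: image path, 98*2 normalized landmarks,
    # 6 attribute flags, then pitch/yaw/roll.
    fields = line.strip().split()
    path = fields[0]
    landmark = np.asarray(fields[1:197], dtype=np.float32).reshape(-1, 2)
    attribute = np.asarray(fields[197:203], dtype=np.int32)
    euler_angle = np.asarray(fields[203:206], dtype=np.float32)
    return path, landmark, attribute, euler_angle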

 

Additionally, the blog post https://blog.csdn.net/ChuiGeDaQiQiu/article/details/88623267 uses a generic 3D model, together with generic camera intrinsics and zero distortion, and the results are not very good:

#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import print_function

import os
import cv2
import sys
import numpy as np
import math


class PoseEstimator:
    """Estimate head pose according to the facial landmarks"""

    def __init__(self, img_size=(480, 640)):
        self.size = img_size

        # 3D model points.
        self.model_points_6 = np.array([
            (0.0, 0.0, 0.0),  # Nose tip
            (0.0, -330.0, -65.0),  # Chin
            (-225.0, 170.0, -135.0),  # Left eye left corner
            (225.0, 170.0, -135.0),  # Right eye right corner
            (-150.0, -150.0, -125.0),  # Left Mouth corner
            (150.0, -150.0, -125.0)  # Right mouth corner
        ], dtype=float) / 4.5

        self.model_points_14 = np.array([
            (6.825897, 6.760612, 4.402142),
            (1.330353, 7.122144, 6.903745),
            (-1.330353, 7.122144, 6.903745),
            (-6.825897, 6.760612, 4.402142),
            (5.311432, 5.485328, 3.987654),
            (1.789930, 5.393625, 4.413414),
            (-1.789930, 5.393625, 4.413414),
            (-5.311432, 5.485328, 3.987654),
            (2.005628, 1.409845, 6.165652),
            (-2.005628, 1.409845, 6.165652),
            (2.774015, -2.080775, 5.048531),
            (-2.774015, -2.080775, 5.048531),
            (0.000000, -3.116408, 6.097667),
            (0.000000, -7.415691, 4.070434)], dtype=float)

        self.model_points_68 = np.array([
            [-73.393523, -29.801432, -47.667532],
            [-72.775014, -10.949766, -45.909403],
            [-70.533638, 7.929818, -44.84258],
            [-66.850058, 26.07428, -43.141114],
            [-59.790187, 42.56439, -38.635298],
            [-48.368973, 56.48108, -30.750622],
            [-34.121101, 67.246992, -18.456453],
            [-17.875411, 75.056892, -3.609035],
            [0.098749, 77.061286, 0.881698],
            [17.477031, 74.758448, -5.181201],
            [32.648966, 66.929021, -19.176563],
            [46.372358, 56.311389, -30.77057],
            [57.34348, 42.419126, -37.628629],
            [64.388482, 25.45588, -40.886309],
            [68.212038, 6.990805, -42.281449],
            [70.486405, -11.666193, -44.142567],
            [71.375822, -30.365191, -47.140426],
            [-61.119406, -49.361602, -14.254422],
            [-51.287588, -58.769795, -7.268147],
            [-37.8048, -61.996155, -0.442051],
            [-24.022754, -61.033399, 6.606501],
            [-11.635713, -56.686759, 11.967398],
            [12.056636, -57.391033, 12.051204],
            [25.106256, -61.902186, 7.315098],
            [38.338588, -62.777713, 1.022953],
            [51.191007, -59.302347, -5.349435],
            [60.053851, -50.190255, -11.615746],
            [0.65394, -42.19379, 13.380835],
            [0.804809, -30.993721, 21.150853],
            [0.992204, -19.944596, 29.284036],
            [1.226783, -8.414541, 36.94806],
            [-14.772472, 2.598255, 20.132003],
            [-7.180239, 4.751589, 23.536684],
            [0.55592, 6.5629, 25.944448],
            [8.272499, 4.661005, 23.695741],
            [15.214351, 2.643046, 20.858157],
            [-46.04729, -37.471411, -7.037989],
            [-37.674688, -42.73051, -3.021217],
            [-27.883856, -42.711517, -1.353629],
            [-19.648268, -36.754742, 0.111088],
            [-28.272965, -35.134493, 0.147273],
            [-38.082418, -34.919043, -1.476612],
            [19.265868, -37.032306, 0.665746],
            [27.894191, -43.342445, -0.24766],
            [37.437529, -43.110822, -1.696435],
            [45.170805, -38.086515, -4.894163],
            [38.196454, -35.532024, -0.282961],
            [28.764989, -35.484289, 1.172675],
            [-28.916267, 28.612716, 2.24031],
            [-17.533194, 22.172187, 15.934335],
            [-6.68459, 19.029051, 22.611355],
            [0.381001, 20.721118, 23.748437],
            [8.375443, 19.03546, 22.721995],
            [18.876618, 22.394109, 15.610679],
            [28.794412, 28.079924, 3.217393],
            [19.057574, 36.298248, 14.987997],
            [8.956375, 39.634575, 22.554245],
            [0.381549, 40.395647, 23.591626],
            [-7.428895, 39.836405, 22.406106],
            [-18.160634, 36.677899, 15.121907],
            [-24.37749, 28.677771, 4.785684],
            [-6.897633, 25.475976, 20.893742],
            [0.340663, 26.014269, 22.220479],
            [8.444722, 25.326198, 21.02552],
            [24.474473, 28.323008, 5.712776],
            [8.449166, 30.596216, 20.671489],
            [0.205322, 31.408738, 21.90367],
            [-7.198266, 30.844876, 20.328022]])

        self.focal_length = self.size[1]
        self.camera_center = (self.size[1] / 2, self.size[0] / 2)
        self.camera_matrix = np.array(
            [[self.focal_length, 0, self.camera_center[0]],
             [0, self.focal_length, self.camera_center[1]],
             [0, 0, 1]], dtype="double")

        # Assuming no lens distortion
        self.dist_coeefs = np.zeros((4, 1))

        # Rotation vector and translation vector
        self.r_vec = np.array([[0.01891013], [0.08560084], [-3.14392813]])
        self.t_vec = np.array([[-14.97821226], [-10.62040383], [-2053.03596872]])

    def get_euler_angle(self, rotation_vector):
        # calc rotation angles
        theta = cv2.norm(rotation_vector, cv2.NORM_L2)

        # transform to quaterniond
        w = math.cos(theta / 2)
        x = math.sin(theta / 2) * rotation_vector[0][0] / theta
        y = math.sin(theta / 2) * rotation_vector[1][0] / theta
        z = math.sin(theta / 2) * rotation_vector[2][0] / theta

        # pitch (x-axis rotation)
        t0 = 2.0 * (w * x + y * z)
        t1 = 1.0 - 2.0 * (x ** 2 + y ** 2)
        pitch = math.atan2(t0, t1)

        # yaw (y-axis rotation)
        t2 = 2.0 * (w * y - z * x)
        if t2 > 1.0:
            t2 = 1.0
        if t2 < -1.0:
            t2 = -1.0
        yaw = math.asin(t2)

        # roll (z-axis rotation)
        t3 = 2.0 * (w * z + x * y)
        t4 = 1.0 - 2.0 * (y ** 2 + z ** 2)
        roll = math.atan2(t3, t4)

        return pitch, yaw, roll

    def solve_pose_by_6_points(self, image_points):
        """
        Solve pose from image points
        Return (rotation_vector, translation_vector) as pose.
        """
        points_6 = np.float32([
            image_points[30], image_points[36], image_points[45],
            image_points[48], image_points[54], image_points[8]])

        _, rotation_vector, translation_vector = cv2.solvePnP(
            self.model_points_6,
            points_6,
            self.camera_matrix,
            self.dist_coeefs,
            rvec=self.r_vec,
            tvec=self.t_vec,
            useExtrinsicGuess=True)

        return rotation_vector, translation_vector

    def solve_pose_by_14_points(self, image_points):
        points_14 = np.float32([
            image_points[17], image_points[21], image_points[22], image_points[26], image_points[36],
            image_points[39], image_points[42], image_points[45], image_points[31], image_points[35],
            image_points[48], image_points[54], image_points[57], image_points[8]])

        _, rotation_vector, translation_vector = cv2.solvePnP(
            self.model_points_14,
            points_14,
            self.camera_matrix,
            self.dist_coeefs,
            rvec=self.r_vec,
            tvec=self.t_vec,
            useExtrinsicGuess=True)

        return rotation_vector, translation_vector

    def solve_pose_by_68_points(self, image_points):
        image_points = np.float32(np.expand_dims(image_points, axis=1))
        _, rotation_vector, translation_vector = cv2.solvePnP(
            self.model_points_68,
            image_points,
            self.camera_matrix,
            self.dist_coeefs,
            rvec=self.r_vec,
            tvec=self.t_vec,
            useExtrinsicGuess=True)

        return rotation_vector, translation_vector

    def draw_annotation_box(self, image, rotation_vector, translation_vector, color=(255, 255, 255), line_width=2):
        """Draw a 3D box as annotation of pose"""
        point_3d = []
        rear_size = 75
        rear_depth = 0
        point_3d.append((-rear_size, -rear_size, rear_depth))
        point_3d.append((-rear_size, rear_size, rear_depth))
        point_3d.append((rear_size, rear_size, rear_depth))
        point_3d.append((rear_size, -rear_size, rear_depth))
        point_3d.append((-rear_size, -rear_size, rear_depth))

        front_size = 100
        front_depth = 100
        point_3d.append((-front_size, -front_size, front_depth))
        point_3d.append((-front_size, front_size, front_depth))
        point_3d.append((front_size, front_size, front_depth))
        point_3d.append((front_size, -front_size, front_depth))
        point_3d.append((-front_size, -front_size, front_depth))
        point_3d = np.array(point_3d, dtype=float).reshape(-1, 3)

        # Map to 2d image points
        (point_2d, _) = cv2.projectPoints(point_3d,
                                          rotation_vector,
                                          translation_vector,
                                          self.camera_matrix,
                                          self.dist_coeefs)
        point_2d = np.int32(point_2d.reshape(-1, 2))

        # Draw all the lines
        cv2.polylines(image, [point_2d], True, color, line_width, cv2.LINE_AA)
        cv2.line(image, tuple(point_2d[1]), tuple(
            point_2d[6]), color, line_width, cv2.LINE_AA)
        cv2.line(image, tuple(point_2d[2]), tuple(
            point_2d[7]), color, line_width, cv2.LINE_AA)
        cv2.line(image, tuple(point_2d[3]), tuple(
            point_2d[8]), color, line_width, cv2.LINE_AA)


def run(pic_path):
    import dlib
    detector = dlib.get_frontal_face_detector()  # dlib's built-in frontal face detector
    img = cv2.imread(pic_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV reads images as BGR
    faces = detector(img, 1)  # detect faces; may return several boxes
    r = faces[0]  # keep only the first face
    predictor = dlib.shape_predictor('./shape_predictor_68_face_landmarks.dat')  # load the landmark model
    ldmk = predictor(img, faces[0])  # detect the 68 landmarks inside the face box
    points_68 = np.matrix([[p.x, p.y] for p in ldmk.parts()])
    # points_68 = [[p.x, p.y] for p in ldmk.parts()]
    # x0, y0, x1, y1 = r.left(), r.top(), r.right(), r.bottom()
    # cv2.rectangle(img, (x0, y0), (x1, y1), (255, 0, 0), 2)  # draw the face box
    # for p in points_68:
    #     cv2.circle(img, (int(p[0]), int(p[1])), 2, (0, 255, 0), -1, 0)
    # cv2.imshow("img",cv2.cvtColor(img, cv2.COLOR_RGB2BGR))
    # cv2.waitKey()

    pose_estimator = PoseEstimator(img_size=img.shape)
    #pose = pose_estimator.solve_pose_by_6_points(points_68)
    #pose = pose_estimator.solve_pose_by_14_points(points_68)
    pose = pose_estimator.solve_pose_by_68_points(points_68)
    pitch, yaw, roll = pose_estimator.get_euler_angle(pose[0])

    def _radian2angle(r):
        return (r / math.pi) * 180

    Y, X, Z = map(_radian2angle, [pitch, yaw, roll])
    line = 'Y:{:.1f}\nX:{:.1f}\nZ:{:.1f}'.format(Y, X, Z)
    print('{},{}'.format(os.path.basename(pic_path), line.replace('\n', ',')))

    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    y = 20
    for _, txt in enumerate(line.split('\n')):
        cv2.putText(img, txt, (20, y), cv2.FONT_HERSHEY_PLAIN, 1.3, (0, 0, 255), 1)
        y = y + 15

    ori_68 = [[p.x, p.y] for p in ldmk.parts()]
    for p in ori_68:
        cv2.circle(img, (int(p[0]), int(p[1])), 2, (0, 255, 0), -1, 0)

    cv2.imshow('img', img)
    if cv2.waitKey(-1) == 27:
        pass

    return 0


if __name__ == "__main__":
    sys.exit(run("./8_Election_Campain_Election_Campaign_8_326.jpg"))
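
Incidentally, this script converts the rotation vector to Euler angles through a quaternion, while the official preprocessing code above goes through cv2.Rodrigues and cv2.decomposeProjectionMatrix. A minimal sketch of the latter route, applicable to the (rotation_vector, translation_vector) pair returned by any of the solve_pose_by_* methods (the helper name euler_from_pose is mine):

import cv2

def euler_from_pose(rotation_vector, translation_vector):
    # Rotation vector -> rotation matrix -> 3x4 pose matrix, then let
    # OpenCV decompose it into Euler angles (pitch, yaw, roll) in degrees.
    rmat, _ = cv2.Rodrigues(rotation_vector)
    pose_mat = cv2.hconcat((rmat, translation_vector))
    euler_angles = cv2.decomposeProjectionMatrix(pose_mat)[-1]
    pitch, yaw, roll = euler_angles.flatten()
    return pitch, yaw, roll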

 
