製作lfw格式的數據集

1 製作lfw格式的數據集代碼

import glob
import os.path
import numpy as np
import os
from PIL import Image


def make_lfw_format_dataset(INPUT_DATA, extensions, save_path):
    """Re-save a one-folder-per-person image tree using LFW-style file names.

    For every immediate sub-directory ``<name>`` of ``INPUT_DATA``, each file
    matching ``*.<extensions>`` is opened with PIL and saved again as
    ``<save_path>/<name>/<name>_<NNNN>.png``, where ``NNNN`` is a zero-padded
    four-digit counter starting at 0.

    Args:
        INPUT_DATA: Root directory holding one sub-directory per person.
        extensions: File extension (without the dot) of the source images,
            e.g. ``"jpg"``.
        save_path: Root directory of the generated dataset. A trailing path
            separator is optional.

    Returns:
        None. The result is the files written below ``save_path``.
    """
    # Only the immediate sub-directories are class folders. A missing
    # INPUT_DATA yields nothing from os.walk, which is treated as "no classes".
    class_names = next(os.walk(INPUT_DATA), (INPUT_DATA, [], []))[1]
    for dir_name in class_names:
        file_glob = os.path.join(INPUT_DATA, dir_name, '*.' + extensions)
        # os.path.join keeps this portable and independent of whether
        # save_path ends with a separator.
        target_dir = os.path.join(save_path, dir_name)
        os.makedirs(target_dir, exist_ok=True)
        # glob returns files in arbitrary order; sort so numbering is stable.
        for j, img_path in enumerate(sorted(glob.glob(file_glob))):
            target = os.path.join(
                target_dir, "%s_%04d%s" % (dir_name, j, ".png"))
            # The context manager closes the source file after saving.
            with Image.open(img_path) as img:
                img.save(target)


if __name__ == '__main__':
    # Convert every *.jpg under "test\" into the LFW layout under "test0\".
    make_lfw_format_dataset(
        INPUT_DATA="test\\",
        extensions="jpg",
        save_path="test0\\",
    )
    print("SUCCEED !!!")

2 生成lfw格式的人臉對代碼

import glob
import os.path
import numpy as np
import os


def find_not_zero_pos(sstr):
    """Return the index of the first character of ``sstr`` that is not "0".

    When every character is "0" (or ``sstr`` is empty) the index of the last
    character, ``len(sstr) - 1``, is returned instead, so an empty string
    gives ``-1``.
    """
    for pos, char in enumerate(sstr):
        if char != "0":
            return pos
    return len(sstr) - 1


def get_real_str(sstr):
    """Strip leading "0" characters from ``sstr``, keeping at least one char.

    The cut position comes from ``find_not_zero_pos``, so an all-zero string
    such as "0000" is reduced to "0".
    """
    start = find_not_zero_pos(sstr)
    return sstr[start:]


def create_match_content(num_pairs=3000, input_data=None):
    """Build "matched" pair lines: two different images of the same class.

    Every immediate sub-directory of the data root is one class. Each pass
    over the classes draws, for every class with at least two images, one
    random ordered pair of distinct images and records it as
    ``"<class> <index1> <index2>"``. An index is the text between the last
    "_" and the first following "." of the file name, with leading zeros
    removed by ``get_real_str``. Passes repeat until at least ``num_pairs``
    distinct lines exist; a pass is always completed, so slightly more than
    ``num_pairs`` lines may be returned.

    Args:
        num_pairs: Minimum number of distinct pair lines to generate.
        input_data: Root directory of the dataset. Defaults to the
            module-level ``INPUT_DATA``.

    Returns:
        A tuple ``(matched_result, k)``: the set of pair lines and the number
        of pairs drawn, duplicates included.

    Raises:
        ValueError: If the dataset cannot supply ``num_pairs`` distinct
            ordered pairs. Without this check the loop would never end.
    """
    root = INPUT_DATA if input_data is None else input_data
    extensions = 'png'

    # Scan the directory tree once instead of re-globbing on every pass.
    candidates = []
    for dir_name in next(os.walk(root), (root, [], []))[1]:
        file_list = glob.glob(os.path.join(root, dir_name, '*.' + extensions))
        # De-duplicate the parsed indices so two files can never produce a
        # "pair" that names the same index twice.
        indices = sorted({
            get_real_str(os.path.basename(path).split("_")[-1].split(".")[0])
            for path in file_list
        })
        if len(indices) >= 2:
            candidates.append((dir_name, indices))

    # n images give n * (n - 1) distinct ordered pairs.
    capacity = sum(len(ix) * (len(ix) - 1) for _, ix in candidates)
    if capacity < num_pairs:
        raise ValueError(
            "cannot build %d matched pairs: only %d distinct pairs available "
            "under %r" % (num_pairs, capacity, root))

    matched_result = set()
    k = 0
    while len(matched_result) < num_pairs:
        for label_name, indices in candidates:
            length = len(indices)
            # Draw two distinct positions uniformly without a retry loop:
            # pick the second from the remaining length - 1 slots and skip
            # over the first.
            first = np.random.randint(length)
            second = np.random.randint(length - 1)
            if second >= first:
                second += 1
            matched_result.add(
                label_name + ' ' + indices[first] + ' ' + indices[second])
            k = k + 1
    return matched_result, k


def create_unmatch_content(num_pairs=3000, input_data=None):
    """Build "unmatched" pair lines: one image from each of two classes.

    Every immediate sub-directory of the data root that contains images is
    one class. For each image position ``j`` in ``range(24)`` and each class
    (``class1``), a different class (``class2``) is drawn at random and the
    ``j``-th image of each (modulo the class size) is recorded as
    ``"<class2> <index2> <class1> <index1>"``. An index is the text between
    the last "_" and the first following "." of the file name, with leading
    zeros removed by ``get_real_str``. Generation stops as soon as
    ``num_pairs`` distinct lines exist.

    Args:
        num_pairs: Number of distinct pair lines to generate.
        input_data: Root directory of the dataset. Defaults to the
            module-level ``INPUT_DATA``.

    Returns:
        A tuple ``(unmatched_result, k)``: the set of pair lines and the
        number of pairs drawn, duplicates included.

    Raises:
        ValueError: If the dataset cannot supply ``num_pairs`` distinct
            lines (for example fewer than two classes with images). Without
            this check the loop would never end.
    """
    import math

    root = INPUT_DATA if input_data is None else input_data
    extensions = 'png'
    positions = 24  # image positions tried per class, as in range(24)

    # Scan the directory tree once; the original re-walked and re-globbed
    # inside the innermost loop.
    classes = []
    for dir_name in next(os.walk(root), (root, [], []))[1]:
        file_list = glob.glob(os.path.join(root, dir_name, '*.' + extensions))
        indices = sorted({
            get_real_str(os.path.basename(path).split("_")[-1].split(".")[0])
            for path in file_list
        })
        if indices:
            classes.append((dir_name, indices))

    # Exact number of distinct lines this scheme can produce. For classes of
    # sizes a and b, (j % a, j % b) repeats with period lcm(a, b), so an
    # ordered class pair contributes min(positions, lcm(a, b)) lines.
    size_counts = {}
    for _, indices in classes:
        size_counts[len(indices)] = size_counts.get(len(indices), 0) + 1
    capacity = 0
    for size_a, count_a in size_counts.items():
        for size_b, count_b in size_counts.items():
            partners = count_b - 1 if size_a == size_b else count_b
            period = size_a * size_b // math.gcd(size_a, size_b)
            capacity += count_a * partners * min(positions, period)
    if capacity < num_pairs:
        raise ValueError(
            "cannot build %d unmatched pairs: only %d distinct pairs "
            "available under %r" % (num_pairs, capacity, root))

    unmatched_result = set()
    k = 0
    while len(unmatched_result) < num_pairs:
        for j in range(positions):
            for i, (class1_name, indices1) in enumerate(classes):
                # Pick any class except class1. The original condition
                # `random_number == 0 | random_number == i` parsed as a
                # chained comparison and never excluded the root directory.
                other = np.random.randint(len(classes) - 1)
                if other >= i:
                    other += 1
                class2_name, indices2 = classes[other]
                base_name1 = indices1[j % len(indices1)]
                base_name2 = indices2[j % len(indices2)]
                s = (class2_name + ' ' + base_name2 + ' '
                     + class1_name + ' ' + base_name1)
                unmatched_result.add(s)
                k = k + 1
                if len(unmatched_result) >= num_pairs:
                    return unmatched_result, k
    return unmatched_result, k


if __name__ == '__main__':
    # TODO: image data folder (one sub-directory per class).
    # INPUT_DATA is a module-level global read by create_match_content()
    # and create_unmatch_content().
    INPUT_DATA = r'lfw'
    txt_path = 'pairs.txt'
    if os.path.isfile(txt_path):
        os.remove(txt_path)
    result, k1 = create_match_content()
    print(k1)
    result_un, k2 = create_unmatch_content()
    print(k2)
    result1 = list(result)
    result2 = list(result_un)
    # The context manager closes the file even if a write fails.
    with open(txt_path, 'w') as file:
        # Header line, then 10 groups of 300 matched + 300 unmatched lines.
        file.write('10 300\n')
        for i in range(10):
            for pair in result1[i*300:i*300+300]:
                file.write(pair + '\n')
            for pair in result2[i*300:i*300+300]:
                file.write(pair + '\n')

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章