# 1. 製作lfw格式的數據集代碼 (Script 1: build an LFW-format dataset)
import glob
import os.path
import numpy as np
import os
from PIL import Image
def make_lfw_format_dataset(INPUT_DATA, extensions, save_path):
    """Re-save a folder-per-identity image set using LFW-style file names.

    For every sub-directory reported by ``os.walk(INPUT_DATA)``, the images
    matching ``INPUT_DATA/<dir_name>/*.<extensions>`` are written to
    ``<save_path>/<dir_name>/<dir_name>_<NNNN>.png``, where ``NNNN`` is a
    zero-padded running index starting at 0.

    Args:
        INPUT_DATA: Root folder holding one sub-directory per identity.
        extensions: Extension of the source images, without the dot.
        save_path: Output root folder; missing directories are created.
    """
    sub_dirs = [entry[0] for entry in os.walk(INPUT_DATA)]
    # The first os.walk entry is INPUT_DATA itself, so it is skipped.
    for sub_dir in sub_dirs[1:]:
        dir_name = os.path.basename(sub_dir)
        # The pattern is anchored at INPUT_DATA/<dir_name>, so only folders
        # directly under INPUT_DATA contribute images.
        file_glob = os.path.join(INPUT_DATA, dir_name, '*.' + extensions)
        # os.path.join works with or without a trailing separator on
        # save_path and does not hard-code the Windows "\\" separator.
        target_dir = os.path.join(save_path, dir_name)
        os.makedirs(target_dir, exist_ok=True)
        # glob order is unspecified; sorting makes the numbering reproducible.
        for j, img_path in enumerate(sorted(glob.glob(file_glob))):
            target_name = "%s_%04d%s" % (dir_name, j, ".png")
            # The context manager closes the source file once it is saved.
            with Image.open(img_path) as img:
                img.save(os.path.join(target_dir, target_name))
if __name__ == '__main__':
    # Convert the JPEG images under "test\" into LFW-named PNGs under "test0\".
    source_root, source_ext, target_root = "test\\", "jpg", "test0\\"
    make_lfw_format_dataset(source_root, source_ext, target_root)
    print("SUCCEED !!!")
# 2. 生成lfw格式的人臉對代碼 (Script 2: generate LFW-format face pairs)
import glob
import os.path
import numpy as np
import os
def find_not_zero_pos(sstr):
    """Return the index of the first character of ``sstr`` that is not "0".

    When every character is "0" (or ``sstr`` is empty) the result is
    ``len(sstr) - 1``, i.e. the index of the last character, or -1 for an
    empty input.
    """
    non_zero_positions = (pos for pos, char in enumerate(sstr) if char != "0")
    return next(non_zero_positions, len(sstr) - 1)
def get_real_str(sstr):
    """Return ``sstr`` from the position given by ``find_not_zero_pos``.

    This drops leading "0" characters; a string made only of zeros keeps its
    last character.
    """
    start = find_not_zero_pos(sstr)
    return sstr[start:]
def create_match_content(num_pairs=3000, input_data=None, extensions='png'):
    """Build "matched" pair lines: two different images of the same identity.

    Each line has the form ``"<label> <index1> <index2>"``. An index is the
    part of the file name after the last "_" and before the first ".", with
    leading zeros removed by ``get_real_str``.

    Args:
        num_pairs: Minimum number of distinct lines to collect.
        input_data: Dataset root with one sub-directory per identity.
            Defaults to the module-level ``INPUT_DATA``.
        extensions: Image file extension, without the dot.

    Returns:
        A tuple ``(matched_result, k)``: the set of pair lines and the number
        of pairs drawn, duplicates included. The set holds fewer than
        ``num_pairs`` lines only when the dataset cannot produce that many
        distinct pairs.
    """
    if input_data is None:
        # Fall back to the global set by the script entry point.
        input_data = INPUT_DATA

    # Scan the folders once; the listing does not change between passes.
    indices_by_label = {}
    for sub_dir in [entry[0] for entry in os.walk(input_data)][1:]:
        dir_name = os.path.basename(sub_dir)
        file_glob = os.path.join(input_data, dir_name, '*.' + extensions)
        indices = sorted({
            get_real_str(os.path.basename(path).split("_")[-1].split(".")[0])
            for path in glob.glob(file_glob)
        })
        # A matched pair needs two distinct images of the same identity.
        if len(indices) >= 2:
            indices_by_label[dir_name] = indices

    # Cap the goal at the number of distinct ordered pairs that exist, so the
    # loop below terminates even on a small dataset.
    max_pairs = sum(len(v) * (len(v) - 1) for v in indices_by_label.values())
    target = min(num_pairs, max_pairs)

    matched_result = set()
    k = 0
    while len(matched_result) < target:
        for label_name, indices in indices_by_label.items():
            count = len(indices)
            first = np.random.randint(count)
            # Draw the second position from the remaining count - 1 slots and
            # shift it past `first`, so the two are always different.
            second = np.random.randint(count - 1)
            if second >= first:
                second += 1
            matched_result.add(
                label_name + ' ' + indices[first] + ' ' + indices[second])
            k = k + 1
    return matched_result, k
def create_unmatch_content(num_pairs=3000, input_data=None, extensions='png'):
    """Build "unmatched" pair lines: images taken from two different classes.

    Each line has the form ``"<class2> <index2> <class1> <index1>"``. An
    index is the part of the file name after the last "_" and before the
    first ".", with leading zeros removed by ``get_real_str``.

    Args:
        num_pairs: Number of distinct lines to collect.
        input_data: Dataset root with one sub-directory per class.
            Defaults to the module-level ``INPUT_DATA``.
        extensions: Image file extension, without the dot.

    Returns:
        A tuple ``(unmatched_result, k)``: the set of pair lines and the
        number of candidate pairs drawn, duplicates included. The set holds
        fewer than ``num_pairs`` lines only when fewer than two classes have
        images or when sampling stops producing new pairs.
    """
    if input_data is None:
        # Fall back to the global set by the script entry point.
        input_data = INPUT_DATA

    # Scan the folders once instead of on every loop iteration.
    indices_by_class = {}
    for sub_dir in [entry[0] for entry in os.walk(input_data)][1:]:
        class_name = os.path.basename(sub_dir)
        file_glob = os.path.join(input_data, class_name, '*.' + extensions)
        indices = [
            get_real_str(os.path.basename(path).split("_")[-1].split(".")[0])
            for path in glob.glob(file_glob)
        ]
        if indices:
            indices_by_class[class_name] = indices
    classes = list(indices_by_class.items())

    unmatched_result = set()
    k = 0
    # A pair of different classes needs at least two classes with images.
    if len(classes) < 2:
        return unmatched_result, k

    slots_per_pass = 24  # image positions cycled through per pass (as in the original)
    max_stalled_passes = 10  # give up after this many passes without a new pair
    stalled_passes = 0
    while len(unmatched_result) < num_pairs and stalled_passes < max_stalled_passes:
        size_before = len(unmatched_result)
        for j in range(slots_per_pass):
            for i, (class1_name, indices1) in enumerate(classes):
                # Pick any class other than `i`: draw from the remaining
                # len(classes) - 1 positions and shift past `i`.
                other = np.random.randint(len(classes) - 1)
                if other >= i:
                    other += 1
                class2_name, indices2 = classes[other]
                base_name1 = indices1[j % len(indices1)]
                base_name2 = indices2[j % len(indices2)]
                unmatched_result.add(
                    class2_name + ' ' + base_name2 + ' '
                    + class1_name + ' ' + base_name1)
                k = k + 1
                if len(unmatched_result) >= num_pairs:
                    return unmatched_result, k
        if len(unmatched_result) == size_before:
            stalled_passes += 1
        else:
            stalled_passes = 0
    return unmatched_result, k
if __name__ == '__main__':
    # TODO: point this at the image dataset folder. The pair generators read
    # it as a module-level global.
    INPUT_DATA = r'lfw'
    txt_path = 'pairs.txt'
    if os.path.isfile(txt_path):
        os.remove(txt_path)
    result, k1 = create_match_content()
    print(k1)
    # print(result)
    result_un, k2 = create_unmatch_content()
    print(k2)
    # print(result_un)
    result1 = list(result)
    result2 = list(result_un)
    # The "10 300" header promises 10 folds of 300 matched and 300 unmatched
    # pairs, so refuse to write a file that cannot honour it.
    if len(result1) < 3000 or len(result2) < 3000:
        raise SystemExit(
            'not enough pairs: %d matched, %d unmatched (3000 of each required)'
            % (len(result1), len(result2)))
    # The context manager closes the file even if a write fails.
    with open(txt_path, 'w') as pairs_file:
        pairs_file.write('10 300\n')
        for i in range(10):
            # Each fold is 300 matched lines followed by 300 unmatched lines.
            for pair in result1[i*300:i*300+300]:
                pairs_file.write(pair + '\n')
            for pair in result2[i*300:i*300+300]:
                pairs_file.write(pair + '\n')