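# Scans a multi-class dataset directory tree and reports file counts and file types, to catch mistakes made while building a dataset by hand; adjust it slightly to match your own directory layout.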
import glob
import os
import shutil
import sys
def get_all(path):
    """Return every path matching the glob pattern *path* as a list.

    The pattern may be relative or absolute. An empty list is returned
    when nothing matches.
    """
    matches = glob.glob(path)
    return matches
def get_iterator(path):
    """Return a lazy iterator over the paths matching the glob pattern *path*.

    Unlike ``get_all``, the matches are produced one at a time instead of
    being collected into a list first.
    """
    lazy_matches = glob.iglob(path)
    return lazy_matches
def main(controll, path=r"E:\Desktop\Zhou\data13\*"):
    """Scan a dataset tree and print file counts and file types per folder.

    The tree is walked three levels deep below each path matched by
    *path*: top-level folder -> class folder -> split folder -> files.
    For every top-level folder the number of class folders is printed.
    For top-level folders named ``sketch`` or ``views`` the total number
    of files found in split folders named ``train`` and ``test`` is printed
    as well, together with the set of file extensions encountered.

    Folders are visited in sorted order so the report is reproducible.

    Args:
        controll: When truthy, additionally print one line per class folder
            and one line per split folder (file count and extensions).
        path: Glob pattern that matches the top-level folders to scan.
            Defaults to the location the script was originally written for.

    Returns:
        None. The report is written to standard output.
    """
    for first_subpath in sorted(glob.iglob(path)):
        # basename works with both "/" and "\\" separators, unlike a manual
        # split on backslash, which only isolates the name on Windows.
        first_sub_name = os.path.basename(first_subpath)
        print("\033[1;35;46m" + first_sub_name + "統計如下:\033[0m")

        class_num = 0
        # A top-level folder only ever reports its own train/test totals, so
        # one pair of counters per folder is enough.
        split_counts = {"train": 0, "test": 0}
        total_type = set()

        for class_path in _list_children(first_subpath):
            class_num += 1
            if controll:
                print(" " + os.path.basename(class_path) + ":")

            for split_path in _list_children(class_path):
                split_name = os.path.basename(split_path)
                all_files = _list_children(split_path)
                per_class_type = {_file_type(item) for item in all_files}
                total_type |= per_class_type

                if controll:
                    print(" " + split_name + ":", end="")
                    print(len(all_files), end=" ")
                    print("包含的文件類型有:", per_class_type)

                # Folders with any other name are listed but not totalled.
                if split_name in split_counts:
                    split_counts[split_name] += len(all_files)

        print("class_num===========================================================", class_num)
        if first_sub_name == "sketch":
            print("sketch_train_num====================================================", split_counts["train"])
            print("sketch_test_num=====================================================", split_counts["test"])
            print("包含的文件類型有====================================================", total_type)
        if first_sub_name == "views":
            print("views_train_num=====================================================", split_counts["train"])
            print("views_test_num======================================================", split_counts["test"])
            print("包含的文件類型有====================================================", total_type)


def _list_children(directory):
    """Return the sorted glob matches for ``*`` directly inside *directory*."""
    # glob.escape stops characters such as "[", "?" or "*" in a folder name
    # from being treated as wildcards when the path is reused as a pattern.
    return sorted(glob.glob(os.path.join(glob.escape(directory), "*")))


def _file_type(file_path):
    """Return the extension of *file_path* without its leading dot."""
    # splitext only inspects the final path component, so a dot in a parent
    # folder name is never mistaken for the start of an extension.
    extension = os.path.splitext(file_path)[1]
    return extension[1:] if extension else "<no extension>"
if __name__ == "__main__":
    # Whether to print the per-class details; set to 1 to show them.
    show_details = 0
    main(show_details)