數據集掃描

適用於多類別數據集目錄的掃描腳本:可統計各類別的文件數量與文件類型,防止自行創建數據集時出現紕漏;根據具體的文件結構稍作改動即可使用。

import sys
import shutil
import glob

def get_all(path):
    """Return the list of paths matching the glob pattern *path*.

    The pattern may be relative or absolute; an empty list is returned when
    nothing matches.
    """
    matches = glob.glob(path)
    return matches

def get_iterator(path):
    """Return an iterator over the paths matching the glob pattern *path*.

    Unlike ``get_all``, the matches are produced one at a time instead of
    being collected into a list first.
    """
    lazy_matches = glob.iglob(path)
    return lazy_matches

def main(controll, path=r"E:\Desktop\Zhou\data13\*"):
    """Scan a dataset tree and print file counts and file types.

    The walk goes three directory levels below every entry matched by *path*:
    top-level directory -> class directory -> sub-directory -> files. For each
    top-level directory the number of class directories is printed. When the
    top-level directory is named ``sketch`` or ``views``, the total number of
    files found in sub-directories named ``train`` and ``test`` and the set of
    file extensions seen are printed as well.

    Args:
        controll: Truthy to additionally print, for every class directory,
            the file count and extension set of each of its sub-directories.
        path: Glob pattern selecting the top-level directories. Defaults to
            the location that used to be hard-coded in this function.
    """
    import os  # local import: the file's import block does not provide os

    def children(directory):
        # Escape glob metacharacters so a directory literally named e.g.
        # "cls[1]" is still listed, and join with the platform separator
        # instead of a hard-coded backslash.
        return os.path.join(glob.escape(directory), "*")

    for first_subpath in get_iterator(path):  # e.g. the sketch and views directories
        first_sub_name = os.path.basename(first_subpath)
        print("\033[1;35;46m"+first_sub_name+"統計如下:\033[0m")

        class_num = 0
        # Only sub-directories named exactly "train" or "test" are counted.
        split_counts = {"train": 0, "test": 0}
        total_type = set()

        for class_path in get_iterator(children(first_subpath)):  # one directory per class
            class_num += 1
            if controll:
                print("  "+os.path.basename(class_path)+":")

            for split_path in get_iterator(children(class_path)):  # train / test directories
                split_name = os.path.basename(split_path)
                all_files = get_all(children(split_path))
                # splitext looks only at the final path component, so a dot in
                # a directory name or a file without any extension can no
                # longer leak a whole path into the set; such files are
                # reported as the empty string.
                per_class_type = {os.path.splitext(name)[1][1:] for name in all_files}
                total_type |= per_class_type
                if controll:
                    print("    "+split_name+":",end="")
                    print(len(all_files),end="  ")
                    print("包含的文件類型有:",per_class_type)
                if split_name in split_counts:
                    split_counts[split_name] += len(all_files)

        print("class_num===========================================================", class_num)
        if first_sub_name == "sketch":
            print("sketch_train_num====================================================", split_counts["train"])
            print("sketch_test_num=====================================================", split_counts["test"])
            print("包含的文件類型有====================================================", total_type)

        if first_sub_name == "views":
            print("views_train_num=====================================================", split_counts["train"])
            print("views_test_num======================================================", split_counts["test"])
            print("包含的文件類型有====================================================", total_type)


if __name__ == "__main__":
    # Whether to show the detailed information of every class: set to 1 to show it.
    controll = 0
    main(controll)

在這裏插入圖片描述

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章