Python 文件去重之遞歸去重

# -*- coding:utf-8 -*-

import os
import hashlib
import time
import sys


# Compute the MD5 digest of a file.
def get_ms5(filename, chunk_size=1024 * 1024):
    """Return the hexadecimal MD5 digest of the file at *filename*.

    The file is opened in binary mode and fed to the hash in pieces of
    ``chunk_size`` bytes, so a large file never has to fit in memory at
    once. The ``with`` block closes the file even if a read fails.

    Args:
        filename: Path of the file to hash.
        chunk_size: Number of bytes requested per read.

    Returns:
        The MD5 digest as a hexadecimal string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    digest = hashlib.md5()
    with open(filename, "rb") as stream:
        # iter() with a sentinel keeps calling read() until it returns b"",
        # which is what a binary file yields at end-of-file.
        for chunk in iter(lambda: stream.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Collect the paths of all files below a directory.
def get_recursion_file(path):
    """Walk *path* recursively and return the path of every file found.

    Each file path is built by joining the directory reported by
    ``os.walk`` with the file name, and is printed as it is collected.

    Args:
        path: Root directory to walk.

    Returns:
        A list of file paths in the order ``os.walk`` produced them.
    """
    collected = []
    for root, _subdirs, names in os.walk(path):
        for name in names:
            full_path = os.path.join(root, name)
            collected.append(full_path)
            print(full_path)
    return collected


def get_urllist(base=r'F:\img\\'):
    """Return the paths of all files found recursively under *base*.

    Args:
        base: Root directory to scan. The default is the directory that
            was previously hard-coded in this function, so calling
            ``get_urllist()`` with no argument behaves as before.

    Returns:
        The list produced by ``get_recursion_file(base)``.
    """
    # Named file_list rather than list so the builtin is not shadowed.
    file_list = get_recursion_file(base)
    return file_list


# Entry point: delete every file whose content duplicates an earlier file.
if __name__ == '__main__':
    # Digests of the files kept so far. A set makes each membership test
    # constant-time; the previous list made the loop quadratic in the
    # number of files.
    seen_md5 = set()
    urllist = get_urllist()
    print("test1")
    for a in urllist:
        md5 = get_ms5(a)
        if md5 in seen_md5:
            # Same digest as a file already kept: remove this copy, so the
            # first file encountered with a given digest is the one retained.
            os.remove(a)
            print("重複:%s" % a)
        else:
            seen_md5.add(md5)

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章