在上篇博客https://blog.csdn.net/lzqg1990/article/details/89881731中,利用Python第三方庫RarFile,無需解壓,即對RAR壓縮包內文件大小進行了統計。
在這篇博客中,對統計功能進一步擴展:
- 對相同文件結構的不同城市壓縮包進行批量統計
- 統計結果中自動添加最優單位,比如將2019Byte記爲1.97KB爲最優結果。
- 按給定的城市順序將統計結果自動生成CSV
將特定功能封裝爲函數,更方便後續使用。
#coding=utf-8
#城市文件大小統計
import rarfile
# Cities are processed -- and therefore written to the report -- in exactly
# this order.
city_ls = [
    'BEIJING',
    'SHANGHAI',
    'GUANGZHOU',
    'SHENZHEN',
    'SHENYANG',
    'XIAN',
    'CHENGDU',
    'TIANJIN',
    'NANJING',
    'HANGZHOU',
    'SUZHOU',
    'ZHENGZHOU',
    'WUHAN',
    'NINGBO',
    'WUXI',
    'WENZHOU',
    'CHANGSHA',
    'CHANGCHUN',
    'XIANGGANG',
    'DALIAN',
    'DONGGUAN',
    'TAIYUAN',
    'HEFEI',
    'JINAN',
    'HAERBIN',
    'QINGDAO',
    'CHONGQING',
    'KUNMING',
    'XIAMEN',
    'NANNING',
    'FOSHAN',
    'FUZHOU',
    'TANGSHAN',
    'AOMEN',
    'HUHEHAOTE',
    'YANTAI',
    'WULUMUQI',
    'NANCHANG',
    'YANGZHOU',
    'LANZHOU',
    'GUIYANG',
    'BAODING',
    'SHIJIAZHUANG',
    'QUANZHOU',
    'HAIKOU',
    'SHAOXING',
    'TAIZHOU',
    'SANYA',
    'LIUZHOU',
    'WEIFANG',
    'QINHUANGDAO',
    'XINING',
    'ZHUHAI',
    'XUZHOU',
    'YANCHENG',
    'ZHONGSHAN',
    'YINCHUAN',
    'WUHU',
    'NANTONG',
    'JINHUA',
    'HUIZHOU',
    'CHANGZHOU',
]
def unitConvert(size):
    """Format a byte count as a string with a KB, MB or GB suffix.

    Values below 1 MiB are expressed in KB, values below 1 GiB in MB and
    everything else in GB, always with two decimal places.

    Args:
        size: Size in bytes.

    Returns:
        The formatted size, e.g. ``'1.97KB'`` for 2019 bytes.
    """
    # Float divisors force true division even where ``int / int`` would
    # truncate (Python 2), so the two decimals are always meaningful.
    convert_KB = 1024.0
    convert_MB = 1024.0 * 1024
    convert_GB = 1024.0 * 1024 * 1024

    if size < convert_MB:
        return '{0:.2F}KB'.format(size / convert_KB)
    if size < convert_GB:
        return '{0:.2F}MB'.format(size / convert_MB)
    return '{0:.2F}GB'.format(size / convert_GB)
def getSize(city):
    """Collect size statistics for one city's RAR archive without extracting it.

    Opens ``<city>.rar`` and adds up the ``file_size`` (bytes) of the archive
    entries, grouped by substrings of their path inside the archive.

    Args:
        city: Archive base name; the file read is ``city + '.rar'``.

    Returns:
        A list of six strings: the city name followed by the formatted sizes
        of the DAE files (subtotal minus the three texture groups), the XREF
        texture library, the DAY textures, the NIGHT textures and the
        subtotal of everything under the DAE folder.

    Raises:
        Whatever ``rarfile.RarFile`` raises when the archive is missing or
        unreadable.
    """
    sum_XREF = sum_DAY = sum_NIGHT = sum_DAE_file = 0

    # The with-block guarantees the archive handle is released, even when
    # reading the entry list fails.
    with rarfile.RarFile(city + '.rar') as rf:
        for f in rf.infolist():
            name = f.filename
            size = f.file_size

            # The checks are deliberately independent: an entry inside a
            # texture folder also counts towards the DAE folder subtotal.
            # [2] XREF texture library
            if 'DAE/XREF' in name:
                sum_XREF += size
            # [3] DAY textures (TEXTURES/DAY)
            if 'DAE/TEXTURES/DAY' in name:
                sum_DAY += size
            # [4] NIGHT textures (TEXTURES/NIGHT)
            if 'DAE/TEXTURES/NIGHT' in name:
                sum_NIGHT += size
            # [5] Subtotal: everything under the DAE folder
            if '/DAE' in name:
                sum_DAE_file += size

    # [1] DAE files: the subtotal minus the three texture groups. The
    # '_S.dae' files are intentionally left in this figure.
    sum_DAE = sum_DAE_file - sum_XREF - sum_DAY - sum_NIGHT

    ls_size = [
        city,
        unitConvert(sum_DAE),
        unitConvert(sum_XREF),
        unitConvert(sum_DAY),
        unitConvert(sum_NIGHT),
        unitConvert(sum_DAE_file),
    ]
    print(ls_size)
    return ls_size
# Write the statistics of every city to size_count.csv, one row per city.
ls_title = ['城市','DAE文件','XREF紋理庫文件','DAY紋理庫文件','NIGHT紋理庫文件','小計']
print(ls_title)
num = 0
# The with-block flushes and closes the file even if an unexpected error
# interrupts the run, so rows already written are not lost.
with open('size_count.csv','w') as f:
    f.write(','.join(ls_title)+'\n')
    for city in city_ls:
        try:
            row = getSize(city)
        except Exception as err:
            # Best effort: a city whose archive is missing or unreadable is
            # skipped, but reported instead of being dropped silently.
            # Catching Exception (not a bare except) lets Ctrl-C still stop
            # the run.
            print('{}統計失敗,已跳過:{}'.format(city, err))
            continue
        f.write(','.join(row)+'\n')
        num += 1
print('統計完成!共統計了{}個城市'.format(num))
- 帶單位數值各列求和:
#coding=utf-8
import csv
#import re
#待拓展:正則表達式 提取字符串中數字
# Read every row of the CSV into memory; the with-block closes the file as
# soon as the rows have been loaded.
with open('zz.csv') as f:
    L = list(csv.reader(f))
##print(L)
##print(len(L))
def Sum_Size(size_list):
    """Add up size strings that carry a GB, MB or KB unit and return GB.

    Each element is matched against the units in the order GB, MB, KB; the
    unit text is removed and the remainder parsed as a float. Elements
    without a recognised unit, or whose remainder is not a number (for
    example a header cell such as ``'DAE文件(MB)'``), are reported on stdout
    and left out of the total.

    Args:
        size_list: Iterable of strings such as ``'1.97KB'`` or ``'3.20GB'``.

    Returns:
        The total expressed in GB as a float (0 for an empty input).
    """
    bad_data_msg = '{}數據有誤請確認!'
    # One running total per unit; the conversion to GB happens once at the end.
    totals = {'GB': 0, 'MB': 0, 'KB': 0}
    for size in size_list:
        unit = next((u for u in ('GB', 'MB', 'KB') if u in size), None)
        if unit is None:
            print(bad_data_msg.format(size))
            continue
        try:
            totals[unit] += float(size.replace(unit, ''))
        except ValueError:
            # The text contains a unit but is not a number; treat it like any
            # other invalid cell instead of aborting the whole summation.
            print(bad_data_msg.format(size))
    sum_size = totals['GB'] + totals['MB']/1024 + totals['KB']/(1024**2)
    return sum_size
# Print the total of each of the first five columns, expressed in GB.
# NOTE(review): if zz.csv has the layout written by the statistics script
# (city name first, then five size columns), this range covers the name
# column and misses the last size column -- confirm the intended indices.
for i in range(5):
    # Rows with too few fields (csv.reader yields [] for a blank line) are
    # skipped instead of raising IndexError.
    L_colomn = [row[i] for row in L if len(row) > i]
    sum_size = Sum_Size(L_colomn)
    print('第{0}列合計爲{1:.2f}GB'.format(i+1,sum_size))