1. 讀寫 zip
相關模塊安裝可以參考 [Python] 讀取 rar/zip 壓縮文件
import zipfile
def ls_zip(zip_file_path):
    """Return the names of all members stored in a zip archive.

    :param zip_file_path: path of the zip archive to inspect
    :return: list of member names, as reported by ``ZipFile.namelist()``
    """
    # The context manager closes the archive handle even if listing fails.
    with zipfile.ZipFile(zip_file_path) as archive:
        return archive.namelist()
def read_zip(zip_file_path, csv_file_name, header=None, dtype=None, low_memory=False):
    """Read one CSV member of a zip archive into a pandas DataFrame.

    :param zip_file_path: path of the zip archive
    :param csv_file_name: name of the CSV member inside the archive
    :param header: forwarded to ``pd.read_csv``
    :param dtype: forwarded to ``pd.read_csv``
    :param low_memory: forwarded to ``pd.read_csv``
    :return: the DataFrame produced by ``pd.read_csv``
    :raises Exception: errors from ``zipfile`` or ``pandas`` propagate
        unchanged, so the original type and traceback are preserved (they
        are all still ``Exception`` subclasses)
    """
    # Both handles are closed by the context managers, including on error.
    with zipfile.ZipFile(zip_file_path) as archive:
        with archive.open(csv_file_name) as member:
            return pd.read_csv(member, header=header, dtype=dtype, low_memory=low_memory)
def to_zip(to_zip_file, zip_file, is_python=False):
    """Compress a file or a directory into a zip archive.

    :param to_zip_file: path of the file or directory to compress
    :param zip_file: path of the zip archive to create
    :param is_python: when true, build the archive with the ``zipfile``
        module; otherwise invoke the external ``zip`` command
    :raises subprocess.CalledProcessError: if the external ``zip`` command
        exits with a non-zero status
    """
    import subprocess

    if is_python:
        archive_path = os.path.abspath(zip_file)
        with zipfile.ZipFile(zip_file, "w", zipfile.ZIP_DEFLATED) as archive:
            archive.write(to_zip_file)
            # ZipFile.write() on a directory stores only the directory entry,
            # so the tree has to be walked to include its contents.
            if os.path.isdir(to_zip_file):
                for root, dirs, files in os.walk(to_zip_file):
                    for name in sorted(dirs) + sorted(files):
                        path = os.path.join(root, name)
                        # Never add the archive being written to itself.
                        if os.path.abspath(path) == archive_path:
                            continue
                        archive.write(path)
    else:
        # The former ``os.system('cd ...')`` ran in its own subshell and had
        # no effect, so the command keeps running from the current directory.
        # An argument list avoids shell quoting problems with unusual paths.
        subprocess.run(["zip", "-rq", zip_file, to_zip_file], check=True)
2. 讀寫 HDF
import h5py
def ls_hdf(hdf_file, group='lv1'):
    """List keys stored in an HDF5 file.

    :param hdf_file: path of the HDF5 file
    :param group: ``'lv1'`` lists the top-level keys; ``'lv2'`` lists
        ``'<lv1>/<lv2>'`` paths for the children of every top-level key;
        any other value is used as a path inside the file whose keys are
        listed
    :return: list of key strings
    :raises ValueError: if ``group`` is a custom path whose keys cannot be
        listed
    """
    # The context manager closes the file on every path, including the
    # ValueError raised below (previously the handle leaked in that case).
    with h5py.File(hdf_file, 'r') as f:
        if group == 'lv1':  # top-level keys only
            return list(f.keys())
        if group == 'lv2':  # first- and second-level keys joined by '/'
            return [
                f'{lv1_key}/{lv2_key}'
                for lv1_key in list(f.keys())
                for lv2_key in list(f[lv1_key].keys())
            ]
        try:
            return list(f[group].keys())
        except Exception as e:
            raise ValueError('{} not exist'.format(group)) from e
def read_hdf(hdf_file, key):
    """Load the object stored under ``key`` in an HDF file using pandas.

    :param hdf_file: file passed to ``pd.read_hdf``
    :param key: identifier of the stored object
    :return: whatever ``pd.read_hdf`` returns for that key
    """
    loaded = pd.read_hdf(hdf_file, key=key)
    return loaded
def to_hdf(data, hdf_file, key, complevel=1):
    """Append a pandas object to an HDF file under ``key``.

    :param data: object exposing ``to_hdf`` (for example a DataFrame)
    :param hdf_file: target HDF file, opened in append mode (``'a'``)
    :param key: identifier to store the object under
    :param complevel: compression level forwarded to ``to_hdf``
    """
    # ``key`` is passed by keyword: recent pandas releases deprecate (and
    # later reject) positional arguments other than the path for ``to_hdf``.
    data.to_hdf(hdf_file, key=key, mode='a', complevel=complevel)
def del_hdf_key(hdf_file, key):
    """Delete the entry stored under ``key`` from an HDF5 file.

    :param hdf_file: path of the HDF5 file, opened in ``"a"`` mode
    :param key: name of the entry to delete
    """
    handle = h5py.File(hdf_file, "a")
    with handle:
        del handle[key]
3. 讀寫 rar
相關模塊安裝可以參考 [Python] 讀取 rar/zip 壓縮文件
import rarfile
def ls_rar(rar_file_path):
    """Return the names of all members stored in a rar archive.

    :param rar_file_path: path of the rar archive to inspect
    :return: list of member names, as reported by ``RarFile.namelist()``
    """
    # The context manager closes the archive handle even if listing fails.
    with rarfile.RarFile(rar_file_path) as archive:
        return archive.namelist()
def read_rar(rar_file_path, csv_file_name, header=None, dtype=None):
    """Read one CSV member of a rar archive into a pandas DataFrame.

    :param rar_file_path: path of the rar archive
    :param csv_file_name: name of the CSV member inside the archive
    :param header: forwarded to ``pd.read_csv``
    :param dtype: forwarded to ``pd.read_csv``
    :return: the parsed DataFrame
    :raises ValueError: if ``pd.read_csv`` did not produce a DataFrame
    """
    # Close both the archive and the member stream once parsing is done.
    with rarfile.RarFile(rar_file_path) as archive:
        with archive.open(csv_file_name) as member:
            df = pd.read_csv(member, header=header, dtype=dtype)
    if isinstance(df, pd.DataFrame):
        return df
    raise ValueError('Can not get DataFrame from {} in {}'.format(csv_file_name, rar_file_path))
def to_rar(to_rar_files, rar_file_path):
    """Add one or more files to a rar archive using the external ``rar`` tool.

    :param to_rar_files: a single path, or a list/tuple of paths, to add
    :param rar_file_path: path of the rar archive to create or update
    :raises subprocess.CalledProcessError: if ``rar`` exits with a non-zero
        status
    """
    import subprocess

    if isinstance(to_rar_files, (list, tuple)):
        files = list(to_rar_files)
    else:
        files = [to_rar_files]
    # The previous command string lacked the ``f`` prefix, so the literal
    # text ``{rar_file_path} {to_rar_files}`` was sent to the shell. Passing
    # an argument list substitutes the real values and needs no quoting.
    subprocess.run(['rar', 'a', rar_file_path, *files], check=True)
4. 讀寫 targz
import tarfile
def ls_tar(tar_file_path):
    """Return the names of all members stored in a tar archive.

    :param tar_file_path: path of the archive; it is opened with mode
        ``'r:*'``
    :return: list of member names
    """
    # ``TarFile`` has no ``get_names`` method; the correct call is
    # ``getnames``. The context manager also closes the archive afterwards.
    with tarfile.open(tar_file_path, 'r:*') as tar:
        return tar.getnames()
def read_targz(targz_file_path, member):
    """Return the raw bytes of one member of a tar archive.

    :param targz_file_path: path of the archive
    :param member: name of the member to read
    :return: the member's content as ``bytes``
    :raises KeyError: if ``member`` is not in the archive (raised by
        ``TarFile.getmember``)
    :raises ValueError: if ``member`` has no readable content
    """
    with tarfile.open(targz_file_path) as tar:
        info = tar.getmember(member)
        stream = tar.extractfile(info)
        # extractfile() returns None for members without file content
        # (for example directories); fail with a clear message instead of
        # an AttributeError on ``None.read``.
        if stream is None:
            raise ValueError('{} in {} is not a readable file'.format(member, targz_file_path))
        with stream:
            return stream.read()
def to_targz(source_dir, output_filename=None):
    """Pack a directory into a gzip-compressed tar archive.

    :param source_dir: directory to pack
    :param output_filename: archive path; defaults to ``<source_dir>.tar.gz``
    :return: the archive path on success, ``None`` if packing failed (the
        error is printed, as before)
    """
    # Strip trailing separators so that "data/" yields "data.tar.gz" and a
    # top-level entry named "data" instead of an empty archive name.
    separators = os.sep + (os.altsep or '')
    trimmed = source_dir.rstrip(separators) or source_dir
    if output_filename is None:
        output_filename = '{}.tar.gz'.format(trimmed)
    try:
        with tarfile.open(output_filename, "w:gz") as tar:
            tar.add(source_dir, arcname=os.path.basename(trimmed))
    except (OSError, tarfile.TarError) as e:
        # Best-effort behaviour is kept: report the problem, but let the
        # caller detect the failure through the ``None`` return value.
        print(e)
        return None
    return output_filename
def extract_tar_single(tar_file_path, file_name, save_path):
    """Extract a single member of a tar archive to a local directory.

    :param tar_file_path: path of the archive, opened with mode ``'r:*'``
    :param file_name: name of the member to extract
    :param save_path: directory the member is extracted into
    """
    archive = tarfile.open(tar_file_path, 'r:*')
    with archive:
        archive.extract(member=file_name, path=save_path)
def extract_tar_all(tar_file_path, save_path):
    """Extract every member of a tar archive to a local directory.

    :param tar_file_path: path of the archive, opened with mode ``'r:*'``
    :param save_path: directory the members are extracted into
    """
    # NOTE(review): member paths come from the archive itself; confirm that
    # callers only pass archives from trusted sources.
    archive = tarfile.open(tar_file_path, 'r:*')
    with archive:
        archive.extractall(path=save_path)
5. 讀寫 bcolz
import bcolz
def ls_bcolz(bcolz_file_path, arg='line_map'):
    """Return one entry of the ``attrs`` mapping of a bcolz container.

    :param bcolz_file_path: path passed to ``bcolz.open``
    :param arg: name of the attribute to look up (default ``'line_map'``)
    :return: the value stored under ``arg`` in the container's ``attrs``
    """
    container = bcolz.open(bcolz_file_path)
    return container.attrs[arg]
def read_bcolz(bcolz_file_path):
    """Open a bcolz container and convert it to a DataFrame.

    :param bcolz_file_path: path passed to ``bcolz.open``
    :return: the result of calling ``todataframe()`` on the opened container
    """
    container = bcolz.open(bcolz_file_path)
    frame = container.todataframe()
    return frame
def to_bcolz(df, bcolz_file_path):
    """Build a bcolz ctable from a DataFrame, rooted at the given directory.

    :param df: DataFrame handed to ``bcolz.ctable.fromdataframe``
    :param bcolz_file_path: value passed as ``rootdir``
    """
    build_ctable = bcolz.ctable.fromdataframe
    build_ctable(df, rootdir=bcolz_file_path)