1. 读写 zip
相关模块的安装方法可以参考《[Python] 读取 rar/zip 压缩文件》一文。
import zipfile
def ls_zip(zip_file_path):
    """Return the names of all members stored in a zip archive.

    :param zip_file_path: path of the zip archive to inspect.
    :return: list of member names, as reported by ``ZipFile.namelist()``.
    """
    # The context manager closes the archive handle once the names are read;
    # previously the handle was left open.
    with zipfile.ZipFile(zip_file_path) as archive:
        return archive.namelist()
def read_zip(zip_file_path, csv_file_name, header=None, dtype=None, low_memory=False):
    """Read one CSV member of a zip archive into a pandas DataFrame.

    :param zip_file_path: path of the zip archive.
    :param csv_file_name: name of the CSV member inside the archive.
    :param header: forwarded to ``pd.read_csv``.
    :param dtype: forwarded to ``pd.read_csv``.
    :param low_memory: forwarded to ``pd.read_csv``.
    :return: the object returned by ``pd.read_csv`` for that member.
    :raises Exception: errors from ``zipfile`` or pandas propagate with their
        original type and traceback (all of them are ``Exception``
        subclasses, so callers catching ``Exception`` keep working).
    """
    # Both the archive and the member stream are closed on exit, including
    # when parsing fails. The CSV is fully parsed before the handles close.
    with zipfile.ZipFile(zip_file_path) as archive:
        with archive.open(csv_file_name) as member:
            return pd.read_csv(
                member, header=header, dtype=dtype, low_memory=low_memory
            )
def to_zip(to_zip_file, zip_file, is_python=False):
    """Compress a file or directory into a zip archive.

    :param to_zip_file: path of the file or directory to compress.
    :param zip_file: path of the zip archive to create.
    :param is_python: when true, build the archive with the ``zipfile``
        module; otherwise run the external ``zip -rq`` command.
    :raises FileNotFoundError: external mode only, when the ``zip``
        executable cannot be found.
    :raises subprocess.CalledProcessError: external mode only, when ``zip``
        exits with a non-zero status.
    """
    if is_python:
        with zipfile.ZipFile(zip_file, "w", zipfile.ZIP_DEFLATED) as archive:
            # Same entry the previous implementation wrote (the file itself,
            # or the bare directory entry).
            archive.write(to_zip_file)
            if os.path.isdir(to_zip_file):
                # ZipFile.write() is not recursive, so walk the tree to store
                # the directory's contents as well.
                if isinstance(zip_file, (str, os.PathLike)):
                    own_path = os.path.abspath(zip_file)
                else:
                    own_path = None
                for root, dir_names, file_names in os.walk(to_zip_file):
                    for entry in dir_names + file_names:
                        entry_path = os.path.join(root, entry)
                        # Do not add the archive to itself when it is being
                        # created inside the directory it compresses.
                        if os.path.abspath(entry_path) != own_path:
                            archive.write(entry_path)
        return

    # The former ``os.system('cd ...')`` ran in its own subshell and never
    # affected the following command, so it is dropped. Passing an argument
    # list (no shell) keeps paths with spaces or shell metacharacters intact.
    import subprocess

    subprocess.run(["zip", "-rq", zip_file, to_zip_file], check=True)
2. 读写 HDF
import h5py
def ls_hdf(hdf_file, group='lv1'):
    """List keys stored in an HDF5 file.

    :param hdf_file: path of the HDF5 file, opened read-only.
    :param group: ``'lv1'`` returns the top-level keys; ``'lv2'`` returns
        ``'<lv1>/<lv2>'`` paths for every child of every top-level key; any
        other value is treated as a group path whose direct keys are listed.
    :return: list of key strings.
    :raises ValueError: if ``group`` is a custom path whose keys cannot be
        listed (the underlying error is chained as the cause).
    """
    # ``with`` guarantees the file is closed on every path, including the
    # ValueError branch, which previously skipped ``f.close()``.
    with h5py.File(hdf_file, 'r') as f:
        if group == 'lv1':
            return list(f.keys())
        if group == 'lv2':
            return [
                f'{lv1_key}/{lv2_key}'
                for lv1_key in f.keys()
                for lv2_key in f[lv1_key].keys()
            ]
        try:
            return list(f[group].keys())
        except Exception as e:
            raise ValueError('{} not exist'.format(group)) from e
def read_hdf(hdf_file, key):
    """Load the object stored under ``key`` in an HDF file via pandas.

    :param hdf_file: path of the HDF file, passed to ``pd.read_hdf``.
    :param key: identifier of the stored object.
    :return: whatever ``pd.read_hdf`` returns for that key.
    """
    loaded = pd.read_hdf(hdf_file, key=key)
    return loaded
def to_hdf(data, hdf_file, key, complevel=1):
    """Store ``data`` under ``key`` in an HDF file, appending to the file.

    :param data: object exposing a pandas-style ``to_hdf`` method
        (presumably a DataFrame or Series).
    :param hdf_file: path of the target HDF file.
    :param key: identifier to store the object under.
    :param complevel: compression level forwarded to ``to_hdf``.
    """
    # ``key`` is passed by keyword: recent pandas releases deprecate, and
    # later reject, positional arguments after the path in ``to_hdf``.
    data.to_hdf(hdf_file, key=key, mode='a', complevel=complevel)
def del_hdf_key(hdf_file, key):
    """Delete the entry stored under ``key`` from an HDF5 file.

    :param hdf_file: path of the HDF5 file, opened with mode ``"a"``.
    :param key: name of the entry to remove.
    """
    store = h5py.File(hdf_file, "a")
    # Entering the handle as a context manager closes it after the delete.
    with store:
        del store[key]
3. 读写 rar
相关模块的安装方法可以参考《[Python] 读取 rar/zip 压缩文件》一文。
import rarfile
def ls_rar(rar_file_path):
    """Return the names of all members stored in a rar archive.

    :param rar_file_path: path of the rar archive to inspect.
    :return: list of member names, as reported by ``RarFile.namelist()``.
    """
    # Close the archive handle once the listing is read; it was previously
    # left open.
    with rarfile.RarFile(rar_file_path) as archive:
        return archive.namelist()
def read_rar(rar_file_path, csv_file_name, header=None, dtype=None):
    """Read one CSV member of a rar archive into a pandas DataFrame.

    :param rar_file_path: path of the rar archive.
    :param csv_file_name: name of the CSV member inside the archive.
    :param header: forwarded to ``pd.read_csv``.
    :param dtype: forwarded to ``pd.read_csv``.
    :return: the parsed DataFrame.
    :raises ValueError: if ``pd.read_csv`` does not produce a DataFrame.
    """
    # Archive and member stream are both closed on exit, even when parsing
    # fails; previously neither was ever closed.
    with rarfile.RarFile(rar_file_path) as archive:
        with archive.open(csv_file_name) as member:
            df = pd.read_csv(member, header=header, dtype=dtype)
    if not isinstance(df, pd.DataFrame):
        raise ValueError('Can not get DataFrame from {} in {}'.format(csv_file_name, rar_file_path))
    return df
def to_rar(to_rar_files, rar_file_path):
    """Add one or more files to a rar archive using the external ``rar`` tool.

    :param to_rar_files: a single path, or a list/tuple of paths, to add.
        A plain string is treated as ONE path (it is not split on spaces).
    :param rar_file_path: path of the rar archive to create or update.
    :raises FileNotFoundError: if the ``rar`` executable cannot be found.
    :raises subprocess.CalledProcessError: if ``rar`` exits with a non-zero
        status.
    """
    import subprocess

    if isinstance(to_rar_files, (list, tuple)):
        members = list(to_rar_files)
    else:
        members = [to_rar_files]
    # The previous command string was missing its ``f`` prefix, so the
    # literal text "{rar_file_path} {to_rar_files}" was executed. An argument
    # list also avoids shell quoting problems with unusual file names.
    subprocess.run(['rar', 'a', rar_file_path, *members], check=True)
4. 读写 tar.gz
import tarfile
def ls_tar(tar_file_path):
    """Return the member names of a tar archive.

    :param tar_file_path: path of the archive; mode ``'r:*'`` lets
        ``tarfile`` detect the compression transparently.
    :return: list of member names.
    """
    with tarfile.open(tar_file_path, 'r:*') as tar:
        # The TarFile method is ``getnames``; ``get_names`` does not exist
        # and raised AttributeError on every call.
        return tar.getnames()
def read_targz(targz_file_path, member):
    """Return the raw bytes of one member of a tar archive.

    :param targz_file_path: path of the tar archive.
    :param member: name of the member to read.
    :return: the member's content as ``bytes``.
    :raises KeyError: if ``member`` is not present (from ``getmember``).
    """
    # The archive is closed after the bytes are read; it previously stayed
    # open for the lifetime of the process.
    with tarfile.open(targz_file_path) as tar:
        info = tar.getmember(member)
        data = tar.extractfile(info)
        # NOTE(review): per the tarfile documentation ``extractfile`` returns
        # None for members that are not regular files or links, in which case
        # the ``read`` call below raises AttributeError, as it did before.
        return data.read()
def to_targz(source_dir, output_filename=None):
    """
    Pack a directory into a gzip-compressed tar archive.

    :param source_dir: directory to pack; trailing path separators are ignored
    :param output_filename: archive path; defaults to ``<source_dir>.tar.gz``
    :return: None. Failures while writing the archive are printed, not raised.
    """
    # Strip trailing separators so that "dir/" behaves like "dir": otherwise
    # basename() is empty (members get a blank top-level name) and the default
    # archive would be created inside the directory being packed.
    source_text = os.fspath(source_dir)
    separators = os.sep + (os.altsep or "")
    trimmed = source_text.rstrip(separators) or source_text
    if output_filename is None:
        output_filename = '{}.tar.gz'.format(trimmed)
    try:
        with tarfile.open(output_filename, "w:gz") as tar:
            tar.add(source_dir, arcname=os.path.basename(trimmed))
    except Exception as e:
        # Best-effort by design: report the problem and carry on.
        print(e)
def extract_tar_single(tar_file_path, file_name, save_path):
    """Extract a single member of a tar archive into a local directory.

    :param tar_file_path: path of the archive (compression auto-detected).
    :param file_name: name of the member to extract.
    :param save_path: directory the member is written into.
    """
    archive = tarfile.open(tar_file_path, 'r:*')
    with archive:
        archive.extract(member=file_name, path=save_path)
def extract_tar_all(tar_file_path, save_path):
    """Extract every member of a tar archive into a local directory.

    :param tar_file_path: path of the archive (compression auto-detected).
    :param save_path: directory the members are written into.
    """
    # NOTE(review): the archive is extracted as-is; confirm callers only pass
    # trusted archives.
    archive = tarfile.open(tar_file_path, 'r:*')
    with archive:
        archive.extractall(path=save_path)
5. 读写 bcolz
import bcolz
def ls_bcolz(bcolz_file_path, arg='line_map'):
    """Return one entry of the ``attrs`` mapping of a bcolz container.

    :param bcolz_file_path: path passed to ``bcolz.open``.
    :param arg: name of the attribute to look up (``'line_map'`` by default).
    :return: the value stored under ``arg`` in the container's ``attrs``.
    """
    container = bcolz.open(bcolz_file_path)
    return container.attrs[arg]
def read_bcolz(bcolz_file_path):
    """Open a bcolz container and convert it with ``todataframe()``.

    :param bcolz_file_path: path passed to ``bcolz.open``.
    :return: the result of calling ``todataframe()`` on the opened container.
    """
    return bcolz.open(bcolz_file_path).todataframe()
def to_bcolz(df, bcolz_file_path):
    """Write ``df`` to disk via ``bcolz.ctable.fromdataframe``.

    :param df: object handed to ``fromdataframe`` (presumably a pandas
        DataFrame).
    :param bcolz_file_path: passed as ``rootdir``.
    """
    build_ctable = bcolz.ctable.fromdataframe
    build_ctable(df, rootdir=bcolz_file_path)