[Python] 读写 zip/hdf/rar/tar.gz/bcolz

1. 读写 zip

相关模块安装可以参考 [Python] 读取 rar/zip 压缩文件

import zipfile

def ls_zip(zip_file_path):
    """Return the names of all entries stored in a zip archive.

    Args:
        zip_file_path: Path of the zip archive to inspect.

    Returns:
        list[str]: Entry names as reported by ``ZipFile.namelist()``.
    """
    # The context manager releases the archive's file handle as soon as the
    # listing has been read, even if reading the directory fails.
    with zipfile.ZipFile(zip_file_path) as archive:
        return archive.namelist()

def read_zip(zip_file_path, csv_file_name, header=None, dtype=None, low_memory=False):
    """Load one CSV member of a zip archive into a pandas DataFrame.

    Args:
        zip_file_path: Path of the zip archive.
        csv_file_name: Name of the CSV entry inside the archive.
        header: Forwarded to ``pandas.read_csv`` as ``header``.
        dtype: Forwarded to ``pandas.read_csv`` as ``dtype``.
        low_memory: Forwarded to ``pandas.read_csv`` as ``low_memory``.

    Returns:
        The DataFrame produced by ``pandas.read_csv``.

    Raises:
        KeyError: ``csv_file_name`` is not an entry of the archive.
        Exception: Any other error from ``zipfile`` or ``pandas`` propagates
            with its original type and traceback instead of being flattened
            into a bare ``Exception``.
    """
    # Both the archive and the member stream are closed on exit, including
    # when parsing fails part-way through.
    with zipfile.ZipFile(zip_file_path) as archive:
        with archive.open(csv_file_name) as member:
            return pd.read_csv(member, header=header, dtype=dtype, low_memory=low_memory)

def to_zip(to_zip_file, zip_file, is_python=False):
    """Compress a file or a directory tree into a zip archive.

    Args:
        to_zip_file: Path of the file or directory to compress.
        zip_file: Path of the zip archive to create.
        is_python: When true, build the archive with the ``zipfile`` module;
            otherwise run the external ``zip -rq`` command.

    Raises:
        subprocess.CalledProcessError: The external ``zip`` command exited
            with a non-zero status.
        FileNotFoundError: The external ``zip`` executable is not installed
            (only when ``is_python`` is false).
    """
    import subprocess

    if is_python:
        archive_path = os.path.abspath(zip_file)
        with zipfile.ZipFile(zip_file, "w", zipfile.ZIP_DEFLATED) as archive:
            archive.write(to_zip_file)
            # ZipFile.write() stores only the single entry it is given, so a
            # directory has to be walked explicitly to match `zip -r`.
            # os.walk() yields nothing for a plain file.
            for root, dirs, files in os.walk(to_zip_file):
                for name in sorted(dirs) + sorted(files):
                    path = os.path.join(root, name)
                    # Never add the archive to itself when it is being
                    # written inside the directory that is compressed.
                    if os.path.abspath(path) != archive_path:
                        archive.write(path)
        return

    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact. The command runs in the current working
    # directory: the former `cd` call ran in its own subshell and never
    # affected the `zip` invocation.
    subprocess.run(["zip", "-rq", zip_file, to_zip_file], check=True)

2. 读写 HDF

import h5py

def ls_hdf(hdf_file, group='lv1'):
    """List keys stored in an HDF5 file.

    Args:
        hdf_file: Path of the HDF5 file, opened read-only.
        group: Selects what is listed:
            ``'lv1'`` returns the top-level keys;
            ``'lv2'`` returns ``"<lv1>/<lv2>"`` paths for the children of
            every top-level key;
            any other value is used as a path inside the file and the keys
            of that node are returned.

    Returns:
        list[str]: The selected keys.

    Raises:
        ValueError: ``group`` is a custom path whose keys cannot be read.
    """
    # The context manager closes the file on every path, including the
    # ValueError raised below.
    with h5py.File(hdf_file, 'r') as f:
        if group == 'lv1':
            return list(f.keys())
        if group == 'lv2':
            # NOTE(review): presumably every top-level entry is a group; an
            # entry without keys() would fail here -- confirm.
            return [
                f'{lv1_key}/{lv2_key}'
                for lv1_key in list(f.keys())
                for lv2_key in list(f[lv1_key].keys())
            ]
        try:
            return list(f[group].keys())
        except Exception as e:
            # Chain the cause so the underlying h5py error stays visible.
            raise ValueError('{} not exist'.format(group)) from e

def read_hdf(hdf_file, key):
    """Read the object stored under ``key`` in an HDF5 file via pandas.

    Args:
        hdf_file: Passed to ``pandas.read_hdf`` as the file to read.
        key: Passed to ``pandas.read_hdf`` as the key to load.

    Returns:
        Whatever ``pandas.read_hdf`` returns for that key.
    """
    loaded = pd.read_hdf(hdf_file, key)
    return loaded

def to_hdf(data, hdf_file, key, complevel=1):
    """Append a pandas object to an HDF5 file under ``key``.

    Args:
        data: Object exposing ``to_hdf`` (e.g. a pandas DataFrame or Series).
        hdf_file: Target HDF5 file; opened in append mode (``mode='a'``).
        key: Identifier under which the data is stored.
        complevel: Compression level forwarded to ``to_hdf``.
    """
    # `key` is passed by keyword: pandas deprecated positional arguments
    # after the path for to_hdf and makes them keyword-only in 3.0.
    data.to_hdf(hdf_file, key=key, mode='a', complevel=complevel)

def del_hdf_key(hdf_file, key):
    """Delete the entry stored under ``key`` from an HDF5 file.

    Args:
        hdf_file: Path of the HDF5 file, opened with mode ``"a"``.
        key: Name of the entry to remove.
    """
    store = h5py.File(hdf_file, "a")
    # The file is closed when the with-block exits, whether or not the
    # deletion succeeds.
    with store:
        del store[key]

3. 读写 rar

相关模块安装可以参考 [Python] 读取 rar/zip 压缩文件

import rarfile

def ls_rar(rar_file_path):
    """Return the names of all entries stored in a rar archive.

    Args:
        rar_file_path: Path of the rar archive to inspect.

    Returns:
        list[str]: Entry names as reported by ``RarFile.namelist()``.
    """
    # Close the archive once the listing has been read instead of leaving
    # the handle open until garbage collection.
    with rarfile.RarFile(rar_file_path) as archive:
        return archive.namelist()

 def read_rar(rar_file_path, csv_file_name, header=None, dtype=None):
    '''使用Pandas读取rar压缩文件夹中的某个csv文件'''
    rar = rarfile.RarFile(rar_file_path)
    data = rar.open(csv_file_name)
    df = pd.read_csv(data, header=header, dtype=dtype)
    if type(df) is pd.DataFrame:
        return df
    else:
        raise ValueError('Can not get DataFrame from {} in {}'.format(csv_file_name, rar_file_path))

def to_rar(to_rar_files, rar_file_path):
    """Add one or more files to a rar archive with the external ``rar`` tool.

    Args:
        to_rar_files: A single path, or a list/tuple of paths, to add. A
            single string is treated as exactly one path; it is not split
            on whitespace.
        rar_file_path: Path of the rar archive to create or update.

    Raises:
        subprocess.CalledProcessError: ``rar`` exited with a non-zero status.
        FileNotFoundError: The ``rar`` executable is not installed.
    """
    import subprocess

    if isinstance(to_rar_files, (list, tuple)):
        members = list(to_rar_files)
    else:
        members = [to_rar_files]
    # The command used to be a plain string containing literal
    # "{rar_file_path}" placeholders (missing f-prefix), so the real paths
    # never reached `rar`. An argument list also avoids shell quoting issues.
    subprocess.run(['rar', 'a', rar_file_path, *members], check=True)

4. 读写 tar.gz

import tarfile

def ls_tar(tar_file_path):
    """Return the member names of a tar archive.

    Args:
        tar_file_path: Path of the archive; opened with mode ``'r:*'`` so
            the compression is detected automatically.

    Returns:
        list[str]: Member names as reported by ``TarFile.getnames()``.
    """
    # TarFile has no get_names(); the method is getnames(). The context
    # manager closes the archive after the listing is read.
    with tarfile.open(tar_file_path, 'r:*') as tar:
        return tar.getnames()

def read_targz(targz_file_path, member):
    """Return the raw bytes of one member of a tar archive.

    Args:
        targz_file_path: Path of the tar archive.
        member: Name of the member to read.

    Returns:
        bytes: The full content of the member.

    Raises:
        KeyError: ``member`` is not in the archive.
        ValueError: ``member`` exists but has no readable content (for
            example a directory entry).
    """
    # Read everything inside the with-block: the extracted stream is only
    # valid while the archive is open.
    with tarfile.open(targz_file_path) as tar:
        info = tar.getmember(member)
        stream = tar.extractfile(info)
        # extractfile() returns None for members without file content.
        if stream is None:
            raise ValueError(
                '{} in {} is not a regular file'.format(member, targz_file_path)
            )
        with stream:
            return stream.read()

def to_targz(source_dir, output_filename=None):
    """
    Pack a directory into a gzip-compressed tar archive.

    The archive contains a single top-level entry named after the last
    component of ``source_dir``. Archive errors are printed rather than
    raised.

    :param source_dir: directory to pack
    :param output_filename: name of the archive to create; defaults to
        ``<source_dir>.tar.gz``
    :return: None
    """
    # Normalise first: with a trailing separator ("data/") the default name
    # would become "data/.tar.gz" (inside the directory being packed) and
    # os.path.basename() would return an empty archive name.
    source_dir = os.path.normpath(source_dir)
    if output_filename is None:
        output_filename = '{}.tar.gz'.format(source_dir)
    try:
        with tarfile.open(output_filename, "w:gz") as tar:
            tar.add(source_dir, arcname=os.path.basename(source_dir))
    except (OSError, tarfile.TarError) as e:
        # Best-effort by design: report I/O and archive errors, do not raise.
        print(e)

def extract_tar_single(tar_file_path, file_name, save_path):
    """Extract one member of a tar archive to a local directory.

    Args:
        tar_file_path: Path of the archive; opened with mode ``'r:*'`` so
            the compression is detected automatically.
        file_name: Name of the member to extract.
        save_path: Directory the member is extracted into.

    Raises:
        KeyError: ``file_name`` is not in the archive.
    """
    # The docstring line was previously indented with a tab while the body
    # used spaces, which stopped the module from compiling.
    with tarfile.open(tar_file_path, 'r:*') as tar:
        tar.extract(file_name, save_path)

def extract_tar_all(tar_file_path, save_path):
    """Extract every member of a tar archive to a local directory.

    No extraction filter is passed, so the interpreter's default policy
    applies to member paths.

    Args:
        tar_file_path: Path of the archive; opened with mode ``'r:*'``.
        save_path: Directory the members are extracted into.
    """
    archive = tarfile.open(tar_file_path, 'r:*')
    # Leaving the with-block closes the archive.
    with archive:
        archive.extractall(save_path)

5. 读写 bcolz

import bcolz

def ls_bcolz(bcolz_file_path, arg='line_map'):
    """Return one entry of the ``attrs`` mapping of a bcolz object on disk.

    Args:
        bcolz_file_path: Path passed to ``bcolz.open``.
        arg: Key looked up in the opened object's ``attrs``; defaults to
            ``'line_map'``.

    Returns:
        The value stored under ``arg``.
    """
    return bcolz.open(bcolz_file_path).attrs[arg]

def read_bcolz(bcolz_file_path):
    """Open a bcolz object from disk and convert it with ``todataframe()``.

    Args:
        bcolz_file_path: Path passed to ``bcolz.open``.

    Returns:
        The result of calling ``todataframe()`` on the opened object.
    """
    table = bcolz.open(bcolz_file_path)
    frame = table.todataframe()
    return frame

def to_bcolz(df, bcolz_file_path):
    """Store a DataFrame through ``bcolz.ctable.fromdataframe``.

    Args:
        df: DataFrame handed to ``fromdataframe``.
        bcolz_file_path: Passed as ``rootdir``.
    """
    build_ctable = bcolz.ctable.fromdataframe
    # The returned ctable is intentionally discarded; the function returns None.
    build_ctable(df, rootdir=bcolz_file_path)
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章