[Python] 讀寫 zip/hdf/rar/targz/bcolz

1. 讀寫 zip

相關模塊安裝可以參考 [Python] 讀取 rar/zip 壓縮文件

import zipfile

def ls_zip(zip_file_path):
    """List the member names stored in a zip archive.

    :param zip_file_path: path of the zip archive to inspect.
    :return: list of member names as reported by ``ZipFile.namelist()``.
    """
    # The context manager closes the archive handle once the names are read;
    # previously the handle was left open.
    with zipfile.ZipFile(zip_file_path) as archive:
        return archive.namelist()

def read_zip(zip_file_path, csv_file_name, header=None, dtype=None, low_memory=False):
    """Read one CSV member of a zip archive into a pandas DataFrame.

    :param zip_file_path: path of the zip archive.
    :param csv_file_name: name of the CSV member inside the archive.
    :param header: forwarded to ``pd.read_csv`` as ``header``.
    :param dtype: forwarded to ``pd.read_csv`` as ``dtype``.
    :param low_memory: forwarded to ``pd.read_csv`` as ``low_memory``.
    :return: the result of ``pd.read_csv`` for that member.
    :raises KeyError: if ``csv_file_name`` is not a member of the archive.
    """
    # Errors propagate with their original type and traceback. They are all
    # ``Exception`` subclasses, so callers catching ``Exception`` still work.
    with zipfile.ZipFile(zip_file_path) as archive:
        with archive.open(csv_file_name) as member:
            return pd.read_csv(member, header=header, dtype=dtype, low_memory=low_memory)

def to_zip(to_zip_file, zip_file, is_python=False):
    """Compress a file or directory into a zip archive.

    Both branches store members under the path given in ``to_zip_file``.

    :param to_zip_file: path of the file or directory to compress.
    :param zip_file: path of the zip archive to write.
    :param is_python: when true, build the archive with the ``zipfile``
        module; otherwise run the external ``zip -rq`` command.
    :raises FileNotFoundError: if the external ``zip`` command is not installed.
    :raises subprocess.CalledProcessError: if ``zip`` exits with a non-zero
        status.
    """
    import os
    import subprocess

    if is_python:
        archive_path = os.path.abspath(zip_file)
        with zipfile.ZipFile(zip_file, "w", zipfile.ZIP_DEFLATED) as archive:
            archive.write(to_zip_file)
            # ZipFile.write() on a directory stores only the directory entry,
            # so its contents have to be added explicitly.
            if os.path.isdir(to_zip_file):
                for root, dir_names, file_names in os.walk(to_zip_file):
                    for name in sorted(dir_names) + sorted(file_names):
                        member = os.path.join(root, name)
                        # Do not add the archive to itself when it is being
                        # written inside the directory it compresses.
                        if os.path.abspath(member) == archive_path:
                            continue
                        archive.write(member)
    else:
        # The former ``os.system('cd ...')`` ran in its own subshell and never
        # changed the directory used by the zip command, so it is dropped.
        # An argument list keeps paths with spaces or shell metacharacters
        # from being reinterpreted by a shell.
        subprocess.run(["zip", "-rq", zip_file, to_zip_file], check=True)

2. 讀寫 HDF

import h5py

def ls_hdf(hdf_file, group='lv1'):
    """List keys stored in an HDF5 file.

    :param hdf_file: path of the HDF5 file, opened read-only.
    :param group: ``'lv1'`` returns the top-level keys; ``'lv2'`` returns
        ``'<level-1 key>/<level-2 key>'`` for every child of every top-level
        key; any other value is treated as a path inside the file and the keys
        directly beneath it are returned.
    :return: list of key strings.
    :raises ValueError: if ``group`` is a path whose keys cannot be listed.
    """
    # The context manager closes the file on every path, including when the
    # lookup below raises; previously the handle leaked on error.
    with h5py.File(hdf_file, 'r') as f:
        if group == 'lv1':
            return list(f.keys())
        if group == 'lv2':
            return [
                f'{lv1_key}/{lv2_key}'
                for lv1_key in list(f.keys())
                for lv2_key in list(f[lv1_key].keys())
            ]
        try:
            return list(f[group].keys())
        except Exception as e:
            # Kept broad so every lookup failure still surfaces as ValueError,
            # as before; the original error is chained for debugging.
            raise ValueError('{} not exist'.format(group)) from e

def read_hdf(hdf_file, key):
    """Load the object stored under ``key`` in an HDF5 file.

    :param hdf_file: path of the HDF5 file, passed to ``pd.read_hdf``.
    :param key: identifier of the stored object, passed to ``pd.read_hdf``.
    :return: whatever ``pd.read_hdf`` returns for that key.
    """
    loaded = pd.read_hdf(hdf_file, key=key)
    return loaded

def to_hdf(data, hdf_file, key, complevel=1):
    """Write ``data`` into an HDF5 file under ``key``, opening it in append mode.

    :param data: object exposing a pandas-style ``to_hdf`` method.
    :param hdf_file: path of the HDF5 file to write to.
    :param key: identifier under which the object is stored.
    :param complevel: compression level forwarded to ``data.to_hdf``.
    """
    # ``key`` is passed by keyword: pandas 2.x deprecates passing it
    # positionally and pandas 3 makes it keyword-only.
    data.to_hdf(hdf_file, key=key, mode='a', complevel=complevel)

def del_hdf_key(hdf_file, key):
    """Delete the entry stored under ``key`` from an HDF5 file.

    :param hdf_file: path of the HDF5 file, opened in ``"a"`` mode.
    :param key: name of the entry to delete.
    """
    store = h5py.File(hdf_file, "a")
    with store:
        del store[key]

3. 讀寫 rar

相關模塊安裝可以參考 [Python] 讀取 rar/zip 壓縮文件

import rarfile

def ls_rar(rar_file_path):
    """List the member names stored in a rar archive.

    :param rar_file_path: path of the rar archive to inspect.
    :return: list of member names as reported by ``RarFile.namelist()``.
    """
    # The context manager closes the archive handle once the names are read;
    # previously the handle was left open.
    with rarfile.RarFile(rar_file_path) as archive:
        return archive.namelist()

 def read_rar(rar_file_path, csv_file_name, header=None, dtype=None):
    '''使用Pandas讀取rar壓縮文件夾中的某個csv文件'''
    rar = rarfile.RarFile(rar_file_path)
    data = rar.open(csv_file_name)
    df = pd.read_csv(data, header=header, dtype=dtype)
    if type(df) is pd.DataFrame:
        return df
    else:
        raise ValueError('Can not get DataFrame from {} in {}'.format(csv_file_name, rar_file_path))

def to_rar(to_rar_files, rar_file_path):
    """Add one or more files to a rar archive using the external ``rar`` tool.

    :param to_rar_files: a single path, or a list/tuple of paths, to add.
        A string is always treated as one path, even if it contains spaces.
    :param rar_file_path: path of the rar archive to create or update.
    :raises FileNotFoundError: if the ``rar`` command is not installed.
    :raises subprocess.CalledProcessError: if ``rar`` exits with a non-zero
        status.
    """
    import subprocess

    if isinstance(to_rar_files, (list, tuple)):
        members = [str(path) for path in to_rar_files]
    else:
        members = [str(to_rar_files)]
    # The old command string had no ``f`` prefix, so the literal text
    # ``{rar_file_path}`` was sent to the shell. An argument list fixes that
    # and keeps paths with spaces or shell metacharacters intact.
    subprocess.run(['rar', 'a', rar_file_path, *members], check=True)

4. 讀寫 targz

import tarfile

def ls_tar(tar_file_path):
    """List the member names stored in a tar archive.

    :param tar_file_path: path of the archive; it is opened with mode
        ``'r:*'``.
    :return: list of member names as reported by ``TarFile.getnames()``.
    """
    # ``TarFile`` has no ``get_names`` method; the correct name is
    # ``getnames``. The context manager also closes the archive handle.
    with tarfile.open(tar_file_path, 'r:*') as tar:
        return tar.getnames()

def read_targz(targz_file_path, member):
    """Return the raw bytes of one member of a tar archive.

    :param targz_file_path: path of the archive, opened with ``tarfile.open``.
    :param member: name of the member to read.
    :return: the member's content as ``bytes``.
    :raises KeyError: if ``member`` is not in the archive.
    :raises ValueError: if ``member`` has no readable content (for example
        a directory entry).
    """
    with tarfile.open(targz_file_path) as tar:
        stream = tar.extractfile(tar.getmember(member))
        # extractfile() returns None for members that carry no data, which
        # used to surface as an opaque AttributeError on ``None.read()``.
        if stream is None:
            raise ValueError(
                '{} in {} is not a regular file'.format(member, targz_file_path)
            )
        with stream:
            return stream.read()

def to_targz(source_dir, output_filename=None):
    """
    Pack a directory into a gzip-compressed tar archive.

    The archive contains one top-level entry named after the last component
    of ``source_dir``.

    :param source_dir: directory to pack.
    :param output_filename: name of the archive to write; defaults to
        ``<source_dir>.tar.gz``.
    :return: the archive file name on success, ``None`` if writing failed
        (the error is printed, as before).
    """
    import os

    # Normalise first: a trailing separator used to produce an empty archive
    # root name and a default output path of ``<dir>/.tar.gz``.
    source_dir = os.path.normpath(source_dir)
    if output_filename is None:
        output_filename = '{}.tar.gz'.format(source_dir)
    try:
        with tarfile.open(output_filename, "w:gz") as tar:
            tar.add(source_dir, arcname=os.path.basename(source_dir))
    except (OSError, tarfile.TarError) as e:
        # Best-effort behaviour is kept for I/O and archive errors, but the
        # caller can now detect the failure from the return value.
        print(e)
        return None
    return output_filename

def extract_tar_single(tar_file_path, file_name, save_path):
    """Extract one member of a tar archive into a local directory.

    :param tar_file_path: path of the archive; it is opened with mode
        ``'r:*'``.
    :param file_name: name of the member to extract.
    :param save_path: directory the member is extracted into.
    :raises KeyError: if ``file_name`` is not in the archive.
    """
    # NOTE: the docstring line was indented with a tab while the body used
    # spaces, which does not compile; everything now uses four spaces.
    # NOTE(review): no extraction filter is passed, so only use this on
    # archives you trust.
    with tarfile.open(tar_file_path, 'r:*') as tar:
        tar.extract(file_name, save_path)

def extract_tar_all(tar_file_path, save_path):
    """Extract every member of a tar archive into a local directory.

    :param tar_file_path: path of the archive; it is opened with mode
        ``'r:*'``.
    :param save_path: directory the members are extracted into.
    """
    archive = tarfile.open(tar_file_path, mode='r:*')
    with archive:
        archive.extractall(path=save_path)

5. 讀寫 bcolz

import bcolz

def ls_bcolz(bcolz_file_path, arg='line_map'):
    """Return one attribute stored on a bcolz container.

    :param bcolz_file_path: path passed to ``bcolz.open``.
    :param arg: name of the entry to read from the container's ``attrs``.
    :return: the value stored in ``attrs`` under ``arg``.
    """
    container = bcolz.open(bcolz_file_path)
    return container.attrs[arg]

def read_bcolz(bcolz_file_path):
    """Open a bcolz container and convert it with ``todataframe()``.

    :param bcolz_file_path: path passed to ``bcolz.open``.
    :return: the result of calling ``todataframe()`` on the opened container.
    """
    table = bcolz.open(bcolz_file_path)
    frame = table.todataframe()
    return frame

def to_bcolz(df, bcolz_file_path):
    """Build a bcolz ctable from a DataFrame, rooted at the given path.

    :param df: DataFrame handed to ``bcolz.ctable.fromdataframe``.
    :param bcolz_file_path: passed as ``rootdir`` for the new ctable.
    """
    ctable_cls = bcolz.ctable
    ctable_cls.fromdataframe(
        df,
        rootdir=bcolz_file_path,
    )
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章