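# 讀取 txt 中的字段 key,然後編號再輸出
# (Read the keys found in selected fields of a txt file, number them, then write the result out.)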

import pandas as pd
# Field delimiter used to split each input line into columns (see read_file).
sep="|"

def read_key(dict_key, arr_fileld, idx=None):
    """Register every key found in the selected fields of one record.

    Each selected field is treated as a comma-separated list of keys. A key
    that is not yet in ``dict_key`` is assigned the next consecutive id
    (``len(dict_key) + 1``, so ids start at 1); keys that are already
    present keep the id they have.

    Args:
        dict_key: Mapping of key -> integer id. Updated in place.
        arr_fileld: Sequence of field strings making up one record.
        idx: Positions in ``arr_fileld`` whose contents should be scanned.
            ``None`` or an empty collection means there is nothing to do.

    Returns:
        None. The result is the in-place update of ``dict_key``.

    Raises:
        IndexError: If a position in ``idx`` is out of range for
            ``arr_fileld``.
    """
    # ``None`` replaces the former ``[]`` default so that no mutable object
    # is shared between calls.
    if not idx:
        return
    for index in idx:
        for key in arr_fileld[index].split(","):
            # The candidate id is computed before insertion and is only
            # stored when the key is new.
            dict_key.setdefault(key, len(dict_key) + 1)

def replace_key(dict_key, arr_fileld, idx=None):
    """Rebuild one record with the keys of the selected fields replaced by ids.

    Fields whose position is in ``idx`` are treated as comma-separated lists
    of keys; every key is replaced by ``str(dict_key[key])`` and the ids are
    joined with "," again. All other fields are copied through unchanged.

    Args:
        dict_key: Mapping of key -> id used for the substitution.
        arr_fileld: Sequence of field strings making up one record.
        idx: Positions in ``arr_fileld`` whose keys should be replaced.

    Returns:
        The resulting fields joined with "|" (the same character as the
        module-level ``sep``). When ``idx`` is ``None`` or empty an empty
        string is returned, not the original record.

    Raises:
        KeyError: If a key in a selected field is missing from ``dict_key``.
    """
    # ``None`` replaces the former ``[]`` default so that no mutable object
    # is shared between calls.
    if not idx:
        return ""
    # Build the lookup once: membership is then O(1) per field instead of a
    # scan over the whole index list.
    selected = set(idx)
    res = []
    for i, field in enumerate(arr_fileld):
        if i in selected:
            field = ",".join(str(dict_key[key]) for key in field.split(","))
        res.append(field)
    return "|".join(res)


def read_file(file_name, file_out, idx):
    """Rewrite a delimited text file with the keys of selected columns numbered.

    The input is read twice. The first pass collects every key that occurs
    in the selected columns and assigns ids via ``read_key``; the second
    pass writes each record to ``file_out`` with those keys replaced via
    ``replace_key``. Both files are handled as UTF-8 text and records are
    split on the module-level ``sep``.

    Args:
        file_name: Path of the input file.
        file_out: Path of the output file (created or overwritten).
        idx: Positions of the columns whose keys should be replaced.
    """
    dict_key = {}

    # Pass 1: build the key -> id mapping.
    with open(file_name, "r", encoding="utf-8") as f:
        for line in f:
            # Strip the line terminator first; otherwise it stays glued to
            # the last field and "x\n" is numbered as a different key than "x".
            read_key(dict_key, line.rstrip("\n").split(sep), idx)

    # Pass 2: emit the records with the ids substituted.
    with open(file_name, "r", encoding="utf-8") as f, \
            open(file_out, "w", encoding="utf-8") as fout:
        for line in f:
            record = line.rstrip("\n")
            res = replace_key(dict_key, record.split(sep), idx)
            if idx:
                # Put back exactly the terminator that was stripped (the
                # final line of the file may have none). With an empty idx
                # replace_key yields "" and, as before, nothing is written.
                res += line[len(record):]
            fout.write(res)


import json
from datetime import datetime

# Load the list of names. The encoding is given explicitly so decoding does
# not depend on the platform default and matches the UTF-8 used by read_file.
# NOTE(review): presumably one column name per input column, in order - confirm.
with open("../conf/names.json", "r", encoding="utf-8") as f:
    names = json.load(f)

# Positions of the entries that contain "room_id", or both "room" and "idx".
idx = [
    i
    for i, name in enumerate(names)
    if "room_id" in name or ("room" in name and "idx" in name)
]

# Run the conversion and report how long it took (wall-clock time).
cur_time1 = datetime.now()
read_file("../data/xxx", "../data/out", idx)
cur_time2 = datetime.now()

time_span = cur_time2 - cur_time1
print("time", time_span)

 

# 發表評論
# 所有評論
# 還沒有人評論,想成為第一個評論的人麼?請在上方評論欄輸入並且點擊發布。
# 相關文章