# ===================== Reading a text file chunk by chunk ===========================
# Read only a limited number of rows.
nrows = 10
# Read in chunks; per the original note this yields an iterable TextFileReader.
chunksize = 4
# Return an iterable object; per the original note, inspect data with df.get_chunk(10).
iterator = True
# ===================== Handling delimiter-separated formats ========================
import csv
def csv_read_file(file_path, delimiter=',', header=True, lineterminator='\r\n', quotechar='"', skipinitialspace=False):
    """Read a delimited text file with the ``csv`` module into a DataFrame.

    Meant for irregular, fully quoted input such as::

        "a","b","c"
        "1","2","3"
        "1","2","3"

    Values are kept exactly as the strings produced by ``csv.reader``.
    Blank lines are skipped.  When rows differ in length, the columns are
    cut to the shortest row, because the transpose is done with ``zip``.

    :param file_path: path of the file to read
    :param delimiter: field separator, comma by default
    :param header: whether the first non-blank row holds the column names,
        True by default; without a header the columns are labelled 0, 1, 2, ...
    :param lineterminator: forwarded to ``csv.reader``; the csv documentation
        describes it as a writer setting that the reader ignores; CRLF by default
    :param quotechar: character that quotes fields containing special
        characters (such as the delimiter), double quote by default
    :param skipinitialspace: ignore whitespace right after the delimiter,
        False by default
    :return df: a DataFrame; it is empty when the file holds no rows
    """
    # newline='' hands the raw line endings to the csv module, as its
    # documentation asks, so line breaks inside quoted fields are parsed
    # correctly. The with-statement closes the file; no explicit close().
    with open(file_path, newline='') as f:
        reader = csv.reader(
            f,
            delimiter=delimiter,
            lineterminator=lineterminator,
            quotechar=quotechar,
            skipinitialspace=skipinitialspace,
        )
        # csv.reader yields [] for a blank line; a single such row would
        # make the zip-based transpose below return no columns at all.
        rows = [row for row in reader if row]

    if header:
        if not rows:
            # Empty file: there is no header row to take names from.
            return pd.DataFrame()
        column_names, records = rows[0], rows[1:]
        if records:
            # zip(*records) transposes the row list into column tuples.
            data = dict(zip(column_names, zip(*records)))
        else:
            # Header only: keep the column names on the empty frame.
            data = {name: [] for name in column_names}
    else:
        data = dict(enumerate(zip(*rows)))
    return pd.DataFrame(data)
# Load the sample file "ex1" from a hard-coded local Windows path; its first row is used as the header.
df = csv_read_file(
    file_path=r'C:\Users\86188\Desktop\python/ex1',
    header=True,
)