def save_data(line, path="new微博評論.csv"):
    """Append ``line`` verbatim to a UTF-8 encoded output file.

    Args:
        line: Text to append. No line terminator is added.
        path: Destination file; defaults to the output name this script
            has always used, so existing one-argument calls are unchanged.
    """
    # newline="" turns off newline translation, so whatever line ending is
    # already present in `line` is written unchanged. Plain append mode is
    # sufficient because the file is never read back here.
    with open(path, "a", newline="", encoding="utf-8") as f:
        f.write(line)
# Copy every line of the source CSV that decodes as UTF-8 into the cleaned
# output file via save_data(); lines that fail to decode are reported and
# skipped. The source is opened in binary mode so that a single bad byte
# sequence cannot abort reading the rest of the file.
with open("微博評論.csv", "rb") as f:
    # i is the 1-based line number, used only in the report below.
    for i, line in enumerate(f, start=1):
        try:
            n_line = line.decode('utf8')
        except UnicodeDecodeError as e:
            print(type(e), e)
            print("=========================")
            print(i, line)
        else:
            # Only decoding problems are skipped; a failure while writing
            # the output is a real error and is allowed to propagate.
            save_data(n_line)
# Encoding check (編碼檢查) using chardet
import chardet
def judge(data):
    """Return the encoding name that chardet detects for ``data``.

    Args:
        data: Raw bytes to inspect.

    Returns:
        The ``"encoding"`` entry of the result of ``chardet.detect(data)``.
        NOTE(review): chardet's documentation says this entry can be
        ``None`` when no encoding is recognised -- confirm against the
        installed version.
    """
    return chardet.detect(data)["encoding"]
def error(e, q=1):
    """Show ``e`` as a prompt, wait for the user to press Enter, then
    optionally terminate the program.

    Args:
        e: Message passed to ``input`` as the prompt text.
        q: When truthy (the default), exit with status 0 once the user
            has confirmed; when falsy, simply return.

    Raises:
        SystemExit: With code 0 when ``q`` is truthy.
    """
    # Local import keeps this block self-contained. sys.exit is always
    # available, whereas the bare exit() helper is injected by the `site`
    # module and is missing under `python -S` or in frozen executables.
    import sys

    input(e)
    if q:
        sys.exit(0)
def trans(path):
    """Read the file at ``path``, decode it with its detected encoding and
    return its content as a list of lines.

    The text is split on ``"\\n"`` and each piece has trailing whitespace
    removed. If that yields a single piece (the text contains no ``"\\n"``),
    that piece is split on ``"\\r"`` instead.

    Args:
        path: Path of the text file to read.

    Returns:
        A list of strings, one per line. If decoding fails, the problem is
        printed and ``error`` is called, which by default exits the
        program; should it return, ``None`` is returned.
    """
    # Context manager so the file handle is closed promptly instead of
    # being left to the garbage collector.
    with open(path, "rb") as f:
        data = f.read()

    coding = judge(data)
    # A "GB2312" detection is decoded with the GBK codec instead
    # (presumably because GBK covers a superset of GB2312 -- TODO confirm).
    if coding == "GB2312":
        coding = "GBK"

    try:
        text = data.decode(coding)
    except (UnicodeError, LookupError, TypeError) as e:
        # UnicodeError: bytes are invalid for the detected encoding.
        # LookupError: Python has no codec by that name.
        # TypeError: judge() returned a non-string such as None.
        print(e)
        error("[!] 無法使用此文本,請使用utf8編碼的文本")
        return None

    arr = [i.rstrip() for i in text.split("\n")]
    if len(arr) == 1:
        return arr[0].split("\r")
    return arr
# Example run: decode the sample file and print the resulting list of lines.
print(trans(path="123.txt"))