# Python: read the English text in a .txt file, analyze word frequency, and visualize the result.
# Uses the turtle library. Platform: Spyder.
import turtle
# Approximation of pi; not referenced by any code visible in this file.
pi=3.14159
# How many of the most frequent words read() prints and collects.
count = 10
# Filled by read(): occurrence counts of the top words, most frequent first.
data = []
# Filled by read(): the words matching `data`, position by position.
words = []
import random
def read(filename, data, words):
    """Count the words in a text file and collect the most frequent ones.

    The file is tallied with ``process_read``; the entries with the highest
    counts (at most the module-level ``count`` of them) are printed as
    ``word<TAB>count`` and appended to the caller's lists.

    Args:
        filename: Path of the text file to analyse.
        data: List that receives the occurrence counts, highest first.
        words: List that receives the matching words, in the same order.

    Raises:
        OSError: If the file cannot be opened.
    """
    # The context manager closes the handle even if counting raises.
    with open(filename, "r") as handle:
        frequencies = process_read(handle)

    # [count, word] pairs in descending order: highest count first, ties
    # broken by the word in reverse lexicographic order.
    ranked = sorted(
        ([occurrences, word] for word, occurrences in frequencies.items()),
        reverse=True,
    )

    # Slicing never runs past the list, so a file with fewer than `count`
    # distinct words yields just the words it has instead of wrapping around
    # through negative indices and then raising IndexError.
    for occurrences, word in ranked[:max(count, 0)]:
        print(word + "\t" + str(occurrences))
        data.append(occurrences)
        words.append(word)
def randomcolor():
    """Return a random colour string of the form ``#XXXXXX``.

    Each of the six characters is drawn independently from the 15-symbol
    alphabet ``1``-``9``, ``A``-``F``; the digit ``0`` is not part of it.
    """
    alphabet = ['1', '2', '3', '4', '5', '6', '7', '8', '9',
                'A', 'B', 'C', 'D', 'E', 'F']
    picked = [alphabet[random.randint(0, 14)] for _ in range(6)]
    return "#" + "".join(picked)
def replaceMark(line):
    """Return ``line`` with each listed punctuation mark turned into a space.

    Characters that are not in the mark list (letters, digits, whitespace,
    and unlisted symbols such as ``!``) are kept unchanged.
    """
    marks = "~@#$%^&*()_-+=<>?/,.:;{}[]|\'\""
    return "".join(" " if symbol in marks else symbol for symbol in line)
def process_read(txt1):
    """Build a word -> occurrence-count dict from an iterable of text lines.

    Every line is lower-cased, passed through ``replaceMark`` and split on
    whitespace; each resulting token is tallied.

    Args:
        txt1: Iterable yielding lines of text (for example an open file).

    Returns:
        A dict mapping each token to the number of times it occurred.
    """
    tally = {}
    for raw_line in txt1:
        cleaned = replaceMark(raw_line.lower())
        for token in cleaned.split():
            tally[token] = tally.get(token, 0) + 1
    return tally
def DIY_draw(data, words, max_items=9):
    """Draw the most frequent words as a row of filled, labelled circles.

    Each circle's radius is the word's count times a common scale factor
    derived from the sum of the drawn counts. The first circle is placed at
    x=150 and every following one further left by the two neighbouring
    radii. Labels of the form ``word[count]`` are then written in a second
    pass, alternating between two vertical offsets.

    Args:
        data: Occurrence counts, one per word, in drawing order.
        words: Words matching ``data`` position by position.
        max_items: Maximum number of circles to draw. Defaults to 9, the
            value that used to be hard-coded.
    """
    turtle.title("詞頻結果統計圖")
    turtle.setup(1200, 500, 0, 0)
    pen = turtle.Turtle()

    # Draw only what both lists can supply; short input used to raise
    # IndexError because the loops always ran over nine entries and looked
    # one entry ahead.
    shown = max(0, min(max_items, len(data), len(words)))
    total = sum(data[:shown])
    print(total)
    if total == 0:
        # Nothing to draw, and no scale factor can be derived from a zero sum.
        pen.down()
        return

    start_x = 150
    # Same expression order as before so the scale factor is unchanged.
    coefficient = 2 * start_x / total * 1.2
    radii = [value * coefficient for value in data[:shown]]
    labels = [words[i] + "[" + str(data[i]) + "]" for i in range(shown)]
    if len(labels) > 1:
        print(labels[1])

    # Pass 1: the filled circles.
    x = start_x
    for i, radius in enumerate(radii):
        pen.color(randomcolor())
        pen.penup()
        # Start one radius below y=0; per the turtle docs the circle's centre
        # lies one radius to the turtle's left, which for a turtle facing
        # east puts it on the horizontal axis.
        pen.goto(x, -radius)
        pen.pendown()
        pen.begin_fill()
        pen.circle(radius)
        pen.end_fill()
        # Step left by this radius plus the next one. The step after the
        # last circle was never used, so it is simply skipped.
        if i + 1 < shown:
            x = x - radius - radii[i + 1]

    # Pass 2: the labels, alternating between the two vertical offsets.
    x = start_x
    side = 1
    for i, radius in enumerate(radii):
        pen.color(randomcolor())
        pen.penup()
        pen.goto(x, -radius - side * radius * 1.2)
        pen.write(labels[i], False, align="center", font=("Arial", 18, "normal"))
        if i + 1 < shown:
            x = x - radius - radii[i + 1]
        pen.pendown()
        side = -side
    pen.down()
def main(filename="C:\\Users\\yiqing\\Desktop\\ex1.txt"):
    """Run the whole pipeline: count the words in a file, then draw them.

    Args:
        filename: Path of the text file to analyse. Defaults to the path
            that used to be hard-coded in the body.
    """
    read(filename, data, words)
    DIY_draw(data, words)


# Run only when executed as a script, so importing this module does not
# open a file or pop up a turtle window.
if __name__ == "__main__":
    main()
結果
txt