# 选择喜欢的小说,统计出场人物词频排名
import jieba
# Tokens that are removed from the ranking even when they occur in the text.
excludes = {
    "人马", "都督", "后主", "军马", "主公", "孔明曰", "左右", "东吴",
    "于是", "知道", "众将", "大喜", "二人", "玄德曰", "天下",
    "军士", "引兵", "陛下", "次日", "丞相", "如此", "商议", "魏兵",
    "只见", "今日", "却说", "不是", "将军", "不可", "不能", "荆州",
    "不知", "这个", "如何", "一人", "汉中", "蜀兵", "不敢", "大叫",
}


def count_words(words, excluded=frozenset()):
    """Count how often each token occurs in *words*.

    Args:
        words: Iterable of string tokens (e.g. the list returned by
            ``jieba.lcut``).
        excluded: Collection of tokens that must not appear in the result.
            Tokens listed here that never occur in *words* are simply
            ignored instead of raising ``KeyError``.

    Returns:
        A dict mapping each kept token to its number of occurrences, in
        order of first occurrence.
    """
    counts = {}
    for word in words:
        # Skip single-character segmentation results and excluded tokens.
        if len(word) == 1 or word in excluded:
            continue
        counts[word] = counts.get(word, 0) + 1
    return counts


def top_words(counts, limit=15):
    """Return the most frequent entries of *counts*.

    Args:
        counts: Mapping of token -> occurrence count.
        limit: Maximum number of entries to return.

    Returns:
        A list of ``(word, count)`` tuples sorted by descending count. Ties
        keep the mapping's iteration order because the sort is stable. The
        list is shorter than *limit* when fewer entries exist, so callers
        never hit an ``IndexError``.
    """
    ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)
    return ranked[:limit]


def main():
    """Read the novel, segment it with jieba and print the top 15 tokens."""
    # NOTE(review): assumes the text file is UTF-8 encoded; change the
    # encoding here if the file is stored as GBK/GB18030.
    with open("三国演义.txt", "r", encoding="utf-8") as f:
        txt = f.read()
    words = jieba.lcut(txt)
    counts = count_words(words, excludes)
    for word, count in top_words(counts, 15):
        print("{0:<10}{1:>5}".format(word, count))


if __name__ == "__main__":
    main()
# Python统计小说中出场人物词频
# 發表評論
# 所有評論
# 還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.