import nltk #自然語言處理
import re #正則
from colorama import Fore,Back,Style #改變終端輸出字體和顏色
# Demo: print a sentence to the terminal with every whole-word occurrence
# of a target word shown in red.
ex_sent = "I have a pen and a pencil. But Idon't want the pen, I just want a pencil."

# Tokenize so the membership test below compares against whole tokens
# (the token 'pencil' does not count as an occurrence of 'pen').
sp_sent = nltk.word_tokenize(ex_sent)
wo = 'pen'

if wo in sp_sent:
    print(wo)
    # Split the sentence once on whole-word matches. The word is escaped so
    # that regex metacharacters in it are matched literally.
    parts = re.split(r'\b%s\b' % re.escape(wo), ex_sent)
    # Re-insert the word, wrapped in colour codes, between the pieces.
    # Joining the split result (instead of indexing it by the token count)
    # keeps the output correct even when the tokenizer and the regex
    # disagree on the number of occurrences.
    highlighted_word = Fore.RED + wo + Style.RESET_ALL
    print(highlighted_word.join(parts), end='')
# Output: (the sample output is not included in this file)
# References:
# nltk:
#   http://www.pythontip.com/blog/post/10012/
# colorama:
#   https://www.cnblogs.com/xieshengsen/p/6932233.html
#   https://pypi.python.org/pypi/colorama