參考博客:https://blog.csdn.net/brucewong0516/article/details/79055480
本地有一個停用詞表文件 eng_stop_words.txt,現在使用 pickle 將它打包保存:
def savedb(pre_file, filename):
    """Pickle an object to a file on disk.

    Args:
        pre_file: The object to serialize (anything ``pickle.dump`` accepts).
        filename: Path of the output file; it is created or overwritten.
    """
    # The with-block closes the file even if pickle.dump raises.
    with open(filename, 'wb') as out_file:
        pickle.dump(pre_file, out_file)
# Pickle the stop words themselves rather than the file name: passing
# 'eng_stop_words.txt' directly would store only that string, and a later
# `word in stopwords` check would become a substring test against it.
# NOTE(review): assumes one stop word per line in a UTF-8 text file -- confirm.
with open('eng_stop_words.txt', encoding='utf-8') as words_file:
    stop_words = {line.strip() for line in words_file if line.strip()}
savedb(stop_words, 'eng_stop_words.pkl')
使用時:
def drop_stopwords(contents, stopwords):
    """Remove stop words from tokenized text and return one flat list.

    Args:
        contents: Iterable of lines, where each line is an iterable of words.
        stopwords: Container checked with ``in``; words found in it are
            dropped. A ``set`` gives the fastest lookups.

    Returns:
        A single flat list holding every remaining word, in original order
        (line boundaries are not preserved).
    """
    contents_clean = []
    for line in contents:
        # Keep only the words of this line that are not stop words.
        contents_clean.extend(word for word in line if word not in stopwords)
    return contents_clean
# Read the pickled stop words; the with-block closes the file handle promptly.
# NOTE: only unpickle files you created yourself -- pickle.load can execute
# arbitrary code from an untrusted file.
with open('eng_stop_words.pkl', 'rb') as stopwords_file:
    stopwords = pickle.load(stopwords_file)
content_list = drop_stopwords(content, stopwords)