import math
import sys
def read_file(filename):
    """Read a text file and return its lines as a list of strings.

    Args:
        filename: Path of the file to read.

    Returns:
        The list produced by ``readlines()``: one string per line, each
        keeping its trailing newline.

    On ``IOError`` (the file cannot be opened or read) an error message is
    printed and the program terminates through ``sys.exit()``.
    """
    try:
        # The context manager guarantees the handle is closed, even if
        # reading fails part-way through.
        with open(filename) as fp:
            return fp.readlines()
    except IOError:
        print("error opening or reading input file:", filename)
        # NOTE(review): sys.exit() without an argument reports exit status 0
        # even though this is an error path; kept as-is for compatibility.
        sys.exit()
def get_words_from_string(line):
    """Split one line of text into lowercase alphanumeric words.

    A word is a maximal run of characters for which ``str.isalnum()`` is
    true; every other character acts as a separator and is discarded.

    Args:
        line: The string to split.

    Returns:
        A list of the words in order of appearance, each converted to
        lowercase. An input with no alphanumeric characters yields ``[]``.
    """
    word_list = []
    character_list = []
    for c in line:
        if c.isalnum():
            character_list.append(c)
        elif character_list:
            # A separator ends the pending word: join the characters,
            # lowercase the result and store it.
            word_list.append("".join(character_list).lower())
            character_list = []
    # The input may end without a separator, leaving a final word that
    # has not been flushed yet.
    if character_list:
        word_list.append("".join(character_list).lower())
    return word_list
def get_words_from_line_list(L):
    """Collect the words of a whole document.

    Args:
        L: The document as a list of strings, one item per line.

    Returns:
        A single list containing the words of every line, in order, as
        produced by ``get_words_from_string`` for each line.
    """
    word_list = []
    for line in L:
        # extend() appends in place; rebuilding the list with ``+`` would
        # copy every previously collected word for each new line.
        word_list.extend(get_words_from_string(line))
    return word_list
def count_ferquency(word_list):
    """Count how many times each word occurs in ``word_list``.

    The function name is a misspelling of "count_frequency"; it is kept
    unchanged so that existing callers continue to work.

    Args:
        word_list: A list of words. Words must be hashable (e.g. strings)
            because a dictionary is used to locate their entries.

    Returns:
        A list of ``[word, count]`` pairs (two-element lists), ordered by
        the first appearance of each word. An empty input yields ``[]``.
    """
    L = []
    # Maps each word to the index of its [word, count] entry in L, so an
    # already-seen word is found without scanning the whole list.
    position = {}
    for new_word in word_list:
        index = position.get(new_word)
        if index is None:
            position[new_word] = len(L)
            L.append([new_word, 1])
        else:
            L[index][1] += 1
    return L
def inner_product(L1, L2):
    """Compute the inner product of two word-frequency vectors.

    Args:
        L1: A sequence of ``(word, count)`` pairs.
        L2: A sequence of ``(word, count)`` pairs.

    Returns:
        The sum of ``count1 * count2`` over every pair of entries whose
        words are equal; ``0`` when no word is shared or either input is
        empty.

    Words must be hashable (e.g. strings) because the entries of ``L2``
    are indexed in a dictionary.
    """
    # Total count per word in L2. Summing (rather than overwriting) keeps
    # the result equal to comparing every entry of L1 with every entry of
    # L2, even if a word appears in L2 more than once.
    counts2 = {}
    for word2, cont2 in L2:
        counts2[word2] = counts2.get(word2, 0) + cont2
    return sum(cont1 * counts2.get(word1, 0) for word1, cont1 in L1)
def vector_angle(L1, L2):
    """Compute the angle between two word-frequency vectors.

    The angle is ``acos(<L1, L2> / sqrt(<L1, L1> * <L2, L2>))``, where
    ``<., .>`` is ``inner_product``.

    Args:
        L1: A sequence of ``(word, count)`` pairs.
        L2: A sequence of ``(word, count)`` pairs.

    Returns:
        The angle in radians, as returned by ``math.acos``.

    Raises:
        ZeroDivisionError: If either vector has zero length (for example
            an empty list), in which case the angle is undefined.
    """
    # Numerator: inner product of the two vectors.
    numerator = inner_product(L1, L2)
    # Denominator: product of the two vector lengths.
    denominator = math.sqrt(inner_product(L1, L1) * inner_product(L2, L2))
    if denominator == 0:
        raise ZeroDivisionError(
            "cannot compute the angle with a zero-length vector"
        )
    # Floating-point rounding can push the ratio marginally outside
    # [-1, 1], which would make math.acos raise a domain error.
    cosine = max(-1.0, min(1.0, numerator / denominator))
    return math.acos(cosine)
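# Source article: "算法設計與分析-文檔比較-代碼解析"
# (Algorithm Design and Analysis - Document Comparison - Code Walkthrough).
# The remaining text was the web page's comment-section boilerplate
# ("Post a comment", "All comments", "No comments yet").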