算法設計與分析-文檔比較-代碼解析

原創

2020-02-24 09:47

import math
import sys
def read_file(filename):
    """Read a text file and return its lines.

    Args:
        filename: Path of the file to read.

    Returns:
        A list of strings, one per line of the file, as produced by
        ``readlines`` (line terminators are kept).

    Note:
        If the file cannot be opened or read, an error message is printed
        and the process is terminated through ``sys.exit()``.
    """
    try:
        # The context manager closes the handle on every path; the previous
        # version opened the file and never closed it.
        with open(filename) as fp:
            return fp.readlines()
    except IOError:
        print("error opening or reading input file:", filename)
        sys.exit()
def get_words_from_string(line):
    """Split one line of text into lowercase alphanumeric words.

    Every maximal run of characters for which ``isalnum()`` is true forms a
    word; every other character acts as a separator.

    Args:
        line: A single string (one line of a document).

    Returns:
        The words in order of appearance, each converted to lowercase.
    """
    # Blank out every separator so that whitespace splitting isolates each
    # alphanumeric run, including one that reaches the end of the line.
    spaced = "".join(ch if ch.isalnum() else " " for ch in line)
    return [token.lower() for token in spaced.split()]


def get_words_from_line_list(L):
    """Collect the words of a whole document.

    Args:
        L: The document as a list of strings, one entry per line.

    Returns:
        A single list with the words of every line, in document order, as
        produced by ``get_words_from_string`` for each line.
    """
    word_list = []
    for line in L:
        # extend() appends in place. The previous
        # ``word_list = word_list + words`` copied the whole accumulated
        # list for every line, which is quadratic in the document size.
        word_list.extend(get_words_from_string(line))
    return word_list

def count_ferquency(word_list):
    """Count how many times each word occurs.

    Args:
        word_list: List of words (hashable values, normally strings).

    Returns:
        A list of ``[word, count]`` pairs, ordered by the first occurrence
        of each word. The previous version built this list but never
        returned it, so callers always received ``None``.
    """
    # A dict gives constant-time lookup per word and keeps insertion order,
    # so the result has the same order as the former linear-scan version.
    counts = {}
    for new_word in word_list:
        counts[new_word] = counts.get(new_word, 0) + 1
    return [[word, count] for word, count in counts.items()]



def inner_product(L1, L2):
    """Return the inner product of two word-frequency vectors.

    Args:
        L1: List of ``(word, count)`` pairs.
        L2: List of ``(word, count)`` pairs.

    Returns:
        The sum of ``count1 * count2`` over every pair of entries whose
        words are equal; ``0`` when the vectors share no word.
    """
    # Index L2 by word so each entry of L1 needs one lookup instead of a
    # scan over all of L2. Counts of a repeated word are added up, which
    # yields the same total as multiplying against each duplicate in turn.
    totals = {}
    for word2, cont2 in L2:
        totals[word2] = totals.get(word2, 0) + cont2
    # Named ``result`` rather than ``sum`` to keep the builtin usable.
    result = 0
    for word1, cont1 in L1:
        if word1 in totals:
            result += cont1 * totals[word1]
    return result

def vector_angle(L1, L2):
    """Return the angle, in radians, between two word-frequency vectors.

    The angle is ``acos(L1 . L2 / sqrt((L1 . L1) * (L2 . L2)))``, with the
    inner products computed by ``inner_product``.

    Args:
        L1: List of ``(word, count)`` pairs.
        L2: List of ``(word, count)`` pairs.

    Returns:
        The angle as a float.

    Raises:
        ZeroDivisionError: If either vector has a zero inner product with
            itself (for example an empty list), as before.
    """
    # Numerator: inner product of the two vectors.
    numerator = inner_product(L1, L2)
    # Denominator: product of the two vector norms.
    denominator = math.sqrt(inner_product(L1, L1) * inner_product(L2, L2))
    # Rounding in the square root and division can leave the cosine a hair
    # outside [-1, 1] (typically for identical documents), which would make
    # math.acos raise ValueError; clamp it back into the valid domain.
    cosine = max(-1.0, min(1.0, numerator / denominator))
    return math.acos(cosine)

發表評論

所有評論

還沒有人評論，想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.

算法設計與分析-文檔比較-代碼解析

藍橋杯題解十字圖 python 計蒜客

卷積神經網絡理解-提取總結--大話卷積神經網絡CNN

2016年藍橋杯A組第六題寒假作業（python實現）

創建堆的數據結構（C++）

計蒜客快速提升代碼能力 python題解

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結