current_dir = os.path.abspath('.')
w2v_file = os.path.join(current_dir, 'w2v_file_sg')
self.dic = gensim.models.Word2Vec.load(w2v_file)
for word in word_list:
if word not in self.dic.wv.vocab:
continue
else:
result = self.get_max_similar(word,sim_value)
if result != '':
ret.append(result)
def get_max_similar(self, word, sim_value):
    """Return the label whose reference vector is most similar to ``word``.

    The vector for ``word`` is read from ``self.dic.wv`` and compared, via
    ``self.cos_sim``, against every reference vector supplied by
    ``self.getKey()``. Index 0 of that result is used as the sequence of
    reference vectors and index 1 as the parallel sequence of labels.

    Args:
        word: Key looked up in ``self.dic.wv``; it is concatenated into the
            printed message, so it must be a ``str``.
        sim_value: Minimum similarity the best match must reach.

    Returns:
        The label with the highest similarity strictly greater than 0.0, or
        ``''`` when no score exceeds 0.0 or the best score is below
        ``sim_value``.
    """
    target_vec = self.dic.wv[word]
    keys_and_labels = self.getKey()
    reference_vecs = keys_and_labels[0]
    reference_labels = keys_and_labels[1]

    best_score = 0.0
    best_label = ''
    for idx, reference in enumerate(reference_vecs):
        score = self.cos_sim(target_vec, reference)
        # Strict comparison: on ties the earliest label wins, and scores
        # that are not above 0.0 never replace the empty default.
        if score > best_score:
            best_score = score
            best_label = reference_labels[idx]

    # Progress output: the best match is reported before the threshold is
    # applied, so a label may be printed even if '' is returned.
    message = "【" + word + "】與【類別:" + best_label + "】,相似度爲===" + str(best_score)
    print(message)

    return '' if best_score < sim_value else best_label
def cos_sim(self, arrA, arrB):
    """Return the cosine similarity of two vectors.

    Args:
        arrA: NumPy array; its ``dot`` method is used for the inner product.
        arrB: Array accepted by ``arrA.dot`` and ``np.linalg.norm``.

    Returns:
        ``arrA . arrB / (||arrA|| * ||arrB||)``. When either vector has zero
        norm the ratio is undefined, and ``0.0`` is returned instead of NaN.
    """
    denominator = np.linalg.norm(arrA) * np.linalg.norm(arrB)
    if denominator == 0:
        # Dividing here would yield NaN and emit a NumPy RuntimeWarning;
        # report "no similarity" so comparisons against the result behave.
        return 0.0
    return arrA.dot(arrB) / denominator
def average(self, key_list):
    """Return the mean of the ``self.dic.wv`` entries for ``key_list``.

    Each key is looked up in ``self.dic.wv``; the looked-up values are added
    in order, starting from ``0.0``, and the total is divided by
    ``len(key_list)``.

    Args:
        key_list: Sized iterable of keys present in ``self.dic.wv``.

    Returns:
        The accumulated total divided by the number of keys.

    Raises:
        ZeroDivisionError: If ``key_list`` is empty (``0.0 / 0``).
    """
    # The explicit 0.0 start value keeps the accumulation identical to
    # adding each looked-up vector onto a float zero, one at a time.
    total = sum((self.dic.wv[key] for key in key_list), 0.0)
    return total / len(key_list)