pyTorch 導入預訓練詞向量 2023

# Demo: load pretrained word2vec vectors into a frozen torch Embedding.
import torch
import gensim
import torch.nn as nn

# Load pretrained vectors in text word2vec format.
wvmodel = gensim.models.KeyedVectors.load_word2vec_format(
    "./data/vector.txt", binary=False, encoding='utf-8')

# Reserve one extra slot (index 0) for the <unk> token, hence len + 1.
vocab_size = len(wvmodel) + 1
vector_size = wvmodel.vector_size

# Random init; every known word's row is overwritten below, so only the
# <unk> row (index 0) keeps its random vector.
weight = torch.randn(vocab_size, vector_size)

words = wvmodel.key_to_index
# Shift every pretrained word's index by +1 so index 0 stays free for <unk>.
word_to_idx = {word: i + 1 for i, word in enumerate(words)}
word_to_idx['<unk>'] = 0
idx_to_word = {i + 1: word for i, word in enumerate(words)}
idx_to_word[0] = '<unk>'

# Copy each pretrained vector into its shifted row of `weight`.
# NOTE: the original looped over range(len(...)) with a bare `except: continue`
# around the lookup and a redundant idx_to_word[word_to_idx[...]] round-trip;
# every key in index_to_key is in word_to_idx by construction, so the guard
# was dead code that would only have hidden real errors.
for word in wvmodel.index_to_key:
    index = word_to_idx[word]
    weight[index, :] = torch.from_numpy(wvmodel.get_vector(word))

# freeze=True: the embedding matrix is not updated during training.
embedding = nn.Embedding.from_pretrained(weight, freeze=True)
embedding
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章