A simple PyTorch implementation of the NNLM model (annotated)

I learned this code by following the blog of a Bilibili (b站) creator and then added some comments of my own:

https://wmathor.com/index.php/archives/1442/

import torch
import torch.nn as nn
import torch.optim
import torch.utils.data as Data

dtype = torch.FloatTensor

sentences = ['i like cat', 'i love coffee', 'i hate milk']
# split the sentences above into individual words
word_list = " ".join(sentences).split()
# deduplicate the word list
word_list = list(set(word_list))
# build a word-to-index mapping; enumerate yields the index first, then the word
word_dict = {w:i for i, w in enumerate(word_list)}
# build the reverse index-to-word mapping
number_dict = {i:w for i, w in enumerate(word_list)}
# vocabulary size
n_class = len(word_dict)
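
To make the index comments further down concrete, here is a quick, purely illustrative inspection of these mappings. Because set() does not guarantee ordering, the exact indices differ from run to run; the values shown are just one possible outcome.

# inspect the vocabulary (indices are run-dependent because of set())
print(word_list)    # e.g. ['i', 'like', 'cat', 'love', 'coffee', 'hate', 'milk']
print(word_dict)    # e.g. {'i': 0, 'like': 1, 'cat': 2, 'love': 3, 'coffee': 4, 'hate': 5, 'milk': 6}
print(number_dict)  # e.g. {0: 'i', 1: 'like', 2: 'cat', ...}
print(n_class)      # 7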

# number of context words the NNLM conditions on (the input window)
n_step = len(sentences[0].split())-1
# size of the hidden layer
n_hidden = 2
# dimension of the word embedding vectors
m = 2

# build the input and target data
def make_batch(sentences):
    input_batch = []
    target_batch = []

    for sen in sentences:
        word = sen.split() # tokenize the sentence into words
        # [:-1] takes all but the last word of each sentence (here the first two) as the input
        # word_dict then maps these words to their indices, which are what the network actually sees
        input = [word_dict[n] for n in word[:-1]] # [0, 1], [0, 3], [0, 5]
        # target is the index of the word to predict, i.e. cat, coffee and milk here
        target = word_dict[word[-1]] # 2, 4, 6
        
        # collect the inputs
        input_batch.append(input) # [[0, 1], [0, 3], [0, 5]]
        # collect the targets
        target_batch.append(target) # [2, 4, 6]

    return input_batch, target_batch

input_batch, target_batch = make_batch(sentences)
# load the data into torch tensors
input_batch = torch.LongTensor(input_batch)
target_batch = torch.LongTensor(target_batch)

dataset = Data.TensorDataset(input_batch, target_batch)
loader = Data.DataLoader(dataset=dataset, batch_size=16, shuffle=True)
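
As another purely illustrative sanity check, the tensor shapes and one batch drawn from the loader can be inspected as follows; with only three training samples and batch_size=16, the loader yields a single batch per epoch.

# inspect the tensors and one batch from the DataLoader (illustrative)
print(input_batch.shape)   # torch.Size([3, 2])  -> 3 samples, n_step = 2 context words each
print(target_batch.shape)  # torch.Size([3])
for batch_x, batch_y in loader:
    print(batch_x.shape, batch_y.shape)  # torch.Size([3, 2]) torch.Size([3])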

# define the network structure by subclassing nn.Module
class NNLM(nn.Module):
  def __init__(self):
    super(NNLM, self).__init__()
    # the embedding table, of size n_class (= len(word_dict)) x m
    self.C = nn.Embedding(n_class, m)
    # initialize the remaining network parameters; the model computes
    """
    hidden_out = tanh(d + X*H)
    y = b + X*W + hidden_out*U
    """
    self.H = nn.Parameter(torch.randn(n_step * m, n_hidden).type(dtype)) # input-to-hidden weights
    self.W = nn.Parameter(torch.randn(n_step * m, n_class).type(dtype))  # direct input-to-output weights
    self.d = nn.Parameter(torch.randn(n_hidden).type(dtype))             # hidden bias
    self.U = nn.Parameter(torch.randn(n_hidden, n_class).type(dtype))    # hidden-to-output weights
    self.b = nn.Parameter(torch.randn(n_class).type(dtype))              # output bias

  def forward(self, X):
    '''
    X: [batch_size, n_step]
    '''
    # look up the embedding table and turn the input into a 3-D tensor:
    # every word index is replaced by its m-dimensional embedding vector
    X = self.C(X) # [batch_size, n_step] => [batch_size, n_step, m]
    # concatenate the embeddings of the n_step context words in each sample into a single row
    # passing -1 as the first argument of view lets PyTorch infer that dimension automatically
    X = X.view(-1, n_step * m) # [batch_size, n_step * m]
    hidden_out = torch.tanh(self.d + torch.mm(X, self.H)) # [batch_size, n_hidden]
    output = self.b + torch.mm(X, self.W) + torch.mm(hidden_out, self.U) # [batch_size, n_class]
    return output
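
To make the shape comments in forward easier to follow, here is a small throwaway sketch (not part of the original code) that traces a dummy batch of two contexts through the same operations; the underscore-prefixed names are my own and can be deleted afterwards.

# shape walkthrough with a dummy batch (illustrative only; indices must be < n_class)
_m = NNLM()
_x = torch.LongTensor([[0, 1], [0, 3]])                    # [batch_size=2, n_step=2]
_e = _m.C(_x)                                              # [2, 2, 2]  (batch, n_step, m)
_flat = _e.view(-1, n_step * m)                            # [2, 4]     concatenated context embeddings
_h = torch.tanh(_m.d + torch.mm(_flat, _m.H))              # [2, 2]     (batch, n_hidden)
_y = _m.b + torch.mm(_flat, _m.W) + torch.mm(_h, _m.U)     # [2, 7]     (batch, n_class)
print(_y.shape)                                            # torch.Size([2, 7])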

model = NNLM()
# cross-entropy loss, since this is a classification problem
criterion = nn.CrossEntropyLoss()
# use Adam as the optimizer
# an optimizer is simply the rule used to update the network's parameters
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# start training
for epoch in range(5000):
  for batch_x, batch_y in loader:
    optimizer.zero_grad()
    output = model(batch_x)

    # output : [batch_size, n_class], batch_y : [batch_size] (LongTensor, not one-hot)
    loss = criterion(output, batch_y)
    # print the loss every 1000 epochs
    if (epoch + 1)%1000 == 0:
        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))

    # backpropagate and update the parameters
    loss.backward()
    optimizer.step()

# Predict
# max(1) returns the (values, indices) of the maximum along dim 1 (the class dimension); [1] selects the indices
predict = model(input_batch).data.max(1, keepdim=True)[1]

# Test
# squeeze() removes the dimensions of size 1
print([sen.split()[:n_step] for sen in sentences], '->', [number_dict[n.item()] for n in predict.squeeze()])

The final output is as follows:

Epoch: 1000 cost = 0.371559
Epoch: 2000 cost = 0.069313
Epoch: 3000 cost = 0.012472
Epoch: 4000 cost = 0.004256
Epoch: 5000 cost = 0.001829
[['i', 'like'], ['i', 'love'], ['i', 'hate']] -> ['cat', 'coffee', 'milk']
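
For completeness, a single context can also be queried directly. Below is a minimal sketch (not in the original post) that asks the trained model which word is most likely to follow "i like".

# predict the next word for one context (illustrative sketch)
ctx = torch.LongTensor([[word_dict['i'], word_dict['like']]])  # shape [1, n_step]
with torch.no_grad():
    idx = model(ctx).argmax(dim=1).item()
print(number_dict[idx])  # after training this should print 'cat'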

 
