Using an RNN to Give a Baby an English Name, by SixInNight


Experiment Description

This experiment provides more than 8,000 English names. The task is to train a recurrent neural network (RNN) language model so that, given a starting letter, the model generates the subsequent letters one at a time until it produces an end-of-name marker, while also reporting the top few most likely candidate letters it predicts at each time step.
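To make the setup concrete, here is a minimal illustration (not from the original post; the name "Anna" is just an example) of how a character-level language model pairs inputs with targets: the input sequence is the name itself, and the target sequence is the same name shifted by one letter with an EOS marker appended.

# Hypothetical illustration of the input/target pairing in a character-level language model
name = "Anna"
inputs = list(name)                    # ['A', 'n', 'n', 'a']
targets = list(name[1:]) + ["<EOS>"]   # ['n', 'n', 'a', '<EOS>']
for x, y in zip(inputs, targets):
    print("input:", x, "-> target:", y)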

Preparation

Install PyTorch

Open cmd and run the following command:
pip install torch===1.5.1 torchvision===0.6.1 -f https://download.pytorch.org/whl/torch_stable.html

Inspect the Dataset and Build the Dictionary

The English names are stored in three files under the baby-names folder: female.txt, male.txt, and pet.txt, so there are three categories of baby names in total. Build a dictionary that stores the names of each category:

import os
import glob

all_kinds = []         # category labels (file names without the .txt extension)
all_kinds_names = {}   # maps each category to its list of names
all_files = glob.glob('C:/Users/Administrator/Desktop/機器學習/baby-names/*.txt')
for f in all_files:
    kind = os.path.splitext(os.path.basename(f))[0]
    all_kinds.append(kind)
    one_kind_names = open(f, encoding='utf-8').read().strip().split('\n')
    all_kinds_names[kind] = one_kind_names
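As a quick sanity check (a small sketch, assuming the three files were read successfully), printing each category and the number of names it contains should show the female, male, and pet categories:

# Hypothetical check: list each category and how many names it holds
for kind in all_kinds:
    print(kind, len(all_kinds_names[kind]))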

Convert to Tensor Format

Randomly pick a (category, name) pair and convert it into the (category, input, target) tensors the model needs:

def random_train():
    # Pick a random category, then a random name from that category
    kind = random.choice(all_kinds)
    name = random.choice(all_kinds_names[kind])

    # One-hot tensor for the category
    kind_tensor = torch.zeros(1, num_of_all_kinds)
    kind_tensor[0][all_kinds.index(kind)] = 1

    # One-hot tensor for each letter of the name (one time step per letter)
    input_name_tensor = torch.zeros(len(name), 1, num_of_all_letters)
    for i in range(len(name)):
        letter = name[i]
        input_name_tensor[i][0][all_letters.find(letter)] = 1

    # Target: the name shifted by one letter, with the EOS index appended
    letter_indexes = [all_letters.find(name[j]) for j in range(1, len(name))]
    letter_indexes.append(num_of_all_letters - 1)
    target_name_tensor = torch.LongTensor(letter_indexes)
    return kind_tensor, input_name_tensor, target_name_tensor
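As a quick check (a sketch, assuming the dictionary and letter set from the preparation step are in scope), drawing one training example and printing the tensor shapes should show one time step per input letter and one target index per time step:

# Hypothetical shape check for a single training example
kind_tensor, input_name_tensor, target_name_tensor = random_train()
print(kind_tensor.shape)         # torch.Size([1, num_of_all_kinds])
print(input_name_tensor.shape)   # torch.Size([len(name), 1, num_of_all_letters])
print(target_name_tensor.shape)  # torch.Size([len(name)]), shifted letters plus EOS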

Build the Network

During sampling, the input letter at the next time step is the letter with the highest output probability at the current step. The name category is handled the same way as the letters: it is turned into a one-hot vector and fed in as part of the input. The dropout layer randomly zeroes part of its input to add noise and reduce overfitting (see https://arxiv.org/abs/1207.0580); here it is applied near the end of the network.

class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.i2h = nn.Linear(num_of_all_kinds + input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(num_of_all_kinds + input_size + hidden_size, output_size)
        self.o2o = nn.Linear(hidden_size + output_size, output_size)
        self.dropout = nn.Dropout(0.1)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, category, input, hidden):
        input_combined = torch.cat((category, input, hidden), 1)
        hidden = self.i2h(input_combined)
        output = self.i2o(input_combined)
        output_combined = torch.cat((hidden, output), 1)
        output = self.o2o(output_combined)
        output = self.dropout(output)
        output = self.softmax(output)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, self.hidden_size)
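To see which tensor shapes flow through the network, here is a minimal sketch of a single forward step. It assumes the variables from the preparation step are in scope and uses the values they imply (3 categories; 59 characters, i.e. 52 letters, 6 punctuation marks, and EOS); these concrete numbers are inferred from the dataset and character set, not stated explicitly in the original post.

# Hypothetical single-step shape check (not part of the original training code)
demo_rnn = RNN(num_of_all_letters, 128, num_of_all_letters)

category = torch.zeros(1, num_of_all_kinds)      # one-hot category, e.g. 'male'
category[0][all_kinds.index('male')] = 1
letter = torch.zeros(1, num_of_all_letters)      # one-hot letter for one time step
letter[0][all_letters.find('L')] = 1
hidden = demo_rnn.initHidden()                   # shape (1, 128)

output, hidden = demo_rnn(category, letter, hidden)
print(output.shape, hidden.shape)                # torch.Size([1, 59]) torch.Size([1, 128])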

Train the Network

Predict and Compute the Loss

A prediction is made and a loss computed at every time step; the losses are accumulated over the sequence and back-propagated once at the end:

rnn = RNN(num_of_all_letters, 128, num_of_all_letters)
criterion = nn.NLLLoss()
learning_rate = 0.0005
# optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)

def train(kind_tensor, input_name_tensor, target_name_tensor):
    hidden = rnn.initHidden()
    rnn.zero_grad()
    target_name_tensor.unsqueeze_(-1)
    loss = 0
    for i in range(input_name_tensor.size(0)):
        output, hidden = rnn(kind_tensor, input_name_tensor[i], hidden)
        loss += criterion(output, target_name_tensor[i])
    loss.backward()
    # Manual SGD update: p <- p - learning_rate * grad
    for p in rnn.parameters():
        p.data.add_(p.grad.data, alpha=-learning_rate)
    # optimizer.step()
    return output, loss.item() / input_name_tensor.size(0)
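The loop above updates the parameters by hand with plain SGD. An equivalent version using torch.optim (a sketch matching the commented-out optimizer lines, not the form used in the original post) would look like this:

import torch.optim as optim

optimizer = optim.SGD(rnn.parameters(), lr=learning_rate)

def train_with_optimizer(kind_tensor, input_name_tensor, target_name_tensor):
    # Same training step as train(), but the update is delegated to the optimizer
    hidden = rnn.initHidden()
    optimizer.zero_grad()
    target_name_tensor.unsqueeze_(-1)
    loss = 0
    for i in range(input_name_tensor.size(0)):
        output, hidden = rnn(kind_tensor, input_name_tensor[i], hidden)
        loss += criterion(output, target_name_tensor[i])
    loss.backward()
    optimizer.step()
    return output, loss.item() / input_name_tensor.size(0)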

Print Training Progress and Loss, Prepare Data for Plotting

During training, print the progress and the loss, and keep appending the average loss to a list (used later for plotting):

losses = 0
L_loss = []
for i in range(1, 100001):
    output, loss = train(*random_train())
    losses += loss
    if(i % 5000 == 0):
        print('\n Waiting... {}%\tloss:{}'.format((i/1000), round(loss, 5)))
    if(i % 500 == 0):
        L_loss.append(round(losses/500, 5))
        losses = 0

Plot the Loss to Show How the Network Learns

plt.figure()
plt.plot(L_loss)
plt.show()

Predict Names

At each step, the next letter is predicted from the current one; the most likely letter is appended each time until the end-of-name marker is produced, and the complete predicted English name is returned:

print('\n\nNumber of name categories and the category labels: {}\t{}\n'.format(num_of_all_kinds, all_kinds))

def predict(kind, first='A'):
    with torch.no_grad():
        kind_tensor = torch.zeros(1, num_of_all_kinds)
        kind_tensor[0][all_kinds.index(kind)] = 1
        input = torch.zeros(1, 1, num_of_all_letters)
        input[0][0][all_letters.find(first[0])] = 1
        hidden = rnn.initHidden()
        predict_name = first
        for i in range(7):
            output, hidden = rnn(kind_tensor, input[0], hidden)
            tv, ti = output.topk(1)
            t = ti[0][0].item()
            if t == num_of_all_letters - 1:   # EOS predicted: the name is finished
                break
            else:
                predict_name += all_letters[t]
            # Feed the predicted letter back in as the next input
            input = torch.zeros(1, 1, num_of_all_letters)
            input[0][0][t] = 1
        return predict_name
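The experiment description also asks for the top few candidate letters at each time step, while predict() only keeps the single best one. A hedged variant (the function name and the k=3 default are assumptions, not from the original post) that additionally prints the top-k candidates per step could look like this:

def predict_with_candidates(kind, first='A', k=3, max_len=7):
    # Same sampling loop as predict(), but also prints the top-k candidate letters per step
    with torch.no_grad():
        kind_tensor = torch.zeros(1, num_of_all_kinds)
        kind_tensor[0][all_kinds.index(kind)] = 1
        input = torch.zeros(1, 1, num_of_all_letters)
        input[0][0][all_letters.find(first[0])] = 1
        hidden = rnn.initHidden()
        predict_name = first
        for i in range(max_len):
            output, hidden = rnn(kind_tensor, input[0], hidden)
            tv, ti = output.topk(k)
            candidates = ['EOS' if idx == num_of_all_letters - 1 else all_letters[idx]
                          for idx in ti[0].tolist()]
            print('step {}: top-{} candidates: {}'.format(i + 1, k, candidates))
            t = ti[0][0].item()
            if t == num_of_all_letters - 1:
                break
            predict_name += all_letters[t]
            # Feed the predicted letter back in as the next input
            input = torch.zeros(1, 1, num_of_all_letters)
            input[0][0][t] = 1
        return predict_name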

Test and Print Results

Predict a boy's name starting with the letter L:

first_letter = 'L'
which_kind = 'male'
print('Predicting a name starting with {} for a {} baby'.format(first_letter, which_kind))
print('\n The predicted name is: {}\n'.format(predict(which_kind, first_letter)))

Predict a girl's name starting with the letter R:

first_letter = 'R'
which_kind = 'female'
print('Predicting a name starting with {} for a {} baby'.format(first_letter, which_kind))
print('\n The predicted name is: {}\n'.format(predict(which_kind, first_letter)))


Complete Code

import os
import glob
import random
import string
import torch
import torch.nn as nn
import matplotlib.pyplot as plt

# 1. Preparation

# Build a dictionary that stores the names of each category
all_kinds_names = {}
all_kinds = []
all_files = glob.glob('C:/Users/Administrator/Desktop/機器學習/baby-names/*.txt')
for f in all_files:
    kind = os.path.splitext(os.path.basename(f))[0]
    all_kinds.append(kind)
    one_kind_names = open(f, encoding='utf-8').read().strip().split('\n')
    all_kinds_names[kind] = one_kind_names

# Number of name categories
num_of_all_kinds = len(all_kinds)

# Total number of characters = upper- and lowercase English letters + special characters + the EOS end marker
all_letters = string.ascii_letters + " .,;'-"
num_of_all_letters = len(all_letters) + 1


# Randomly pick a (category, name) pair and convert it into the (category, input, target) tensors
def random_train():
    kind = random.choice(all_kinds)
    name = random.choice(all_kinds_names[kind])

    kind_tensor = torch.zeros(1, num_of_all_kinds)
    kind_tensor[0][all_kinds.index(kind)] = 1

    input_name_tensor = torch.zeros(len(name), 1, num_of_all_letters)
    for i in range(len(name)):
        letter = name[i]
        input_name_tensor[i][0][all_letters.find(letter)] = 1

    letter_indexes = [all_letters.find(name[j]) for j in range(1, len(name))]
    letter_indexes.append(num_of_all_letters - 1)
    target_name_tensor = torch.LongTensor(letter_indexes)
    return kind_tensor, input_name_tensor, target_name_tensor


# 2. Build the network

class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.i2h = nn.Linear(num_of_all_kinds + input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(num_of_all_kinds + input_size + hidden_size, output_size)
        self.o2o = nn.Linear(hidden_size + output_size, output_size)
        self.dropout = nn.Dropout(0.1)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, category, input, hidden):
        input_combined = torch.cat((category, input, hidden), 1)
        hidden = self.i2h(input_combined)
        output = self.i2o(input_combined)
        output_combined = torch.cat((hidden, output), 1)
        output = self.o2o(output_combined)
        output = self.dropout(output)
        output = self.softmax(output)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, self.hidden_size)


# 3. Train the network

rnn = RNN(num_of_all_letters, 128, num_of_all_letters)
losses = 0
L_loss = []
criterion = nn.NLLLoss()
learning_rate = 0.0005
# optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)


def train(kind_tensor, input_name_tensor, target_name_tensor):
    hidden = rnn.initHidden()
    rnn.zero_grad()
    target_name_tensor.unsqueeze_(-1)
    loss = 0
    for i in range(input_name_tensor.size(0)):
        output, hidden = rnn(kind_tensor, input_name_tensor[i], hidden)
        loss += criterion(output, target_name_tensor[i])
    loss.backward()
    # Manual SGD update: p <- p - learning_rate * grad
    for p in rnn.parameters():
        p.data.add_(p.grad.data, alpha=-learning_rate)
    # optimizer.step()
    return output, loss.item() / input_name_tensor.size(0)


for i in range(1, 100001):
    output, loss = train(*random_train())
    losses += loss
    if(i % 5000 == 0):
        print('\n Waiting... {}%\tloss:{}'.format((i/1000), round(loss, 5)))
    if(i % 500 == 0):
        L_loss.append(round(losses/500, 5))
        losses = 0

# 4. Plot the loss data to show how the network learns (the plot is displayed at the end of the script)

plt.figure()
plt.plot(L_loss)

# 5. Predict names

print('\n\nNumber of name categories and the category labels: {}\t{}\n'.format(num_of_all_kinds, all_kinds))


def predict(kind, first='A'):
    with torch.no_grad():
        kind_tensor = torch.zeros(1, num_of_all_kinds)
        kind_tensor[0][all_kinds.index(kind)] = 1
        input = torch.zeros(1, 1, num_of_all_letters)
        input[0][0][all_letters.find(first[0])] = 1
        hidden = rnn.initHidden()
        predict_name = first
        for i in range(7):
            output, hidden = rnn(kind_tensor, input[0], hidden)
            tv, ti = output.topk(1)
            t = ti[0][0].item()
            if t == num_of_all_letters - 1:   # EOS predicted: the name is finished
                break
            else:
                predict_name += all_letters[t]
            # Feed the predicted letter back in as the next input
            input = torch.zeros(1, 1, num_of_all_letters)
            input[0][0][t] = 1
        return predict_name


# 6. Test and print results

# Predict a boy's name starting with the letter L
first_letter = 'L'
which_kind = 'male'
print('Predicting a name starting with {} for a {} baby'.format(first_letter, which_kind))
print('\n The predicted name is: {}\n'.format(predict(which_kind, first_letter)))
# Predict a girl's name starting with the letter R
first_letter = 'R'
which_kind = 'female'
print('Predicting a name starting with {} for a {} baby'.format(first_letter, which_kind))
print('\n The predicted name is: {}\n'.format(predict(which_kind, first_letter)))

plt.show()

