Using an RNN to Pick an English Name for Your Baby, by SixInNight


Experiment description

This experiment provides more than 8,000 English names. The task is to train a recurrent neural network (RNN) language model so that, given a starting letter, the model generates the following letters one by one until it produces the end-of-name marker; at the same time, the model outputs the top few most likely candidate letters it predicts at each time step.

Preparation

Install PyTorch

Open cmd and run:
pip install torch===1.5.1 torchvision===0.6.1 -f https://download.pytorch.org/whl/torch_stable.html

Inspect the dataset and build a dictionary

The given English names are stored in three files under the baby-names folder: female.txt, male.txt, and pet.txt, so there are three categories of baby names. Build a dictionary that stores the names of each category:

import os
import glob

all_kinds_names = {}   # maps category name -> list of names in that category
all_kinds = []         # list of category names (female, male, pet)
all_files = glob.glob('C:/Users/Administrator/Desktop/机器学习/baby-names/*.txt')
for f in all_files:
    kind = os.path.splitext(os.path.basename(f))[0]
    all_kinds.append(kind)
    one_kind_names = open(f, encoding='utf-8').read().strip().split('\n')
    all_kinds_names[kind] = one_kind_names
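For reference, the later snippets also rely on the category count, the letter alphabet, and the letter count (one extra index is reserved for the EOS end-of-name marker); they are defined exactly as in the full code at the end:

import string

# Number of name categories
num_of_all_kinds = len(all_kinds)

# All allowed characters; one extra slot is reserved for the EOS end marker
all_letters = string.ascii_letters + " .,;'-"
num_of_all_letters = len(all_letters) + 1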

Convert to tensor format

Randomly pick a (category, name-of-that-category) pair and convert it into the required (category, input, target) tensors:

def random_train():
    kind = random.choice(all_kinds)
    name = random.choice(all_kinds_names[kind])

    kind_tensor = torch.zeros(1, num_of_all_kinds)
    kind_tensor[0][all_kinds.index(kind)] = 1

    input_name_tensor = torch.zeros(len(name), 1, num_of_all_letters)
    for i in range(len(name)):
        letter = name[i]
        input_name_tensor[i][0][all_letters.find(letter)] = 1

    letter_indexes = [all_letters.find(name[j]) for j in range(1, len(name))]
    letter_indexes.append(num_of_all_letters - 1)
    target_name_tensor = torch.LongTensor(letter_indexes)
    return kind_tensor, input_name_tensor, target_name_tensor
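A quick sanity check of the tensor shapes (a hypothetical usage sketch, assuming the snippets above have been run):

kind_t, input_t, target_t = random_train()
print(kind_t.shape)    # (1, num_of_all_kinds): one-hot category vector
print(input_t.shape)   # (name_length, 1, num_of_all_letters): one-hot letters
print(target_t.shape)  # (name_length,): indices of the next letters, ending with EOS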

Build the neural network

During sampling, the input letter at the next time step is the letter with the highest output probability at the current step. The name category is handled the same way as the letters: it is encoded as a one-hot vector and concatenated into the input tensor. The dropout layer blurs the input to help prevent overfitting (see https://arxiv.org/abs/1207.0580).

class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.i2h = nn.Linear(num_of_all_kinds + input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(num_of_all_kinds + input_size + hidden_size, output_size)
        self.o2o = nn.Linear(hidden_size + output_size, output_size)
        self.dropout = nn.Dropout(0.1)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, category, input, hidden):
        input_combined = torch.cat((category, input, hidden), 1)
        hidden = self.i2h(input_combined)
        output = self.i2o(input_combined)
        output_combined = torch.cat((hidden, output), 1)
        output = self.o2o(output_combined)
        output = self.dropout(output)
        output = self.softmax(output)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, self.hidden_size)
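To see what a single time step produces, here is a minimal sketch (assuming random_train and the constants above are already defined):

rnn = RNN(num_of_all_letters, 128, num_of_all_letters)
kind_t, input_t, _ = random_train()
hidden = rnn.initHidden()
output, hidden = rnn(kind_t, input_t[0], hidden)
print(output.shape)  # (1, num_of_all_letters): log-probabilities over the next letter
print(hidden.shape)  # (1, 128): hidden state passed on to the next time step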

Train the neural network

Prediction and loss computation

At every time step, make a prediction and compute the loss; accumulate the losses and backpropagate at the end:

rnn = RNN(num_of_all_letters, 128, num_of_all_letters)
criterion = nn.NLLLoss()
learning_rate = 0.0005
# optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)

def train(kind_tensor, input_name_tensor, target_name_tensor):
    hidden = rnn.initHidden()
    rnn.zero_grad()
    target_name_tensor.unsqueeze_(-1)
    loss = 0
    for i in range(input_name_tensor.size(0)):
        output, hidden = rnn(kind_tensor, input_name_tensor[i], hidden)
        loss += criterion(output, target_name_tensor[i])
    # loss = torch.tensor(float(loss), requires_grad=True)
    loss.backward()
    for j in rnn.parameters():
        j.data.add_(j.grad.data, alpha=-learning_rate)
    # optimizer.step()
    return output, loss.item()/input_name_tensor.size(0)
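The training step above updates the parameters with a hand-written SGD step. If you would rather use the commented-out Adam optimizer, a minimal variant could look like this (a sketch only; train_with_adam is a hypothetical name, not part of the original code):

optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)

def train_with_adam(kind_tensor, input_name_tensor, target_name_tensor):
    hidden = rnn.initHidden()
    optimizer.zero_grad()
    target_name_tensor.unsqueeze_(-1)
    loss = 0
    for i in range(input_name_tensor.size(0)):
        output, hidden = rnn(kind_tensor, input_name_tensor[i], hidden)
        loss += criterion(output, target_name_tensor[i])
    loss.backward()
    optimizer.step()  # Adam update replaces the manual parameter step
    return output, loss.item() / input_name_tensor.size(0)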

Print training progress and loss, and prepare for plotting

During training, print the progress and the loss, and keep appending the average loss to a list (used later for plotting):

losses = 0
L_loss = []
for i in range(1, 100001):
    output, loss = train(*random_train())
    losses += loss
    if(i % 5000 == 0):
        print('\n Waiting... {}%\tloss:{}'.format((i/1000), round(loss, 5)))
    if(i % 500 == 0):
        L_loss.append(round(losses/500, 5))
        losses = 0

Plot the loss to show how the network learned

plt.figure()
plt.plot(L_loss)
plt.show()

Predict names

At each step, predict the next letter from the current letter, append the most likely letter to the output, and finally return the complete predicted English name:

print('\n\nNumber and names of the categories: {}\t{}\n'.format(num_of_all_kinds, all_kinds))

def predict(kind, first='A'):
    with torch.no_grad():
        kind_tensor = torch.zeros(1, num_of_all_kinds)
        kind_tensor[0][all_kinds.index(kind)] = 1
        input = torch.zeros(len(first), 1, num_of_all_letters)
        input[0][0][all_letters.find(first[0])] = 1
        hidden = rnn.initHidden()
        predict_name = first
        for i in range(7):
            output, hidden = rnn(kind_tensor, input[0], hidden)
            tv, ti = output.topk(1)
            # if(i == 0):
            #     print('\ntv:{}\nti:{}'.format(tv, ti))
            t = ti[0][0]
            if(t == num_of_all_letters - 1):
                break
            else:
                predict_name += all_letters[t]
            # Feed the predicted letter back in as the input for the next time step
            input = torch.zeros(1, 1, num_of_all_letters)
            input[0][0][t] = 1
        return predict_name
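The experiment description also asks for the top few candidate letters at each time step. One way to print them during sampling is sketched below (predict_topk is a hypothetical helper, not part of the original code):

def predict_topk(kind, first='A', k=3):
    with torch.no_grad():
        kind_tensor = torch.zeros(1, num_of_all_kinds)
        kind_tensor[0][all_kinds.index(kind)] = 1
        input = torch.zeros(1, 1, num_of_all_letters)
        input[0][0][all_letters.find(first[0])] = 1
        hidden = rnn.initHidden()
        predict_name = first
        for i in range(7):
            output, hidden = rnn(kind_tensor, input[0], hidden)
            tv, ti = output.topk(k)
            # The k most likely next letters at this time step
            candidates = ['<EOS>' if idx == num_of_all_letters - 1 else all_letters[idx]
                          for idx in ti[0].tolist()]
            print('step {}: top-{} candidates: {}'.format(i + 1, k, candidates))
            t = ti[0][0].item()
            if t == num_of_all_letters - 1:
                break
            predict_name += all_letters[t]
            # Feed the chosen letter back in as the next input
            input = torch.zeros(1, 1, num_of_all_letters)
            input[0][0][t] = 1
        return predict_name

For example, predict_topk('male', 'L') would print the candidates at every step while generating a male name starting with 'L'.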

Test and print the results

Predict a boy's name starting with L:

first_letter = 'L'
which_kind = 'male'
print('Predicting a {} baby name starting with {}'.format(which_kind, first_letter))
print("\n The baby's name is: {}\n".format(predict(which_kind, first_letter)))

Predict a girl's name starting with R:

first_letter = 'R'
which_kind = 'female'
print('Predicting a {} baby name starting with {}'.format(which_kind, first_letter))
print("\n The baby's name is: {}\n".format(predict(which_kind, first_letter)))


Full code

import os
import glob
import random
import string
import torch
import torch.nn as nn
import matplotlib.pyplot as plt

# 1. Preparation

# Build a dictionary that stores the names of each category
all_kinds_names = {}
all_kinds = []
all_files = glob.glob('C:/Users/Administrator/Desktop/机器学习/baby-names/*.txt')
for f in all_files:
    kind = os.path.splitext(os.path.basename(f))[0]
    all_kinds.append(kind)
    one_kind_names = open(f, encoding='utf-8').read().strip().split('\n')
    all_kinds_names[kind] = one_kind_names

# Number of name categories
num_of_all_kinds = len(all_kinds)

# Number of characters = upper/lowercase English letters + a few punctuation marks + 1 for the EOS end marker
all_letters = string.ascii_letters + " .,;'-"
num_of_all_letters = len(all_letters) + 1


# Randomly pick a (category, name-of-that-category) pair and convert it into the required (category, input, target) tensors
def random_train():
    kind = random.choice(all_kinds)
    name = random.choice(all_kinds_names[kind])

    kind_tensor = torch.zeros(1, num_of_all_kinds)
    kind_tensor[0][all_kinds.index(kind)] = 1

    input_name_tensor = torch.zeros(len(name), 1, num_of_all_letters)
    for i in range(len(name)):
        letter = name[i]
        input_name_tensor[i][0][all_letters.find(letter)] = 1

    letter_indexes = [all_letters.find(name[j]) for j in range(1, len(name))]
    letter_indexes.append(num_of_all_letters - 1)
    target_name_tensor = torch.LongTensor(letter_indexes)
    return kind_tensor, input_name_tensor, target_name_tensor


# 2. Build the neural network

class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.i2h = nn.Linear(num_of_all_kinds + input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(num_of_all_kinds + input_size + hidden_size, output_size)
        self.o2o = nn.Linear(hidden_size + output_size, output_size)
        self.dropout = nn.Dropout(0.1)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, category, input, hidden):
        input_combined = torch.cat((category, input, hidden), 1)
        hidden = self.i2h(input_combined)
        output = self.i2o(input_combined)
        output_combined = torch.cat((hidden, output), 1)
        output = self.o2o(output_combined)
        output = self.dropout(output)
        output = self.softmax(output)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, self.hidden_size)


# 3. Train the neural network

rnn = RNN(num_of_all_letters, 128, num_of_all_letters)
losses = 0
L_loss = []
criterion = nn.NLLLoss()
learning_rate = 0.0005
# optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)


def train(kind_tensor, input_name_tensor, target_name_tensor):
    hidden = rnn.initHidden()
    rnn.zero_grad()
    target_name_tensor.unsqueeze_(-1)
    loss = 0
    for i in range(input_name_tensor.size(0)):
        output, hidden = rnn(kind_tensor, input_name_tensor[i], hidden)
        loss += criterion(output, target_name_tensor[i])
    # loss = torch.tensor(float(loss), requires_grad=True)
    loss.backward()
    for j in rnn.parameters():
        j.data.add_(j.grad.data, alpha=-learning_rate)
    # optimizer.step()
    return output, loss.item()/input_name_tensor.size(0)


for i in range(1, 100001):
    output, loss = train(*random_train())
    losses += loss
    if(i % 5000 == 0):
        print('\n Waiting... {}%\tloss:{}'.format((i/1000), round(loss, 5)))
    if(i % 500 == 0):
        L_loss.append(round(losses/500, 5))
        losses = 0

# 4. Plot the loss data to show how the network learned (plt.show() is called at the end of the script)

plt.figure()
plt.plot(L_loss)

# 5. Predict names

print('\n\nNumber and names of the categories: {}\t{}\n'.format(num_of_all_kinds, all_kinds))


def predict(kind, first='A'):
    with torch.no_grad():
        kind_tensor = torch.zeros(1, num_of_all_kinds)
        kind_tensor[0][all_kinds.index(kind)] = 1
        input = torch.zeros(len(first), 1, num_of_all_letters)
        input[0][0][all_letters.find(first[0])] = 1
        hidden = rnn.initHidden()
        predict_name = first
        for i in range(7):
            output, hidden = rnn(kind_tensor, input[0], hidden)
            tv, ti = output.topk(1)
            # if(i == 0):
            #     print('\ntv:{}\nti:{}'.format(tv, ti))
            t = ti[0][0]
            if(t == num_of_all_letters - 1):
                break
            else:
                predict_name += all_letters[t]
            # Feed the predicted letter back in as the input for the next time step
            input = torch.zeros(1, 1, num_of_all_letters)
            input[0][0][t] = 1
        return predict_name


# 6. Test and print the results

# Predict a boy's name starting with L
first_letter = 'L'
which_kind = 'male'
print('Predicting a {} baby name starting with {}'.format(which_kind, first_letter))
print("\n The baby's name is: {}\n".format(predict(which_kind, first_letter)))
# Predict a girl's name starting with R
first_letter = 'R'
which_kind = 'female'
print('Predicting a {} baby name starting with {}'.format(which_kind, first_letter))
print("\n The baby's name is: {}\n".format(predict(which_kind, first_letter)))

plt.show()

