采用RNN为小Baby起个英文名字
实验描述
本实验提供了 8000 多个英文名字,试训练一个循环神经网络语言模型,进而给定一个开始字母,由语言模型自动生成后续的字母,直到生成一个名字的结束符,与此同时,输出模型为每个时刻预测的前几个最有可能的候选字母。
准备工作
安装PyTorch
打开cmd,输入指令:
pip install torch===1.5.1 torchvision===0.6.1 -f https://download.pytorch.org/whl/torch_stable.html
观察数据集创建字典
给定的英文名字分别位于 baby-names 文件夹下的三个子文件:female.txt、male.txt、pet.txt,可知一共有三类 baby names,创建存储各个类别的名字的字典
# Collect every name file; the file stem (e.g. "female", "male", "pet")
# is used as the category label.
all_files = glob.glob('C:/Users/Administrator/Desktop/机器学习/baby-names/*.txt')
for f in all_files:
    kind = os.path.splitext(os.path.basename(f))[0]
    all_kinds.append(kind)
    # A context manager closes the file handle as soon as it has been read,
    # instead of leaving it open until garbage collection.
    with open(f, encoding='utf-8') as name_file:
        one_kind_names = name_file.read().strip().split('\n')
    all_kinds_names[kind] = one_kind_names
转换格式张量
随机获取(类别, 该类别的名字)对儿,并将对儿转换为所需要的(类别, 输入, 目标)格式张量
def random_train():
    """Draw one random (category, name) pair and encode it as tensors.

    Returns:
        A tuple ``(kind_tensor, input_name_tensor, target_name_tensor)``:
        a ``(1, num_of_all_kinds)`` one-hot category row, a
        ``(len(name), 1, num_of_all_letters)`` stack of one-hot letters, and
        a ``LongTensor`` that holds, for every position, the index of the
        following letter, closed by the index ``num_of_all_letters - 1``.
    """
    kind = random.choice(all_kinds)
    name = random.choice(all_kinds_names[kind])

    # One-hot encode the category.
    kind_tensor = torch.zeros(1, num_of_all_kinds)
    kind_tensor[0][all_kinds.index(kind)] = 1

    # Look every letter up once; the indices feed both input and target.
    positions = [all_letters.find(letter) for letter in name]

    input_name_tensor = torch.zeros(len(name), 1, num_of_all_letters)
    for step, position in enumerate(positions):
        input_name_tensor[step][0][position] = 1

    # The target is the input shifted left by one letter, terminated by the
    # last index of the letter dimension.
    last_index = num_of_all_letters - 1
    target_name_tensor = torch.LongTensor(positions[1:] + [last_index])
    return kind_tensor, input_name_tensor, target_name_tensor
构造神经网络
在采样过程中,下一时刻的输入字母为当前输出概率最高的字母;名字的类别处理方法同字母一样,组成 one-hot 向量构成张量输入;dropout 层用来模糊处理输入,防止过拟合(参考 https://arxiv.org/abs/1207.0580)
class RNN(nn.Module):
    """Category-conditioned, character-level recurrent network.

    At every step the category vector, the current letter vector and the
    previous hidden state are concatenated and passed through two linear
    layers (``i2h`` and ``i2o``). Their results are concatenated again and
    mapped by ``o2o`` to letter scores, followed by dropout and log-softmax.

    Args:
        input_size: Width of one letter vector.
        hidden_size: Width of the hidden state.
        output_size: Number of output classes.
        category_size: Width of the category vector. When omitted, the
            module-level ``num_of_all_kinds`` is used, which is what the
            three-argument constructor has always done.
    """

    def __init__(self, input_size, hidden_size, output_size, category_size=None):
        super().__init__()
        if category_size is None:
            # Backward-compatible default: fall back to the global count.
            category_size = num_of_all_kinds
        self.hidden_size = hidden_size
        combined_size = category_size + input_size + hidden_size
        self.i2h = nn.Linear(combined_size, hidden_size)
        self.i2o = nn.Linear(combined_size, output_size)
        self.o2o = nn.Linear(hidden_size + output_size, output_size)
        self.dropout = nn.Dropout(0.1)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, category, input, hidden):
        """Run a single time step.

        Args:
            category: Category row; concatenated with the others along dim 1.
            input: Letter row for the current step.
            hidden: Hidden state returned by the previous step.

        Returns:
            ``(output, hidden)``: log-probabilities over the output classes
            and the new hidden state.
        """
        input_combined = torch.cat((category, input, hidden), 1)
        hidden = self.i2h(input_combined)
        output = self.i2o(input_combined)
        output_combined = torch.cat((hidden, output), 1)
        output = self.o2o(output_combined)
        output = self.dropout(output)
        output = self.softmax(output)
        return output, hidden

    def initHidden(self):
        """Return an all-zero hidden state of shape ``(1, hidden_size)``."""
        return torch.zeros(1, self.hidden_size)
训练神经网络
预测和计算损失
在每一个时间序列都进行预测和计算损失,将损失累加,最后反向传播
# Letters are both the input and the output vocabulary; 128 hidden units.
rnn = RNN(num_of_all_letters, 128, num_of_all_letters)
# NLLLoss consumes the log-probabilities produced by the network.
criterion = nn.NLLLoss()
learning_rate = 0.0005


def train(kind_tensor, input_name_tensor, target_name_tensor):
    """Run one plain-SGD update on a single name.

    Args:
        kind_tensor: Category row passed to the network at every step.
        input_name_tensor: One letter row per time step (indexed on dim 0).
        target_name_tensor: 1-D ``LongTensor`` with one target index per step.
            It is not modified.

    Returns:
        ``(output, mean_loss)``: the network output of the last step and the
        summed loss divided by the number of steps.

    Raises:
        ValueError: If ``input_name_tensor`` has no time steps.
    """
    steps = input_name_tensor.size(0)
    if steps == 0:
        raise ValueError('cannot train on an empty name')

    hidden = rnn.initHidden()
    rnn.zero_grad()
    # Out-of-place unsqueeze: the caller's tensor keeps its shape, so the
    # same sample can be passed to train() more than once.
    targets = target_name_tensor.unsqueeze(-1)

    loss = 0
    for i in range(steps):
        output, hidden = rnn(kind_tensor, input_name_tensor[i], hidden)
        loss += criterion(output, targets[i])
    loss.backward()

    # Manual SGD step. The keyword form add_(tensor, alpha=...) replaces the
    # deprecated positional add_(scalar, tensor) overload.
    with torch.no_grad():
        for param in rnn.parameters():
            param.add_(param.grad, alpha=-learning_rate)
    return output, loss.item() / steps
打印训练进度和损失、准备绘图
在训练过程中打印训练进度和损失,并不断将平均损失存入列表(绘图使用)
# Training schedule. Keeping the counts in one place lets the progress
# percentage follow the iteration count instead of a hard-coded divisor.
n_iters = 100000
print_every = 5000
plot_every = 500

losses = 0
L_loss = []
for i in range(1, n_iters + 1):
    output, loss = train(*random_train())
    losses += loss
    if i % print_every == 0:
        # Progress in percent plus the loss of the most recent sample.
        print('\n Waiting... {}%\tloss:{}'.format(100 * i / n_iters, round(loss, 5)))
    if i % plot_every == 0:
        # Store the mean loss of the last window for plotting, then reset.
        L_loss.append(round(losses / plot_every, 5))
        losses = 0
作图反映神经网络学习情况
# Draw the recorded mean losses on a fresh figure and display it.
plt.figure().gca().plot(L_loss)
plt.show()
预测名字
每次根据当前字母预测下一个字母,并将最有可能的几个字母顺序输出,最终返回预测的完整英文名字
print('\n\n名字的类数及类别:{}\t{}\n'.format(num_of_all_kinds, all_kinds))


def predict(kind, first='A', max_length=7, top_k=1):
    """Greedily generate a name of category ``kind`` that starts with ``first``.

    Args:
        kind: Category label; must be an element of ``all_kinds``.
        first: Non-empty prefix of the name. Every letter must occur in
            ``all_letters``. All prefix letters are fed to the network.
        max_length: Maximum number of letters generated after the prefix.
        top_k: When greater than 1, the ``top_k`` most likely candidates of
            every step are printed, most likely first. The generated name
            always uses the single most likely letter.

    Returns:
        The prefix followed by the generated letters.

    Raises:
        ValueError: If ``first`` is empty or contains a letter outside
            ``all_letters``, or if ``kind`` is not in ``all_kinds``.
    """
    if not first:
        raise ValueError('first must contain at least one letter')
    unknown = [letter for letter in first if letter not in all_letters]
    if unknown:
        raise ValueError('letters not in the alphabet: {}'.format(unknown))

    eos_index = num_of_all_letters - 1

    def one_hot(letter_index):
        # Encode one letter index as a (1, num_of_all_letters) row.
        encoded = torch.zeros(1, num_of_all_letters)
        encoded[0][letter_index] = 1
        return encoded

    # Dropout must be off while sampling, otherwise the scores are randomly
    # perturbed on every call. The previous mode is restored afterwards.
    was_training = rnn.training
    rnn.eval()
    try:
        with torch.no_grad():
            kind_tensor = torch.zeros(1, num_of_all_kinds)
            kind_tensor[0][all_kinds.index(kind)] = 1
            hidden = rnn.initHidden()

            # Prime the hidden state with every prefix letter but the last;
            # the last one is the input of the first generation step.
            for letter in first[:-1]:
                _, hidden = rnn(kind_tensor, one_hot(all_letters.find(letter)), hidden)
            current = one_hot(all_letters.find(first[-1]))

            predict_name = first
            for step in range(max_length):
                output, hidden = rnn(kind_tensor, current, hidden)
                if top_k > 1:
                    _, indexes = output.topk(min(top_k, num_of_all_letters))
                    candidates = [
                        '<EOS>' if index == eos_index else all_letters[index]
                        for index in indexes[0].tolist()
                    ]
                    print('step {}: {}'.format(step + 1, candidates))
                best = output.topk(1)[1][0][0].item()
                if best == eos_index:
                    break
                predict_name += all_letters[best]
                # Feed the letter just predicted back in as the next input
                # (previously the first letter was fed again at every step).
                current = one_hot(best)
    finally:
        rnn.train(was_training)
    return predict_name
测试及打印结果
预测男孩的名字,首字母为L
# Predict a name of category "male" that starts with "L".
first_letter, which_kind = 'L', 'male'
print('预测首字母为{}的{}宝宝名字'.format(first_letter, which_kind))
predicted_name = predict(which_kind, first_letter)
print('\n 宝宝的名字为:{}\n'.format(predicted_name))
预测女孩的名字,首字母为R
# Predict a name of category "female" that starts with "R".
first_letter, which_kind = 'R', 'female'
print('预测首字母为{}的{}宝宝名字'.format(first_letter, which_kind))
predicted_name = predict(which_kind, first_letter)
print('\n 宝宝的名字为:{}\n'.format(predicted_name))
完整代码
import os
import glob
import random
import string
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
# 1. Preparation
# Map every category (the file stem) to the list of names read from its file.
all_kinds_names = {}
all_kinds = []
all_files = glob.glob('C:/Users/Administrator/Desktop/机器学习/baby-names/*.txt')
for f in all_files:
    kind = os.path.splitext(os.path.basename(f))[0]
    all_kinds.append(kind)
    # A context manager closes the file handle as soon as it has been read,
    # instead of leaving it open until garbage collection.
    with open(f, encoding='utf-8') as name_file:
        one_kind_names = name_file.read().strip().split('\n')
    all_kinds_names[kind] = one_kind_names
# Number of name categories.
num_of_all_kinds = len(all_kinds)
# Vocabulary: upper/lower-case ASCII letters plus a few punctuation marks,
# and one extra slot (the last index) for the end-of-name (EOS) marker.
all_letters = string.ascii_letters + " .,;'-"
num_of_all_letters = len(all_letters) + 1
# 随机获取(类别, 该类别的名字)对儿,并将对儿转换为所需要的(类别, 输入, 目标)格式张量
def random_train():
    """Draw one random (category, name) pair and encode it as tensors.

    Returns:
        A tuple ``(kind_tensor, input_name_tensor, target_name_tensor)``:
        a ``(1, num_of_all_kinds)`` one-hot category row, a
        ``(len(name), 1, num_of_all_letters)`` stack of one-hot letters, and
        a ``LongTensor`` that holds, for every position, the index of the
        following letter, closed by the EOS index ``num_of_all_letters - 1``.
    """
    kind = random.choice(all_kinds)
    name = random.choice(all_kinds_names[kind])

    # One-hot encode the category.
    kind_tensor = torch.zeros(1, num_of_all_kinds)
    kind_tensor[0][all_kinds.index(kind)] = 1

    # Look every letter up once; the indices feed both input and target.
    positions = [all_letters.find(letter) for letter in name]

    input_name_tensor = torch.zeros(len(name), 1, num_of_all_letters)
    for step, position in enumerate(positions):
        input_name_tensor[step][0][position] = 1

    # The target is the input shifted left by one letter, terminated by the
    # EOS marker stored at the last index.
    eos_index = num_of_all_letters - 1
    target_name_tensor = torch.LongTensor(positions[1:] + [eos_index])
    return kind_tensor, input_name_tensor, target_name_tensor
# 2.构造神经网络
class RNN(nn.Module):
    """Category-conditioned, character-level recurrent network.

    At every step the category vector, the current letter vector and the
    previous hidden state are concatenated and passed through two linear
    layers (``i2h`` and ``i2o``). Their results are concatenated again and
    mapped by ``o2o`` to letter scores, followed by dropout and log-softmax.

    Args:
        input_size: Width of one letter vector.
        hidden_size: Width of the hidden state.
        output_size: Number of output classes.
        category_size: Width of the category vector. When omitted, the
            module-level ``num_of_all_kinds`` is used, which is what the
            three-argument constructor has always done.
    """

    def __init__(self, input_size, hidden_size, output_size, category_size=None):
        super().__init__()
        if category_size is None:
            # Backward-compatible default: fall back to the global count.
            category_size = num_of_all_kinds
        self.hidden_size = hidden_size
        combined_size = category_size + input_size + hidden_size
        self.i2h = nn.Linear(combined_size, hidden_size)
        self.i2o = nn.Linear(combined_size, output_size)
        self.o2o = nn.Linear(hidden_size + output_size, output_size)
        self.dropout = nn.Dropout(0.1)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, category, input, hidden):
        """Run a single time step.

        Args:
            category: Category row; concatenated with the others along dim 1.
            input: Letter row for the current step.
            hidden: Hidden state returned by the previous step.

        Returns:
            ``(output, hidden)``: log-probabilities over the output classes
            and the new hidden state.
        """
        input_combined = torch.cat((category, input, hidden), 1)
        hidden = self.i2h(input_combined)
        output = self.i2o(input_combined)
        output_combined = torch.cat((hidden, output), 1)
        output = self.o2o(output_combined)
        output = self.dropout(output)
        output = self.softmax(output)
        return output, hidden

    def initHidden(self):
        """Return an all-zero hidden state of shape ``(1, hidden_size)``."""
        return torch.zeros(1, self.hidden_size)
# 3. Train the network
# Letters are both the input and the output vocabulary; 128 hidden units.
rnn = RNN(num_of_all_letters, 128, num_of_all_letters)
# Running loss of the current window and the per-window means for plotting.
losses = 0
L_loss = []
# NLLLoss consumes the log-probabilities produced by the network.
criterion = nn.NLLLoss()
learning_rate = 0.0005


def train(kind_tensor, input_name_tensor, target_name_tensor):
    """Run one plain-SGD update on a single name.

    Args:
        kind_tensor: Category row passed to the network at every step.
        input_name_tensor: One letter row per time step (indexed on dim 0).
        target_name_tensor: 1-D ``LongTensor`` with one target index per step.
            It is not modified.

    Returns:
        ``(output, mean_loss)``: the network output of the last step and the
        summed loss divided by the number of steps.

    Raises:
        ValueError: If ``input_name_tensor`` has no time steps.
    """
    steps = input_name_tensor.size(0)
    if steps == 0:
        raise ValueError('cannot train on an empty name')

    hidden = rnn.initHidden()
    rnn.zero_grad()
    # Out-of-place unsqueeze: the caller's tensor keeps its shape, so the
    # same sample can be passed to train() more than once.
    targets = target_name_tensor.unsqueeze(-1)

    loss = 0
    for i in range(steps):
        output, hidden = rnn(kind_tensor, input_name_tensor[i], hidden)
        loss += criterion(output, targets[i])
    loss.backward()

    # Manual SGD step. The keyword form add_(tensor, alpha=...) replaces the
    # deprecated positional add_(scalar, tensor) overload.
    with torch.no_grad():
        for param in rnn.parameters():
            param.add_(param.grad, alpha=-learning_rate)
    return output, loss.item() / steps
# Training schedule. Keeping the counts in one place lets the progress
# percentage follow the iteration count instead of a hard-coded divisor.
n_iters = 100000
print_every = 5000
plot_every = 500

for i in range(1, n_iters + 1):
    output, loss = train(*random_train())
    losses += loss
    if i % print_every == 0:
        # Progress in percent plus the loss of the most recent sample.
        print('\n Waiting... {}%\tloss:{}'.format(100 * i / n_iters, round(loss, 5)))
    if i % plot_every == 0:
        # Store the mean loss of the last window for plotting, then reset.
        L_loss.append(round(losses / plot_every, 5))
        losses = 0
# 4. Plot the recorded losses to show how the network learned
#    (the window itself is shown by plt.show() at the end of the script).
plt.figure().gca().plot(L_loss)
# 5. Predict names
print('\n\n名字的类数及类别:{}\t{}\n'.format(num_of_all_kinds, all_kinds))


def predict(kind, first='A', max_length=7, top_k=1):
    """Greedily generate a name of category ``kind`` that starts with ``first``.

    Args:
        kind: Category label; must be an element of ``all_kinds``.
        first: Non-empty prefix of the name. Every letter must occur in
            ``all_letters``. All prefix letters are fed to the network.
        max_length: Maximum number of letters generated after the prefix.
        top_k: When greater than 1, the ``top_k`` most likely candidates of
            every step are printed, most likely first. The generated name
            always uses the single most likely letter.

    Returns:
        The prefix followed by the generated letters.

    Raises:
        ValueError: If ``first`` is empty or contains a letter outside
            ``all_letters``, or if ``kind`` is not in ``all_kinds``.
    """
    if not first:
        raise ValueError('first must contain at least one letter')
    unknown = [letter for letter in first if letter not in all_letters]
    if unknown:
        raise ValueError('letters not in the alphabet: {}'.format(unknown))

    eos_index = num_of_all_letters - 1

    def one_hot(letter_index):
        # Encode one letter index as a (1, num_of_all_letters) row.
        encoded = torch.zeros(1, num_of_all_letters)
        encoded[0][letter_index] = 1
        return encoded

    # Dropout must be off while sampling, otherwise the scores are randomly
    # perturbed on every call. The previous mode is restored afterwards.
    was_training = rnn.training
    rnn.eval()
    try:
        with torch.no_grad():
            kind_tensor = torch.zeros(1, num_of_all_kinds)
            kind_tensor[0][all_kinds.index(kind)] = 1
            hidden = rnn.initHidden()

            # Prime the hidden state with every prefix letter but the last;
            # the last one is the input of the first generation step.
            for letter in first[:-1]:
                _, hidden = rnn(kind_tensor, one_hot(all_letters.find(letter)), hidden)
            current = one_hot(all_letters.find(first[-1]))

            predict_name = first
            for step in range(max_length):
                output, hidden = rnn(kind_tensor, current, hidden)
                if top_k > 1:
                    _, indexes = output.topk(min(top_k, num_of_all_letters))
                    candidates = [
                        '<EOS>' if index == eos_index else all_letters[index]
                        for index in indexes[0].tolist()
                    ]
                    print('step {}: {}'.format(step + 1, candidates))
                best = output.topk(1)[1][0][0].item()
                if best == eos_index:
                    break
                predict_name += all_letters[best]
                # Feed the letter just predicted back in as the next input
                # (previously the first letter was fed again at every step).
                current = one_hot(best)
    finally:
        rnn.train(was_training)
    return predict_name
# 6. Test and print the results
# First a "male" name starting with "L", then a "female" name starting
# with "R"; both runs print the same two messages.
for first_letter, which_kind in (('L', 'male'), ('R', 'female')):
    print('预测首字母为{}的{}宝宝名字'.format(first_letter, which_kind))
    print('\n 宝宝的名字为:{}\n'.format(predict(which_kind, first_letter)))
# Finally display the loss figure that was prepared after training.
plt.show()