採用RNN爲小Baby起個英文名字
實驗描述
本實驗提供了 8000 多個英文名字,試訓練一個循環神經網絡語言模型,進而給定一個開始字母,由語言模型自動生成後續的字母,直到生成一個名字的結束符,與此同時,輸出模型爲每個時刻預測的前幾個最有可能的候選字母。
準備工作
安裝PyTorch
打開cmd,輸入指令:
pip install torch===1.5.1 torchvision===0.6.1 -f https://download.pytorch.org/whl/torch_stable.html
觀察數據集創建字典
給定的英文名字分別位於 baby-names 文件夾下的三個子文件:female.txt、male.txt、pet.txt,可知一共有三類 baby names,創建存儲各個類別的名字的字典
# Map each category to its list of names. The category is the file name
# without its extension (e.g. "female" for female.txt).
all_kinds_names = {}
all_kinds = []
# Sort the matches so category indices are the same on every run.
all_files = sorted(glob.glob('C:/Users/Administrator/Desktop/機器學習/baby-names/*.txt'))
for f in all_files:
    kind = os.path.splitext(os.path.basename(f))[0]
    all_kinds.append(kind)
    # The context manager closes the file deterministically; blank lines are
    # dropped so that no empty name can reach training.
    with open(f, encoding='utf-8') as names_file:
        one_kind_names = [line.strip() for line in names_file if line.strip()]
    all_kinds_names[kind] = one_kind_names
轉換格式張量
隨機獲取(類別, 該類別的名字)對兒,並將對兒轉換爲所需要的(類別, 輸入, 目標)格式張量
def random_train():
    """Draw one random training example and encode it as tensors.

    A category is picked at random, then a name from that category.

    Returns:
        A tuple ``(kind_tensor, input_name_tensor, target_name_tensor)``:

        * ``kind_tensor`` -- one-hot category vector of shape
          ``(1, num_of_all_kinds)``.
        * ``input_name_tensor`` -- one-hot encoded letters of the name, shape
          ``(number_of_letters, 1, num_of_all_letters)``.
        * ``target_name_tensor`` -- ``LongTensor`` with the indices of the
          second to last letters followed by the EOS index
          ``num_of_all_letters - 1``.
    """
    kind = random.choice(all_kinds)
    name = random.choice(all_kinds_names[kind])

    # str.find() returns -1 for a character that is not in all_letters, and
    # index -1 addresses the EOS slot, so such characters would silently be
    # encoded as "end of name". Drop them instead.
    letter_ids = [all_letters.find(ch) for ch in name]
    letter_ids = [idx for idx in letter_ids if idx >= 0]

    kind_tensor = torch.zeros(1, num_of_all_kinds)
    kind_tensor[0][all_kinds.index(kind)] = 1

    input_name_tensor = torch.zeros(len(letter_ids), 1, num_of_all_letters)
    for position, idx in enumerate(letter_ids):
        input_name_tensor[position][0][idx] = 1

    # The target at step i is the letter at step i + 1; the last step
    # predicts EOS.
    eos_index = num_of_all_letters - 1
    target_name_tensor = torch.LongTensor(letter_ids[1:] + [eos_index])
    return kind_tensor, input_name_tensor, target_name_tensor
構造神經網絡
在採樣過程中,下一時刻的輸入字母爲當前輸出概率最高的字母;名字的類別處理方法同字母一樣,組成 one-hot 向量構成張量輸入;dropout 層用來模糊處理輸入,防止過擬合(參考 https://arxiv.org/abs/1207.0580)
class RNN(nn.Module):
    """Category-conditioned recurrent cell that scores the next letter.

    At every step the category vector, the current letter vector and the
    previous hidden state are concatenated and fed to two linear layers
    (``i2h`` and ``i2o``). Their results are concatenated again and passed
    through ``o2o``, dropout (p=0.1) and a log-softmax over dimension 1.

    Args:
        input_size: Width of the letter vector.
        hidden_size: Width of the hidden state.
        output_size: Number of output classes.
        category_size: Width of the category vector. When omitted, the
            module-level ``num_of_all_kinds`` is used, which keeps the
            original three-argument call working.
    """

    def __init__(self, input_size, hidden_size, output_size, category_size=None):
        super(RNN, self).__init__()
        if category_size is None:
            # Backward-compatible default: fall back to the script's global.
            category_size = num_of_all_kinds
        self.hidden_size = hidden_size
        combined_size = category_size + input_size + hidden_size
        self.i2h = nn.Linear(combined_size, hidden_size)
        self.i2o = nn.Linear(combined_size, output_size)
        self.o2o = nn.Linear(hidden_size + output_size, output_size)
        self.dropout = nn.Dropout(0.1)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, category, input, hidden):
        """Advance one time step.

        Args:
            category: Category tensor, concatenated along dimension 1.
            input: Letter tensor for the current step.
            hidden: Hidden state returned by the previous step (or by
                ``initHidden`` for the first step).

        Returns:
            ``(output, hidden)``: log-probabilities over the output classes
            and the new hidden state.
        """
        input_combined = torch.cat((category, input, hidden), 1)
        hidden = self.i2h(input_combined)
        output = self.i2o(input_combined)
        output_combined = torch.cat((hidden, output), 1)
        output = self.o2o(output_combined)
        output = self.dropout(output)
        output = self.softmax(output)
        return output, hidden

    def initHidden(self):
        """Return an all-zero hidden state of shape ``(1, hidden_size)``."""
        return torch.zeros(1, self.hidden_size)
訓練神經網絡
預測和計算損失
在每一個時間序列都進行預測和計算損失,將損失累加,最後反向傳播
# Loss function and step size used by train().
criterion = nn.NLLLoss()
learning_rate = 0.0005
# Letters are both the input and the output vocabulary; the hidden state has
# 128 units.
rnn = RNN(
    input_size=num_of_all_letters,
    hidden_size=128,
    output_size=num_of_all_letters,
)
# NOTE: the weights are updated by hand-written SGD inside train();
# torch.optim.Adam(rnn.parameters(), lr=learning_rate) is a possible
# alternative.
def train(kind_tensor, input_name_tensor, target_name_tensor):
    """Run one SGD step on a single name.

    The loss is accumulated over every time step of the name and
    back-propagated once; the parameters of the global ``rnn`` are then
    updated with plain gradient descent using ``learning_rate``.

    Args:
        kind_tensor: Category tensor passed to ``rnn`` at every step.
        input_name_tensor: Per-step letter tensors, indexed along dimension 0.
        target_name_tensor: 1-D tensor of target class indices, one per step.

    Returns:
        ``(output, mean_loss)``: the network output of the last step and the
        summed loss divided by the number of steps.

    Raises:
        ValueError: If ``input_name_tensor`` contains no time steps.
    """
    steps = input_name_tensor.size(0)
    if steps == 0:
        # Without this guard the code fails later with an obscure error
        # (``loss`` would still be the int 0 when ``backward`` is called).
        raise ValueError('train() needs a name with at least one letter')

    # Out-of-place unsqueeze: the caller's tensor keeps its shape, so the same
    # sample can be passed to train() more than once.
    targets = target_name_tensor.unsqueeze(-1)

    hidden = rnn.initHidden()
    rnn.zero_grad()
    loss = 0
    for step in range(steps):
        output, hidden = rnn(kind_tensor, input_name_tensor[step], hidden)
        loss += criterion(output, targets[step])
    loss.backward()

    # Manual SGD update. add_(tensor, alpha=...) replaces the deprecated
    # add_(scalar, tensor) overload.
    with torch.no_grad():
        for param in rnn.parameters():
            if param.grad is not None:
                param.add_(param.grad, alpha=-learning_rate)
    return output, loss.item() / steps
打印訓練進度和損失、準備繪圖
在訓練過程中打印訓練進度和損失,並不斷將平均損失存入列表(繪圖使用)
# Train on 100000 random samples. Progress is printed every 5000 iterations;
# the mean loss of each 500-iteration window is stored for plotting.
L_loss = []
losses = 0
for i in range(1, 100001):
    output, loss = train(*random_train())
    losses += loss
    if not i % 5000:
        print('\n Waiting... {}%\tloss:{}'.format(i / 1000, round(loss, 5)))
    if not i % 500:
        L_loss.append(round(losses / 500, 5))
        losses = 0
作圖反映神經網絡學習情況
# Open a new figure, draw the values collected in L_loss, and display it.
plt.figure()
plt.plot(L_loss)
plt.show()
預測名字
每次根據當前字母預測下一個字母,並將最有可能的幾個字母順序輸出,最終返回預測的完整英文名字
# Report how many name categories were loaded and what they are.
print('\n\n名字的類數及類別:{}\t{}\n'.format(num_of_all_kinds, all_kinds))
def predict(kind, first='A', max_length=7):
    """Greedily generate a name of category ``kind`` that starts with ``first``.

    Every character of ``first`` is fed to the global ``rnn``; after that the
    most probable next letter is appended and fed back as the next input
    until the EOS index is predicted or ``max_length`` letters were added.

    Args:
        kind: Category name; must be an element of ``all_kinds``.
        first: Starting letter (or prefix) of the name.
        max_length: Maximum number of letters generated after ``first``.

    Returns:
        The generated name, beginning with ``first``.

    Raises:
        ValueError: If ``first`` is empty or contains a character that is not
            in ``all_letters``.
    """
    prefix_ids = [all_letters.find(ch) for ch in first]
    if not prefix_ids or min(prefix_ids) < 0:
        # find() returns -1 for unknown characters, which would otherwise be
        # one-hot encoded into the EOS slot.
        raise ValueError('first must be a non-empty string of known letters')

    eos_index = num_of_all_letters - 1

    def one_hot(letter_index):
        """Return a (1, num_of_all_letters) one-hot row for letter_index."""
        row = torch.zeros(1, num_of_all_letters)
        row[0][letter_index] = 1
        return row

    # Dropout must be switched off for inference; restore the previous mode
    # afterwards so that training can continue unaffected.
    was_training = rnn.training
    rnn.eval()
    try:
        with torch.no_grad():
            kind_tensor = torch.zeros(1, num_of_all_kinds)
            kind_tensor[0][all_kinds.index(kind)] = 1
            hidden = rnn.initHidden()

            # Prime the hidden state with all but the last prefix letter.
            for letter_index in prefix_ids[:-1]:
                _, hidden = rnn(kind_tensor, one_hot(letter_index), hidden)

            predict_name = first
            next_index = prefix_ids[-1]
            for _ in range(max_length):
                output, hidden = rnn(kind_tensor, one_hot(next_index), hidden)
                # Feed the letter that was just predicted back in as the next
                # input (the original re-fed the first letter every step).
                next_index = output.topk(1)[1][0][0].item()
                if next_index == eos_index:
                    break
                predict_name += all_letters[next_index]
    finally:
        rnn.train(was_training)
    return predict_name
測試及打印結果
預測男孩的名字,首字母爲L
# Generate a boy's name that starts with "L".
first_letter, which_kind = 'L', 'male'
print('預測首字母爲{}的{}寶寶名字'.format(first_letter, which_kind))
print('\n 寶寶的名字爲:{}\n'.format(predict(kind=which_kind, first=first_letter)))
預測女孩的名字,首字母爲R
# Generate a girl's name that starts with "R".
first_letter, which_kind = 'R', 'female'
print('預測首字母爲{}的{}寶寶名字'.format(first_letter, which_kind))
print('\n 寶寶的名字爲:{}\n'.format(predict(kind=which_kind, first=first_letter)))
完整代碼
import os
import glob
import random
import string
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
# 1. Preparation
# Map each category to its list of names. The category is the file name
# without its extension (e.g. "female" for female.txt).
all_kinds_names = {}
all_kinds = []
# Sort the matches so category indices are the same on every run.
all_files = sorted(glob.glob('C:/Users/Administrator/Desktop/機器學習/baby-names/*.txt'))
for f in all_files:
    kind = os.path.splitext(os.path.basename(f))[0]
    all_kinds.append(kind)
    # The context manager closes the file deterministically; blank lines are
    # dropped so that no empty name can reach training.
    with open(f, encoding='utf-8') as names_file:
        one_kind_names = [line.strip() for line in names_file if line.strip()]
    all_kinds_names[kind] = one_kind_names
# Number of name categories.
num_of_all_kinds = len(all_kinds)
# Alphabet: upper- and lower-case ASCII letters plus a few punctuation marks.
all_letters = string.ascii_letters + " .,;'-"
# One extra slot at the end is reserved for the EOS (end-of-name) marker.
num_of_all_letters = len(all_letters) + 1
# Draw a random (category, name) pair and convert it into the
# (category, input, target) tensors used for training.
def random_train():
    """Draw one random training example and encode it as tensors.

    A category is picked at random, then a name from that category.

    Returns:
        A tuple ``(kind_tensor, input_name_tensor, target_name_tensor)``:

        * ``kind_tensor`` -- one-hot category vector of shape
          ``(1, num_of_all_kinds)``.
        * ``input_name_tensor`` -- one-hot encoded letters of the name, shape
          ``(number_of_letters, 1, num_of_all_letters)``.
        * ``target_name_tensor`` -- ``LongTensor`` with the indices of the
          second to last letters followed by the EOS index
          ``num_of_all_letters - 1``.
    """
    kind = random.choice(all_kinds)
    name = random.choice(all_kinds_names[kind])

    # str.find() returns -1 for a character that is not in all_letters, and
    # index -1 addresses the EOS slot, so such characters would silently be
    # encoded as "end of name". Drop them instead.
    letter_ids = [all_letters.find(ch) for ch in name]
    letter_ids = [idx for idx in letter_ids if idx >= 0]

    kind_tensor = torch.zeros(1, num_of_all_kinds)
    kind_tensor[0][all_kinds.index(kind)] = 1

    input_name_tensor = torch.zeros(len(letter_ids), 1, num_of_all_letters)
    for position, idx in enumerate(letter_ids):
        input_name_tensor[position][0][idx] = 1

    # The target at step i is the letter at step i + 1; the last step
    # predicts EOS.
    eos_index = num_of_all_letters - 1
    target_name_tensor = torch.LongTensor(letter_ids[1:] + [eos_index])
    return kind_tensor, input_name_tensor, target_name_tensor
# 2. Network definition
class RNN(nn.Module):
    """Category-conditioned recurrent cell that scores the next letter.

    At every step the category vector, the current letter vector and the
    previous hidden state are concatenated and fed to two linear layers
    (``i2h`` and ``i2o``). Their results are concatenated again and passed
    through ``o2o``, dropout (p=0.1) and a log-softmax over dimension 1.

    Args:
        input_size: Width of the letter vector.
        hidden_size: Width of the hidden state.
        output_size: Number of output classes.
        category_size: Width of the category vector. When omitted, the
            module-level ``num_of_all_kinds`` is used, which keeps the
            original three-argument call working.
    """

    def __init__(self, input_size, hidden_size, output_size, category_size=None):
        super(RNN, self).__init__()
        if category_size is None:
            # Backward-compatible default: fall back to the script's global.
            category_size = num_of_all_kinds
        self.hidden_size = hidden_size
        combined_size = category_size + input_size + hidden_size
        self.i2h = nn.Linear(combined_size, hidden_size)
        self.i2o = nn.Linear(combined_size, output_size)
        self.o2o = nn.Linear(hidden_size + output_size, output_size)
        self.dropout = nn.Dropout(0.1)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, category, input, hidden):
        """Advance one time step.

        Args:
            category: Category tensor, concatenated along dimension 1.
            input: Letter tensor for the current step.
            hidden: Hidden state returned by the previous step (or by
                ``initHidden`` for the first step).

        Returns:
            ``(output, hidden)``: log-probabilities over the output classes
            and the new hidden state.
        """
        input_combined = torch.cat((category, input, hidden), 1)
        hidden = self.i2h(input_combined)
        output = self.i2o(input_combined)
        output_combined = torch.cat((hidden, output), 1)
        output = self.o2o(output_combined)
        output = self.dropout(output)
        output = self.softmax(output)
        return output, hidden

    def initHidden(self):
        """Return an all-zero hidden state of shape ``(1, hidden_size)``."""
        return torch.zeros(1, self.hidden_size)
# 3. Training
# Running-loss bookkeeping, loss function and step size used by train().
L_loss = []
losses = 0
criterion = nn.NLLLoss()
learning_rate = 0.0005
# Letters are both the input and the output vocabulary; the hidden state has
# 128 units.
rnn = RNN(
    input_size=num_of_all_letters,
    hidden_size=128,
    output_size=num_of_all_letters,
)
# NOTE: the weights are updated by hand-written SGD inside train();
# torch.optim.Adam(rnn.parameters(), lr=learning_rate) is a possible
# alternative.
def train(kind_tensor, input_name_tensor, target_name_tensor):
    """Run one SGD step on a single name.

    The loss is accumulated over every time step of the name and
    back-propagated once; the parameters of the global ``rnn`` are then
    updated with plain gradient descent using ``learning_rate``.

    Args:
        kind_tensor: Category tensor passed to ``rnn`` at every step.
        input_name_tensor: Per-step letter tensors, indexed along dimension 0.
        target_name_tensor: 1-D tensor of target class indices, one per step.

    Returns:
        ``(output, mean_loss)``: the network output of the last step and the
        summed loss divided by the number of steps.

    Raises:
        ValueError: If ``input_name_tensor`` contains no time steps.
    """
    steps = input_name_tensor.size(0)
    if steps == 0:
        # Without this guard the code fails later with an obscure error
        # (``loss`` would still be the int 0 when ``backward`` is called).
        raise ValueError('train() needs a name with at least one letter')

    # Out-of-place unsqueeze: the caller's tensor keeps its shape, so the same
    # sample can be passed to train() more than once.
    targets = target_name_tensor.unsqueeze(-1)

    hidden = rnn.initHidden()
    rnn.zero_grad()
    loss = 0
    for step in range(steps):
        output, hidden = rnn(kind_tensor, input_name_tensor[step], hidden)
        loss += criterion(output, targets[step])
    loss.backward()

    # Manual SGD update. add_(tensor, alpha=...) replaces the deprecated
    # add_(scalar, tensor) overload.
    with torch.no_grad():
        for param in rnn.parameters():
            if param.grad is not None:
                param.add_(param.grad, alpha=-learning_rate)
    return output, loss.item() / steps
# Train on 100000 random samples. Progress is printed every 5000 iterations;
# the mean loss of each 500-iteration window is stored for plotting.
for i in range(1, 100001):
    output, loss = train(*random_train())
    losses += loss
    if not i % 5000:
        print('\n Waiting... {}%\tloss:{}'.format(i / 1000, round(loss, 5)))
    if not i % 500:
        L_loss.append(round(losses / 500, 5))
        losses = 0
# 4. Plot the recorded losses to show how training progressed (the figure is
# displayed by the plt.show() call at the end of the script).
plt.figure()
plt.plot(L_loss)
# 5. Name prediction
# Report how many name categories were loaded and what they are.
print('\n\n名字的類數及類別:{}\t{}\n'.format(num_of_all_kinds, all_kinds))
def predict(kind, first='A', max_length=7):
    """Greedily generate a name of category ``kind`` that starts with ``first``.

    Every character of ``first`` is fed to the global ``rnn``; after that the
    most probable next letter is appended and fed back as the next input
    until the EOS index is predicted or ``max_length`` letters were added.

    Args:
        kind: Category name; must be an element of ``all_kinds``.
        first: Starting letter (or prefix) of the name.
        max_length: Maximum number of letters generated after ``first``.

    Returns:
        The generated name, beginning with ``first``.

    Raises:
        ValueError: If ``first`` is empty or contains a character that is not
            in ``all_letters``.
    """
    prefix_ids = [all_letters.find(ch) for ch in first]
    if not prefix_ids or min(prefix_ids) < 0:
        # find() returns -1 for unknown characters, which would otherwise be
        # one-hot encoded into the EOS slot.
        raise ValueError('first must be a non-empty string of known letters')

    eos_index = num_of_all_letters - 1

    def one_hot(letter_index):
        """Return a (1, num_of_all_letters) one-hot row for letter_index."""
        row = torch.zeros(1, num_of_all_letters)
        row[0][letter_index] = 1
        return row

    # Dropout must be switched off for inference; restore the previous mode
    # afterwards so that training can continue unaffected.
    was_training = rnn.training
    rnn.eval()
    try:
        with torch.no_grad():
            kind_tensor = torch.zeros(1, num_of_all_kinds)
            kind_tensor[0][all_kinds.index(kind)] = 1
            hidden = rnn.initHidden()

            # Prime the hidden state with all but the last prefix letter.
            for letter_index in prefix_ids[:-1]:
                _, hidden = rnn(kind_tensor, one_hot(letter_index), hidden)

            predict_name = first
            next_index = prefix_ids[-1]
            for _ in range(max_length):
                output, hidden = rnn(kind_tensor, one_hot(next_index), hidden)
                # Feed the letter that was just predicted back in as the next
                # input (the original re-fed the first letter every step).
                next_index = output.topk(1)[1][0][0].item()
                if next_index == eos_index:
                    break
                predict_name += all_letters[next_index]
    finally:
        rnn.train(was_training)
    return predict_name
# 6. Demo: generate a boy's name starting with "L", then a girl's name
# starting with "R", and finally display the loss figure.
for first_letter, which_kind in (('L', 'male'), ('R', 'female')):
    print('預測首字母爲{}的{}寶寶名字'.format(first_letter, which_kind))
    print('\n 寶寶的名字爲:{}\n'.format(predict(kind=which_kind, first=first_letter)))
plt.show()