论文地址:http://www.iro.umontreal.ca/~vincentp/Publications/lm_jmlr.pdf

论文给出了NNLM的框架图:

(图:NNLM 模型框架图——原图片未能随正文保留,图片标题为“pytorch ---神经网络语言模型 NNLM 《A Neural Probabilistic Language Model》”)

针对论文,实现代码如下(https://github.com/graykode/nlp-tutorial):

 # -*- coding: utf-8 -*-
# @time : 2019/10/26 12:20 import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

dtype = torch.FloatTensor

# Toy corpus: the first two words of each sentence are the context and the
# last word is the prediction target.
sentences = ["i like dog", "i love coffee", "i hate milk"]

word_list = " ".join(sentences).split()
# Deduplicate and sort. Plain list(set(...)) yields an order that changes from
# run to run (string hashing is randomized), which made the word indices
# irreproducible.
word_list = sorted(set(word_list))
# {'coffee': 0, 'dog': 1, 'hate': 2, 'i': 3, 'like': 4, 'love': 5, 'milk': 6}
word_dict = {w: i for i, w in enumerate(word_list)}
number_dict = {i: w for i, w in enumerate(word_list)}
n_class = len(word_dict)  # number of Vocabulary

# NNLM Parameter
n_step = 2  # n-1 in paper -> 3-gram
n_hidden = 2  # h in paper -> number of hidden units
m = 2  # m in paper -> embedding size

# make data batch (input, target)
# For the sentences and vocabulary above:
# input: [[3, 4], [3, 5], [3, 2]]
# target: [1, 0, 6]
def make_batch(sentences, vocab=None):
    """Turn sentences into (context indices, target index) training pairs.

    Each sentence is split on whitespace; every word except the last becomes
    the input context and the last word becomes the target.

    Args:
        sentences: iterable of whitespace-separated sentence strings.
        vocab: optional mapping from word to integer index. Defaults to the
            module-level ``word_dict``.

    Returns:
        A tuple ``(input_batch, target_batch)`` where ``input_batch`` is a
        list of index lists and ``target_batch`` is a list of indices.

    Raises:
        KeyError: if a word is missing from the vocabulary.
        IndexError: if a sentence contains no words.
    """
    if vocab is None:
        vocab = word_dict

    input_batch = []
    target_batch = []
    for sen in sentences:
        words = sen.split()
        input_batch.append([vocab[w] for w in words[:-1]])
        target_batch.append(vocab[words[-1]])
    return input_batch, target_batch


# Model
class NNLM(nn.Module):
    """Feed-forward neural language model.

    Computes ``y = b + x W + tanh(d + x H) U`` where ``x`` is the
    concatenation of the embeddings of the ``n_step`` context words.
    Sizes come from the module-level ``n_class``, ``m``, ``n_step`` and
    ``n_hidden``.
    """

    def __init__(self):
        super().__init__()
        # Word embedding table: one m-dimensional vector per vocabulary entry.
        self.C = nn.Embedding(n_class, m)
        # Input -> hidden weights and hidden bias.
        self.H = nn.Parameter(torch.randn(n_step * m, n_hidden).type(dtype))
        self.d = nn.Parameter(torch.randn(n_hidden).type(dtype))
        # Direct input -> output connection.
        self.W = nn.Parameter(torch.randn(n_step * m, n_class).type(dtype))
        # Hidden -> output weights and output bias.
        self.U = nn.Parameter(torch.randn(n_hidden, n_class).type(dtype))
        self.b = nn.Parameter(torch.randn(n_class).type(dtype))

    def forward(self, X):
        """Return unnormalized scores of shape [batch_size, n_class].

        Args:
            X: tensor of word indices, n_step indices per sample.
        """
        X = self.C(X)
        X = X.view(-1, n_step * m)  # [batch_size, n_step * m]
        tanh = torch.tanh(self.d + torch.mm(X, self.H))  # [batch_size, n_hidden]
        output = self.b + torch.mm(X, self.W) + torch.mm(tanh, self.U)  # [batch_size, n_class]
        return output


model = NNLM()

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

input_batch, target_batch = make_batch(sentences)
# Indices are stored as LongTensor; the target is a class index per sample,
# not a one-hot vector. The deprecated Variable wrapper is not needed here.
input_batch = torch.LongTensor(input_batch)
target_batch = torch.LongTensor(target_batch)

# Training
for epoch in range(5000):
    optimizer.zero_grad()
    # output : [batch_size, n_class], target_batch : [batch_size]
    output = model(input_batch)
    loss = criterion(output, target_batch)
    if (epoch + 1) % 1000 == 0:
        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss.item()))
    loss.backward()
    optimizer.step()

# Predict: index of the highest-scoring word per sample (should match the
# targets once training has converged). argmax keeps a 1-D result even when
# the batch holds a single sentence.
with torch.no_grad():
    predict = model(input_batch).argmax(dim=1)

# print to visual
print([sen.split()[:2] for sen in sentences], '->', [number_dict[n.item()] for n in predict])
05-11 17:12
查看更多