1.几种不同的初始化方法
import torch
import torch.nn as nn

# Uninitialized 3x5 tensor; every initializer below fills it in place.
embedding = torch.empty(3, 5)

# Six initialization methods follow. Each call overwrites the result of the
# previous one, so only the last call determines the final values.

# Normal distribution.
nn.init.normal_(embedding)

# Uniform distribution.
nn.init.uniform_(embedding)

# Kaiming (He) uniform distribution. `mode` is 'fan_in' or 'fan_out':
# 'fan_in' keeps the variance consistent in the forward pass, 'fan_out' keeps
# it consistent in the backward pass. `nonlinearity` names the activation
# function the layer is used with.
nn.init.kaiming_uniform_(embedding, mode='fan_in', nonlinearity='leaky_relu')

# Kaiming (He) normal distribution; `mode` and `nonlinearity` have the same
# meaning as for kaiming_uniform_ above.
nn.init.kaiming_normal_(embedding, mode='fan_in', nonlinearity='leaky_relu')

# Xavier initialization, normal variant:
# mean = 0, std = gain * sqrt(2 / (fan_in + fan_out)).
nn.init.xavier_normal_(embedding)

# Xavier initialization, uniform variant: values are drawn from U(-a, a)
# with a = gain * sqrt(6 / (fan_in + fan_out)).
nn.init.xavier_uniform_(embedding)

# Mark the tensor as trainable only after initialization is finished.
embedding.requires_grad = True
2.加载预训练的词向量1
import torch
import torch.nn as nn

# Start from a Xavier-normal initialized 3x5 embedding matrix.
embedding = torch.empty(3, 5)
nn.init.xavier_normal_(embedding)

# NOTE: wrapping the tensor in the legacy `Variable` class is not needed here;
# the tensor is turned into a trainable nn.Parameter below instead.

# An existing (pre-trained) word vector with one value per embedding column.
data = torch.tensor([-0.5736, -3.6566, 3.0850, 3.4097, 2.6072])

# Overwrite row 1 with the pre-trained vector; `data` is a tensor whose length
# matches the row width (5).
embedding[1, :] = data

# nn.Parameter is trainable by default.
embedding = nn.Parameter(embedding)
print(embedding[1])
3.加载预训练的词向量2
import numpy as np
import torch
import torch.nn as nn

# NOTE(review): `vocab_size`, `embedding_dim` and `pretrained_weight` are not
# defined in this snippet; they must be provided before it runs. Presumably
# `pretrained_weight` holds one row per vocabulary entry, i.e. shape
# (vocab_size, embedding_dim) -- confirm against the data source.
word_embeds = nn.Embedding(vocab_size, embedding_dim)

# Convert the pre-trained word vectors to a NumPy array so that
# torch.from_numpy can wrap them.
pretrained_weight = np.array(pretrained_weight)

# Copy the pre-trained values into the embedding layer's weight in place,
# without recording the copy in the autograd graph. The copy targets
# `word_embeds`, the layer created above.
with torch.no_grad():
    word_embeds.weight.copy_(torch.from_numpy(pretrained_weight))