请问 Paddle 的 dynamic_lstm 和 PyTorch 的 LSTM 在 op 实现和计算上是否有区别?
Created by: aprilvkuo
pytorch: https://pytorch.org/docs/1.1.0/nn.html?highlight=lstm#torch.nn.LSTM
paddle: https://www.paddlepaddle.org.cn/documentation/docs/zh/1.3/api_cn/layers_cn.html#dynamic-lstm
目前将 PyTorch 的模型参数通过 numpy 保存,再在 Paddle 中加载,发现在相同输入下两边的输出结果不一致。
pytorch代码
# Export model parameters: detach the layer-0 LSTM tensors from autograd and
# convert them to NumPy arrays.
# NOTE(review): only the `*_l0` tensors are read here. If the LSTM is
# bidirectional, PyTorch also exposes `*_l0_reverse` tensors, and for more than
# one layer `*_l1`, ... -- none of those are exported by this snippet; confirm
# whether that is intended.
lstm_weight_ih = self.lstm.weight_ih_l0.detach().numpy()
lstm_weight_hh = self.lstm.weight_hh_l0.detach().numpy()
lstm_bias_ih = self.lstm.bias_ih_l0.detach().numpy()
lstm_bias_hh = self.lstm.bias_hh_l0.detach().numpy()
# Model definition
# Dropout is disabled (0.0) for this LSTM.
dropout = 0.0
# Bidirectional, batch-first LSTM whose input width is emb_size + 32 + 32.
self.lstm = nn.LSTM(emb_size+32+32, hidden_size, layer_size,
bidirectional=True, batch_first=True, dropout=dropout)
# Run the LSTM; the second return value (the final states) is discarded.
out, _ = self.lstm(out)
paddle代码
def __lstm_reshape(self, data):
    """Reorder the gate blocks of a PyTorch LSTM parameter for Paddle.

    The second axis of ``data`` is treated as four equally sized gate blocks.

    pytorch : input | forget | cell (g) | output   (W_ii|W_if|W_ig|W_io)
    paddle  : cell (c) | input | forget | output   (dynamic_lstm documents its
              weights as {W_ch, W_ih, W_fh, W_oh} and biases as
              {b_c, b_i, b_f, b_o})

    Arguments:
        data: 2-D numpy array whose second dimension is a multiple of 4,
            laid out in PyTorch gate order along that dimension.

    Returns:
        A new array of the same shape with the columns permuted into
        Paddle gate order.

    Raises:
        AssertionError: if the second dimension is not divisible by 4.
    """
    dims = data.shape
    print(dims)
    assert dims[1] % 4 == 0, "second dimension must hold 4 gate blocks"
    # Floor division keeps the block size an int on Python 3 as well.
    each_dim = dims[1] // 4
    # Build an explicit list: range objects cannot be concatenated with `+`
    # on Python 3. Order: cell block first, then input+forget, then output.
    new_column_order = (
        list(range(2 * each_dim, 3 * each_dim))
        + list(range(0, 2 * each_dim))
        + list(range(3 * each_dim, 4 * each_dim))
    )
    new_data = data[:, new_column_order]
    return new_data
#参数加载
def load(self):
    """Load the exported LSTM arrays and wrap them as Paddle initializers.

    Reads four ``.npy`` files from ``model_dir`` (a name defined outside this
    method), transposes the weight matrices, reshapes each bias to a single
    row, permutes the gate columns with ``__lstm_reshape`` and finally stores
    ``fluid.initializer.NumpyArrayInitializer`` objects on ``self`` as
    ``lstm_weight_hh``, ``lstm_weight_ih``, ``lstm_bias_hh`` and
    ``lstm_bias_ih``. The input-hidden bias is flattened to 1-D; the
    hidden-hidden bias stays as a (1, N) row.
    """
    file_names = (
        "lstm_weight_hh.npy",
        "lstm_weight_ih.npy",
        "lstm_bias_hh.npy",
        "lstm_bias_ih.npy",
    )
    raw_hh, raw_ih, raw_bias_hh, raw_bias_ih = [
        np.load(os.path.join(model_dir, file_name)) for file_name in file_names
    ]

    # Weights are transposed; biases become a single row so that the gate
    # blocks lie along axis 1 for every array.
    weight_hh = raw_hh.T
    weight_ih = raw_ih.T
    bias_hh = raw_bias_hh.T.reshape((1, -1))
    bias_ih = raw_bias_ih.T.reshape((1, -1))

    # Permute gate columns, in the same order the arrays were loaded.
    weight_hh = self.__lstm_reshape(weight_hh)
    weight_ih = self.__lstm_reshape(weight_ih)
    bias_hh = self.__lstm_reshape(bias_hh)
    bias_ih = self.__lstm_reshape(bias_ih).flatten()

    # Debug output: shape of each prepared array.
    for prepared in (weight_hh, weight_ih, bias_hh, bias_ih):
        print("data")
        print(prepared.shape)

    make_initializer = fluid.initializer.NumpyArrayInitializer
    self.lstm_weight_hh = make_initializer(weight_hh)
    self.lstm_weight_ih = make_initializer(weight_ih)
    self.lstm_bias_hh = make_initializer(bias_hh)
    self.lstm_bias_ih = make_initializer(bias_ih)
# Build the graph
# The fc layer is initialized from lstm_weight_ih / lstm_bias_ih and projects
# the input to 4 * lstm_hidden_size columns; its output feeds both dynamic_lstm
# calls, which are initialized from lstm_weight_hh / lstm_bias_hh.
# NOTE(review): the column (gate) order produced by the fc layer must match the
# order dynamic_lstm expects -- confirm against the dynamic_lstm documentation.
lstm_input = fluid.layers.fc(out, size=self.lstm_hidden_size*4, param_attr=self.lstm_weight_ih, bias_attr=self.lstm_bias_ih)
# Forward-direction pass (is_reverse=False); the second return value is discarded.
out_1, _ = fluid.layers.dynamic_lstm(input=lstm_input, size=self.lstm_hidden_size*4, \
param_attr=self.lstm_weight_hh, bias_attr=self.lstm_bias_hh, \
use_peepholes=False, is_reverse=False) # # hidden_state, cell_state
# Reverse-direction pass (is_reverse=True).
# NOTE(review): this pass reuses the same fc projection and the same
# weight/bias initializers as the forward pass. A PyTorch bidirectional LSTM
# keeps separate `*_reverse` parameters for the backward direction, so the two
# directions would presumably need distinct weights -- confirm.
out_2, _ = fluid.layers.dynamic_lstm(input=lstm_input, size=self.lstm_hidden_size*4, \
param_attr=self.lstm_weight_hh, bias_attr=self.lstm_bias_hh, \
use_peepholes=False, is_reverse=True) # # hidden_state, cell_state