From 58f6d459f887932c25365182f5ba435e6fccc837 Mon Sep 17 00:00:00 2001 From: helen88 Date: Thu, 21 Apr 2022 19:21:37 +0800 Subject: [PATCH] [cherry-pick]support multi_layer of bilstm,*test=kunlun (#42076) * modify xpu.cmake,*test=kunlun (#41832) * modify xpu.cmake,*test=kunlun * modify xpu.cmake,*test=kunlun * modify xpu.cmake,*test=kunlun * modify xpu.cmake,*test=kunlun * support bilstm,*test=kunlun * [cherry-pick]support multi_layer of bilstm,*test=kunlun --- paddle/fluid/operators/rnn_op_xpu.cc | 357 ++++++++++-------- .../fluid/platform/device/xpu/xpu2_op_list.h | 1 + .../tests/unittests/xpu/test_rnn_op_xpu.py | 64 ++-- 3 files changed, 250 insertions(+), 172 deletions(-) diff --git a/paddle/fluid/operators/rnn_op_xpu.cc b/paddle/fluid/operators/rnn_op_xpu.cc index 2dee4e889f..220d91bf4f 100644 --- a/paddle/fluid/operators/rnn_op_xpu.cc +++ b/paddle/fluid/operators/rnn_op_xpu.cc @@ -16,6 +16,7 @@ limitations under the License. */ #include "paddle/fluid/platform/device/device_wrapper.h" #include "paddle/fluid/platform/device/xpu/xpu_header.h" #include "paddle/fluid/platform/device_context.h" +#include "paddle/phi/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -50,41 +51,6 @@ void reset_parameter_vector(const std::vector& raw_params_vec, } } -template -void RunLSTMLayer(const framework::ExecutionContext& ctx, int seq_len, - int batch_size, int xdim, int hidden_size, const T* x, T* y, - const T* init_h, const T* init_c, T* last_h, T* last_c, - int state_offset, const std::vector& seq_len_tensor, - const std::vector& param_list, T* i_f_g_o, T* c, - bool is_bidirect, int layer_idx, int offset) { - bool is_reverse = false; - if (is_bidirect) { - layer_idx = 2 * layer_idx + offset; - if (offset > 0) { - is_reverse = true; - } - } - auto w_x = param_list[0 + offset * 4]; - auto w_h = param_list[1 + offset * 4]; - auto b_x = param_list[2 + offset * 4]; - auto b_h = param_list[3 + offset * 4]; - - auto h_0 = init_h + layer_idx * state_offset; - auto c_0 = init_c + layer_idx * state_offset; - auto last_h_ptr = last_h + layer_idx * state_offset; - auto last_c_ptr = last_c + layer_idx * state_offset; - auto& dev_ctx = ctx.template device_context(); - int r = xpu::lstm_train( - dev_ctx.x_context(), (const T*)x, (const T*)h_0, (const T*)c_0, - (const T*)w_x, (const T*)w_h, (const T*)b_x, (const T*)b_h, - reinterpret_cast(y), reinterpret_cast(last_h_ptr), - reinterpret_cast(last_c_ptr), batch_size, xdim, hidden_size, seq_len, - seq_len_tensor, is_reverse, nullptr, nullptr, nullptr, nullptr, - reinterpret_cast(i_f_g_o), reinterpret_cast(c), - xpu::Activation_t::TANH, xpu::Activation_t::SIGMOID); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "lstm_train"); -} - template class RnnXPUKernel : public framework::OpKernel { public: @@ -114,6 +80,9 @@ class RnnXPUKernel : public framework::OpKernel { if (dropout_mask->numel() != output->numel()) dropout_mask->clear(); } dropout_mask->mutable_data(output->dims(), ctx.GetPlace()); + auto& dev_ctx = ctx.template device_context(); + phi::funcs::SetConstant ones; + ones(dev_ctx, dropout_mask, static_cast(1)); PADDLE_ENFORCE_EQ( mode, "LSTM", @@ -156,23 +125,13 @@ class RnnXPUKernel : public framework::OpKernel { output->mutable_data(ctx.GetPlace()); last_h->mutable_data(ctx.GetPlace()); last_c->mutable_data(ctx.GetPlace()); + int gate_num = 4; + int hidden_data_idx = (num_layers - 1); + hidden_data_idx += (gate_num + 1) * num_layers; + const int& block_size = direction_num * seq_len * batch_size * hidden_size; + 
reserve_data->Resize({hidden_data_idx, block_size}); - reserve_data->Resize( - {num_layers * direction_num * seq_len * batch_size * hidden_size * 5}); reserve_data->mutable_data(ctx.GetPlace()); - Tensor internal_output_1_tensor, internal_output_2_tensor; - T* internal_output_1_ptr = nullptr; - T* internal_output_2_ptr = nullptr; - if (num_layers >= 2) { - internal_output_1_tensor.Resize(output->dims()); - internal_output_1_ptr = - internal_output_1_tensor.mutable_data(ctx.GetPlace()); - } - if (num_layers >= 3) { - internal_output_2_tensor.Resize(output->dims()); - internal_output_2_ptr = - internal_output_2_tensor.mutable_data(ctx.GetPlace()); - } // get ptr from tensor auto x = input->data(); auto init_h_ptr = init_h->data(); @@ -180,80 +139,77 @@ class RnnXPUKernel : public framework::OpKernel { auto y = output->data(); auto last_h_ptr = last_h->data(); auto last_c_ptr = last_c->data(); - auto i_f_g_o = reserve_data->data(); - auto c = - i_f_g_o + - num_layers * direction_num * seq_len * batch_size * hidden_size * 4; + auto i_f_g_o_ptr = reserve_data->data(); + auto c_ptr = + i_f_g_o_ptr + num_layers * block_size * 4; // 4 for i_f_g_o offset + auto hidden_data_ptr = + c_ptr + num_layers * block_size * 1; // 1 for c offset std::vector seq_len_tensor(batch_size, seq_len); if (has_seq_length) { seq_len_tensor = operators::GetDataFromTensor(sequence_length); } - auto& dev_ctx = ctx.template device_context(); int state_offset = pre_state[0]->dims()[1] * pre_state[0]->dims()[2]; + const T* cur_input_ptr = nullptr; + int cur_xdim = -1; + T* cur_output_ptr = y; for (int i = 0; i < num_layers; i++) { - const T* cur_input_ptr = nullptr; - int cur_xdim = -1; - i_f_g_o += i * direction_num * seq_len * batch_size * hidden_size * 4; - c += i * direction_num * seq_len * batch_size * hidden_size; + auto i_f_g_o = i_f_g_o_ptr + i * block_size * 4; + auto c = c_ptr + i * block_size; + + cur_output_ptr = y; + if (i < num_layers - 1 && num_layers > 1) { + cur_output_ptr = hidden_data_ptr + i * block_size; + } if (i == 0) { cur_input_ptr = x; cur_xdim = input_dim; - } else if (i % 2 != 0) { - cur_input_ptr = internal_output_1_ptr; - cur_xdim = is_bidirec ? 2 * hidden_size : hidden_size; } else { - cur_input_ptr = internal_output_2_ptr; + cur_input_ptr = hidden_data_ptr + (i - 1) * block_size; cur_xdim = is_bidirec ? 
2 * hidden_size : hidden_size; } - T* cur_output_ptr = nullptr; - if (i == num_layers - 1) { - cur_output_ptr = y; - } else if (i % 2 != 0) { - cur_output_ptr = internal_output_2_ptr; - } else { - cur_output_ptr = internal_output_1_ptr; - } + auto h_0 = init_h_ptr + direction_num * i * state_offset; + auto c_0 = init_c_ptr + direction_num * i * state_offset; + auto last_h = last_h_ptr + direction_num * i * state_offset; + auto last_c = last_c_ptr + direction_num * i * state_offset; + auto w_x = parameter_lists[i][0]; + auto w_h = parameter_lists[i][1]; + auto b_x = parameter_lists[i][2]; + auto b_h = parameter_lists[i][3]; if (is_bidirec) { - std::vector output_vec(2); - std::vector output_ptr_vec(2); - for (int k = 0; k < 2; ++k) { - output_vec[k].Resize({seq_len, batch_size, output->dims()[2] / 2}); - output_ptr_vec[k] = output_vec[k].mutable_data(ctx.GetPlace()); - } - RunLSTMLayer( - ctx, seq_len, batch_size, cur_xdim, hidden_size, cur_input_ptr, - output_ptr_vec[0], init_h_ptr, init_c_ptr, last_h_ptr, last_c_ptr, - state_offset, seq_len_tensor, parameter_lists[i], i_f_g_o, c, - is_bidirec, i, 0); - - T* bw_i_f_g_o = i_f_g_o + seq_len * batch_size * hidden_size * 4; - T* bw_c = c + seq_len * batch_size * hidden_size; - RunLSTMLayer( - ctx, seq_len, batch_size, cur_xdim, hidden_size, cur_input_ptr, - output_ptr_vec[1], init_h_ptr, init_c_ptr, last_h_ptr, last_c_ptr, - state_offset, seq_len_tensor, parameter_lists[i], bw_i_f_g_o, bw_c, - is_bidirec, i, 1); - - // concat - int r = xpu::concat( - dev_ctx.x_context(), {output_ptr_vec[0], output_ptr_vec[1]}, - cur_output_ptr, {{seq_len, batch_size, hidden_size}, - {seq_len, batch_size, hidden_size}}, - 2); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "concat"); - xpu_wait(dev_ctx.x_context()->xpu_stream); + auto bw_x = parameter_lists[i][4]; + auto bw_h = parameter_lists[i][5]; + auto bb_x = parameter_lists[i][6]; + auto bb_h = parameter_lists[i][7]; + + int r = xpu::bilstm_train( + dev_ctx.x_context(), (const T*)cur_input_ptr, (const T*)h_0, + (const T*)c_0, (const T*)w_x, (const T*)w_h, (const T*)b_x, + (const T*)b_h, (const T*)bw_x, (const T*)bw_h, (const T*)bb_x, + (const T*)bb_h, reinterpret_cast(cur_output_ptr), + reinterpret_cast(last_h), reinterpret_cast(last_c), + batch_size, cur_xdim, hidden_size, seq_len, seq_len_tensor, nullptr, + nullptr, nullptr, nullptr, nullptr, nullptr, + reinterpret_cast(i_f_g_o), reinterpret_cast(c)); + + PADDLE_ENFORCE_XDNN_SUCCESS(r, "bilstm_train"); } else { - RunLSTMLayer( - ctx, seq_len, batch_size, cur_xdim, hidden_size, cur_input_ptr, - cur_output_ptr, init_h_ptr, init_c_ptr, last_h_ptr, last_c_ptr, - state_offset, seq_len_tensor, parameter_lists[i], i_f_g_o, c, - is_bidirec, i, 0); + int r = xpu::lstm_train( + dev_ctx.x_context(), (const T*)cur_input_ptr, (const T*)h_0, + (const T*)c_0, (const T*)w_x, (const T*)w_h, (const T*)b_x, + (const T*)b_h, reinterpret_cast(cur_output_ptr), + reinterpret_cast(last_h), reinterpret_cast(last_c), + batch_size, cur_xdim, hidden_size, seq_len, seq_len_tensor, nullptr, + nullptr, nullptr, nullptr, reinterpret_cast(i_f_g_o), + reinterpret_cast(c), xpu::Activation_t::TANH, + xpu::Activation_t::SIGMOID); + + PADDLE_ENFORCE_XDNN_SUCCESS(r, "lstm_train"); } } } @@ -261,6 +217,8 @@ class RnnXPUKernel : public framework::OpKernel { template class RnnXPUGradKernel : public framework::OpKernel { + using XPUTyp = typename XPUTypeTrait::Type; + public: void Compute(const framework::ExecutionContext& ctx) const override { // get the tensor pointer for the input @@ -271,6 +229,7 @@ class 
RnnXPUGradKernel : public framework::OpKernel { auto* reserve_data = ctx.Input("Reserve"); const int& num_layers = ctx.Attr("num_layers"); const bool& is_bidirec = ctx.Attr("is_bidirec"); + const float& dropout_prob = ctx.Attr("dropout_prob"); const int& hidden_size = ctx.Attr("hidden_size"); const std::string& mode = ctx.Attr("mode"); @@ -285,16 +244,6 @@ class RnnXPUGradKernel : public framework::OpKernel { platform::errors::InvalidArgument( "XPU only support LSTM mode now, current mode is %s", mode)); - PADDLE_ENFORCE_EQ(is_bidirec, false, - platform::errors::InvalidArgument( - "XPU only support unidirectional LSTM now")); - - PADDLE_ENFORCE_EQ( - num_layers, 1, - platform::errors::InvalidArgument( - "XPU only support 1 layer LSTM now, current layer num is %s", - num_layers)); - auto init_h = pre_state[0]; auto init_c = pre_state[1]; @@ -317,11 +266,12 @@ class RnnXPUGradKernel : public framework::OpKernel { } // check shape - int seq_len = input->dims()[0]; - int batch_size = input->dims()[1]; - int input_dim = input->dims()[2]; + const int& seq_len = input->dims()[0]; + const int& batch_size = input->dims()[1]; + const int& input_dim = input->dims()[2]; + const int& direction_num = is_bidirec ? 2 : 1; PADDLE_ENFORCE_EQ( - init_h->dims()[0], num_layers, + init_h->dims()[0], num_layers * direction_num, platform::errors::InvalidArgument("The num_layers of in RNN layer must" " be the same as first dim of init " "hidden, but received num_layers:%d," @@ -329,7 +279,7 @@ class RnnXPUGradKernel : public framework::OpKernel { num_layers, init_h->dims()[0])); PADDLE_ENFORCE_EQ( - init_c->dims()[0], num_layers, + init_c->dims()[0], num_layers * direction_num, platform::errors::InvalidArgument( "The num_layers of in RNN layer must" " be the same as first dim of cell state hidden, but received" @@ -351,52 +301,165 @@ class RnnXPUGradKernel : public framework::OpKernel { // allocate the memory and initization the input_grad input_grad->mutable_data(input->dims(), ctx.GetPlace()); + auto& dev_ctx = ctx.template device_context(); + phi::funcs::SetConstant zero; + zero(dev_ctx, input_grad, static_cast(0.0)); + + Tensor a, b; + Tensor* dynamic_grad_pre_h = &a; + Tensor* dynamic_grad_pre_c = &b; if (init_h_grad) { - init_h_grad->mutable_data(init_h->dims(), ctx.GetPlace()); + init_h_grad->mutable_data(last_h_grad->dims(), ctx.GetPlace()); + zero(dev_ctx, init_h_grad, static_cast(0.0)); + } else { + dynamic_grad_pre_h->Resize(last_h_grad->dims()); + dynamic_grad_pre_h->mutable_data(ctx.GetPlace()); + zero(dev_ctx, dynamic_grad_pre_h, static_cast(0.0)); + init_h_grad = dynamic_grad_pre_h; } if (init_c_grad) { - init_c_grad->mutable_data(init_c->dims(), ctx.GetPlace()); + init_c_grad->mutable_data(last_c_grad->dims(), ctx.GetPlace()); + } else { + dynamic_grad_pre_c->Resize(last_h_grad->dims()); + dynamic_grad_pre_c->mutable_data(ctx.GetPlace()); + init_c_grad = dynamic_grad_pre_c; + } + + Tensor temp_input_grad_1, temp_input_grad_2; + T* input_grad_1_ptr = nullptr; + T* input_grad_2_ptr = nullptr; + if (num_layers >= 2) { + temp_input_grad_1.Resize(output_grad->dims()); + input_grad_1_ptr = temp_input_grad_1.mutable_data(ctx.GetPlace()); + } + if (num_layers >= 3) { + temp_input_grad_2.Resize(output_grad->dims()); + input_grad_2_ptr = temp_input_grad_2.mutable_data(ctx.GetPlace()); } // get ptr from tensor auto x = input->data(); - auto h_0 = init_h->data(); - auto c_0 = init_c->data(); - auto w_x = parameter_lists[0][0]; - auto w_h = parameter_lists[0][1]; + auto init_h_ptr = init_h->data(); + auto 
init_c_ptr = init_c->data(); auto y = output->data(); auto y_grad = output_grad->data(); auto last_h_grad_ptr = last_h_grad->data(); auto last_c_grad_ptr = last_c_grad->data(); auto x_grad = input_grad->data(); - auto h_0_grad = init_h_grad ? init_h_grad->data() : nullptr; - auto c_0_grad = init_c_grad ? init_c_grad->data() : nullptr; - auto w_x_grad = parameter_lists_grad[0][0]; - auto w_h_grad = parameter_lists_grad[0][1]; - auto b_x_grad = parameter_lists_grad[0][2]; - auto b_h_grad = parameter_lists_grad[0][3]; - auto i_f_g_o = reserve_data->data(); - auto c = i_f_g_o + seq_len * batch_size * hidden_size * 4; + auto init_h_grad_ptr = init_h_grad->data(); + auto init_c_grad_ptr = init_c_grad->data(); + const int& block_size = direction_num * seq_len * batch_size * hidden_size; + auto i_f_g_o_ptr = reserve_data->data(); + auto c_ptr = i_f_g_o_ptr + num_layers * block_size * 4; + auto hidden_data_ptr = c_ptr + num_layers * block_size * 1; + int state_offset = pre_state[0]->dims()[1] * pre_state[0]->dims()[2]; std::vector seq_len_tensor(batch_size, seq_len); if (has_seq_length) { seq_len_tensor = operators::GetDataFromTensor(sequence_length); } - auto& dev_ctx = ctx.template device_context(); - int r = xpu::lstm_grad( - dev_ctx.x_context(), (const T*)x, (const T*)h_0, (const T*)c_0, - (const T*)w_x, (const T*)w_h, (const T*)y, (const T*)y_grad, - (const T*)last_h_grad_ptr, (const T*)last_c_grad_ptr, - reinterpret_cast(x_grad), reinterpret_cast(h_0_grad), - reinterpret_cast(c_0_grad), w_x_grad, w_h_grad, b_x_grad, b_h_grad, - batch_size, input_dim, hidden_size, seq_len, seq_len_tensor, nullptr, - nullptr, nullptr, nullptr, i_f_g_o, c); - PADDLE_ENFORCE_EQ( - r, xpu::Error_t::SUCCESS, - platform::errors::External("RnnXPUGrad(lstm) return wrong " - "value[%d %s]", - r, XPUAPIErrorMsg[r])); + for (int i = num_layers - 1; i >= 0; --i) { + // the layer input output had saved, just use the data + auto w_x = parameter_lists[i][0]; + auto w_h = parameter_lists[i][1]; + auto bw_x = parameter_lists[i][4]; + auto bw_h = parameter_lists[i][5]; + + auto i_f_g_o = i_f_g_o_ptr + i * block_size * 4; + auto c = c_ptr + i * block_size; + + Tensor layer_input_t; + auto layer_input = x; + if (i > 0) { + layer_input_t.Resize(output->dims()); + layer_input = layer_input_t.mutable_data(ctx.GetPlace()); + float scale = static_cast(1.0f - dropout_prob); + auto hidden_data = hidden_data_ptr + (i - 1) * block_size; + int r = xpu::scale(dev_ctx.x_context(), + reinterpret_cast(hidden_data), + const_cast(layer_input), output->numel(), + false, scale, 0.0f); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "scale"); + } else { + layer_input = x; + } + + auto layer_output = y; + if (i == num_layers - 1) { + layer_output = y; + } else { + layer_output = hidden_data_ptr + i * block_size; + } + + const T* cur_input_ptr = nullptr; + if (i == num_layers - 1) { + cur_input_ptr = y_grad; + } else if (i % 2 != 0) { + cur_input_ptr = input_grad_2_ptr; + } else { + cur_input_ptr = input_grad_1_ptr; + } + + T* cur_output_ptr = nullptr; + int cur_xdim = -1; + if (i == 0) { + cur_output_ptr = x_grad; + cur_xdim = input_dim; + } else if (i % 2 != 0) { + cur_output_ptr = input_grad_1_ptr; + cur_xdim = is_bidirec ? 2 * hidden_size : hidden_size; + } else { + cur_output_ptr = input_grad_2_ptr; + cur_xdim = is_bidirec ? 
2 * hidden_size : hidden_size; + } + + auto w_x_grad = parameter_lists_grad[i][0]; + auto w_h_grad = parameter_lists_grad[i][1]; + auto b_x_grad = parameter_lists_grad[i][2]; + auto b_h_grad = parameter_lists_grad[i][3]; + + auto h_0 = init_h_ptr + direction_num * i * state_offset; + auto c_0 = init_c_ptr + direction_num * i * state_offset; + + auto h_0_grad = init_h_grad_ptr + direction_num * i * state_offset; + auto c_0_grad = init_c_grad_ptr + direction_num * i * state_offset; + auto h_t_grad = last_h_grad_ptr + direction_num * i * state_offset; + auto c_t_grad = last_c_grad_ptr + direction_num * i * state_offset; + + if (is_bidirec) { + auto bw_x_grad = parameter_lists_grad[i][4]; + auto bw_h_grad = parameter_lists_grad[i][5]; + auto bb_x_grad = parameter_lists_grad[i][6]; + auto bb_h_grad = parameter_lists_grad[i][7]; + + int r = xpu::bilstm_grad( + dev_ctx.x_context(), (const T*)layer_input, (const T*)h_0, + (const T*)c_0, (const T*)w_x, (const T*)w_h, (const T*)bw_x, + (const T*)bw_h, (const T*)layer_output, (const T*)cur_input_ptr, + (const T*)h_t_grad, (const T*)c_t_grad, + reinterpret_cast(cur_output_ptr), + reinterpret_cast(h_0_grad), reinterpret_cast(c_0_grad), + w_x_grad, w_h_grad, b_x_grad, b_h_grad, bw_x_grad, bw_h_grad, + bb_x_grad, bb_h_grad, batch_size, cur_xdim, hidden_size, seq_len, + seq_len_tensor, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, i_f_g_o, c); + + PADDLE_ENFORCE_XDNN_SUCCESS(r, "bilstm_grad"); + } else { + int r = xpu::lstm_grad( + dev_ctx.x_context(), (const T*)layer_input, (const T*)h_0, + (const T*)c_0, (const T*)w_x, (const T*)w_h, (const T*)layer_output, + (const T*)cur_input_ptr, (const T*)h_t_grad, (const T*)c_t_grad, + reinterpret_cast(cur_output_ptr), + reinterpret_cast(h_0_grad), reinterpret_cast(c_0_grad), + w_x_grad, w_h_grad, b_x_grad, b_h_grad, batch_size, cur_xdim, + hidden_size, seq_len, seq_len_tensor, nullptr, nullptr, nullptr, + nullptr, i_f_g_o, c); + + PADDLE_ENFORCE_XDNN_SUCCESS(r, "lstm_grad"); + } + } } }; diff --git a/paddle/fluid/platform/device/xpu/xpu2_op_list.h b/paddle/fluid/platform/device/xpu/xpu2_op_list.h index 897183f2cf..2f3688dca2 100644 --- a/paddle/fluid/platform/device/xpu/xpu2_op_list.h +++ b/paddle/fluid/platform/device/xpu/xpu2_op_list.h @@ -296,6 +296,7 @@ XPUOpMap& get_kl2_ops() { pOpKernelType(vartype::BOOL, XPUPlace()), pOpKernelType(vartype::FP32, XPUPlace())})}, {"rnn", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"rnn_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, {"roi_align", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, {"roi_align_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, diff --git a/python/paddle/fluid/tests/unittests/xpu/test_rnn_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_rnn_op_xpu.py index e0d208644e..52d37f5caf 100755 --- a/python/paddle/fluid/tests/unittests/xpu/test_rnn_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_rnn_op_xpu.py @@ -48,6 +48,10 @@ class XPUTestRNNOp(XPUOpTestWrapper): self.place = paddle.XPUPlace(0) self.sequence_length = np.ones( (self.batch_size, ), dtype=np.int32) * self.seq_length + #self.sequence_length = np.array( + # [12, 11, 10, 9, 8], dtype=np.int32) + self.num_layers = 1 + self.is_bidirec = False self.set_attrs() self.mode = "LSTM" self.is_test = False @@ -61,6 +65,10 @@ class XPUTestRNNOp(XPUOpTestWrapper): high=0.1, size=(self.seq_length, self.batch_size, self.input_size)).astype(self.dtype) + input[11][1:][:] = 0 + input[10][2:][:] = 0 + input[9][3:][:] = 
0 + input[8][4:][:] = 0 rnn1 = LSTM( self.input_size, @@ -117,7 +125,7 @@ class XPUTestRNNOp(XPUOpTestWrapper): def set_xpu(self): self.__class__.use_xpu = True - self.__class__.no_need_check_grad = True + self.__class__.no_need_check_grad = False self.__class__.op_type = self.in_type def test_check_output(self): @@ -125,11 +133,20 @@ class XPUTestRNNOp(XPUOpTestWrapper): self.place, atol=0.01, no_check_set=['Reserve', 'DropoutState']) + def test_grad(self): + if not self.is_test: + var_name_list = self.get_weight_names() + grad_check_list = ['Input', 'init_h', 'init_c'] + grad_check_list.extend(var_name_list) + self.check_grad_with_place(self.place, + set(grad_check_list), + ['Out', 'last_hidden', 'last_cell']) + def init_size(self): - self.seq_length = 1 - self.batch_size = 1 - self.input_size = 5 - self.hidden_size = 16 + self.seq_length = 12 + self.batch_size = 5 + self.input_size = 3 + self.hidden_size = 2 def get_weight_names(self): weight_names = [] @@ -142,38 +159,24 @@ class XPUTestRNNOp(XPUOpTestWrapper): return weight_names def set_attrs(self): - self.num_layers = 1 - self.is_bidirec = False + self.num_layers = 2 + self.is_bidirec = True - class TestRNNOp1(TestRNNOp): - def init_size(self): - self.seq_length = 2 - self.batch_size = 4 - self.input_size = 10 - self.hidden_size = 32 + class TestRNNOp0(TestRNNOp): + def set_attrs(self): + self.sequence_length = None + class TestRNNOp1(TestRNNOp): def set_attrs(self): self.num_layers = 1 self.is_bidirec = False class TestRNNOp2(TestRNNOp): - def init_size(self): - self.seq_length = 5 - self.batch_size = 16 - self.input_size = 30 - self.hidden_size = 64 - def set_attrs(self): self.num_layers = 1 self.is_bidirec = True class TestRNNOp3(TestRNNOp): - def init_size(self): - self.seq_length = 10 - self.batch_size = 64 - self.input_size = 50 - self.hidden_size = 64 - def set_attrs(self): self.num_layers = 2 self.is_bidirec = False @@ -188,6 +191,17 @@ class XPUTestRNNOp(XPUOpTestWrapper): self.num_layers = 2 self.is_bidirec = True + class TestRNNOp6(TestRNNOp): + def set_attrs(self): + self.num_layers = 2 + self.is_bidirec = True + self.sequence_length = None + + class TestRNNOp7(TestRNNOp): + def set_attrs(self): + self.num_layers = 3 + self.is_bidirec = True + support_types = get_xpu_op_support_types('rnn') for stype in support_types: -- GitLab
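
A note on the Reserve buffer layout introduced by the new forward kernel: RnnXPUKernel now sizes Reserve as {hidden_data_idx, block_size}, where block_size = direction_num * seq_len * batch_size * hidden_size and hidden_data_idx = (num_layers - 1) + (gate_num + 1) * num_layers with gate_num = 4. Per layer the buffer holds four blocks of gate activations (i_f_g_o), one block of cell states (c), and, for every layer except the last, the layer's hidden output, which the grad kernel reads back instead of recomputing the forward pass. A minimal standalone sketch of that offset arithmetic (plain C++, no Paddle/XPU dependencies; ReserveLayout and MakeReserveLayout are illustrative names, not from the patch):

#include <cstdio>

// Offsets (in elements) into the flat Reserve buffer, following the layout
// set up in RnnXPUKernel::Compute.
struct ReserveLayout {
  int block_size;     // direction_num * seq_len * batch_size * hidden_size
  int gate_region;    // start of i_f_g_o region (always 0)
  int cell_region;    // start of per-layer cell states
  int hidden_region;  // start of saved inter-layer hidden outputs
  int total;          // total number of elements
};

ReserveLayout MakeReserveLayout(int num_layers, bool is_bidirec, int seq_len,
                                int batch_size, int hidden_size) {
  const int gate_num = 4;  // i, f, g, o
  const int direction_num = is_bidirec ? 2 : 1;
  ReserveLayout L;
  L.block_size = direction_num * seq_len * batch_size * hidden_size;
  L.gate_region = 0;                                     // 4 blocks per layer
  L.cell_region = num_layers * L.block_size * gate_num;  // 1 block per layer
  L.hidden_region = L.cell_region + num_layers * L.block_size;
  // (gate_num + 1) blocks per layer plus (num_layers - 1) saved hidden outputs.
  const int hidden_data_idx = (num_layers - 1) + (gate_num + 1) * num_layers;
  L.total = hidden_data_idx * L.block_size;
  return L;
}

int main() {
  // Example: the 2-layer bidirectional configuration used by the updated test
  // (seq_len = 12, batch = 5, hidden = 2).
  ReserveLayout L = MakeReserveLayout(2, true, 12, 5, 2);
  std::printf("block=%d gates@%d cells@%d hidden@%d total=%d\n",
              L.block_size, L.gate_region, L.cell_region, L.hidden_region,
              L.total);
  return 0;
}

For that configuration block_size is 240 and Reserve holds 2640 elements in total.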
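
The forward layer loop no longer ping-pongs between two scratch tensors; each layer's input and output are taken directly from the op tensors and from the hidden region of Reserve. A sketch of that selection logic (SelectLayerBuffers is an illustrative wrapper, not a function in the patch; pointer names follow the kernel):

// Pick layer i's input/output pointers the way the forward loop in
// rnn_op_xpu.cc does. `hidden_data` points at the saved inter-layer outputs
// inside Reserve; `block_size` is direction_num * seq_len * batch * hidden.
template <typename T>
void SelectLayerBuffers(int i, int num_layers, bool is_bidirec, int input_dim,
                        int hidden_size, int block_size, const T* x, T* y,
                        T* hidden_data, const T** cur_input, T** cur_output,
                        int* cur_xdim) {
  if (i == 0) {
    *cur_input = x;  // layer 0 reads the op input
    *cur_xdim = input_dim;
  } else {
    *cur_input = hidden_data + (i - 1) * block_size;  // previous layer's output
    *cur_xdim = is_bidirec ? 2 * hidden_size : hidden_size;
  }
  *cur_output = (i == num_layers - 1)
                    ? y                              // last layer writes Out
                    : hidden_data + i * block_size;  // others write into Reserve
}

Layer i then runs xpu::bilstm_train or xpu::lstm_train on cur_input/cur_output with input width cur_xdim.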
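
Per-layer, per-direction indexing: reset_parameter_vector flattens WeightList into parameter_lists[layer] with four tensors per direction, ordered w_x, w_h, b_x, b_h; bidirectional layers keep the reverse-direction tensors in slots 4..7. The stacked pre-state and last-state tensors have shape [num_layers * direction_num, batch_size, hidden_size], so layer i's slice starts at direction_num * i * state_offset with state_offset = dims()[1] * dims()[2]. Two small helpers spelling that out (illustrative names, not from the patch):

// Slot of a weight inside parameter_lists[layer]: four tensors per direction,
// ordered w_x, w_h, b_x, b_h; the reverse direction (bidirectional only)
// occupies slots 4..7.
int ParamIndex(int kind /* 0=w_x, 1=w_h, 2=b_x, 3=b_h */, bool reverse) {
  return kind + (reverse ? 4 : 0);
}

// Element offset of layer `layer`'s slice inside the stacked pre-state /
// last-state tensors of shape [num_layers * direction_num, batch, hidden].
int LayerStateOffset(int layer, int direction_num, int batch_size,
                     int hidden_size) {
  const int state_offset = batch_size * hidden_size;  // dims()[1] * dims()[2]
  return direction_num * layer * state_offset;
}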
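
In the grad kernel the intermediate layer inputs are not stored separately; for layer i > 0 they are rebuilt by scaling layer i-1's saved hidden output from Reserve by (1 - dropout_prob) via xpu::scale, while the forward kernel fills DropoutState with ones. A host-side sketch of that reconstruction (RebuildLayerInput is an illustrative name):

#include <vector>

// Host-side equivalent of the xpu::scale call in RnnXPUGradKernel that turns
// layer i-1's saved hidden output (from Reserve) back into layer i's input.
std::vector<float> RebuildLayerInput(const float* hidden_data, int numel,
                                     float dropout_prob) {
  const float scale = 1.0f - dropout_prob;
  std::vector<float> layer_input(numel);
  for (int k = 0; k < numel; ++k) {
    layer_input[k] = hidden_data[k] * scale;  // the kernel passes bias = 0.0f
  }
  return layer_input;
}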
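
The backward pass walks the layers from top to bottom and ping-pongs the propagated gradient between two scratch tensors (temp_input_grad_1/2) sized like the output gradient: the top layer reads Out@GRAD directly and layer 0 writes Input@GRAD directly. A sketch of that buffer selection (SelectGradBuffers is an illustrative wrapper, not a function in the patch):

// For layer i (iterating from num_layers - 1 down to 0), pick which buffer
// holds the incoming dL/d(layer output) and which buffer receives
// dL/d(layer input), mirroring the ping-pong scheme in RnnXPUGradKernel.
// grad_buf1/grad_buf2 correspond to temp_input_grad_1/temp_input_grad_2.
template <typename T>
void SelectGradBuffers(int i, int num_layers, bool is_bidirec, int input_dim,
                       int hidden_size, const T* y_grad, T* x_grad,
                       T* grad_buf1, T* grad_buf2, const T** dy_in,
                       T** dx_out, int* cur_xdim) {
  // Incoming gradient w.r.t. this layer's output.
  if (i == num_layers - 1) {
    *dy_in = y_grad;  // top layer reads the op's Out gradient
  } else {
    *dy_in = (i % 2 != 0) ? grad_buf2 : grad_buf1;
  }
  // Outgoing gradient w.r.t. this layer's input.
  if (i == 0) {
    *dx_out = x_grad;  // bottom layer writes the Input gradient
    *cur_xdim = input_dim;
  } else {
    *dx_out = (i % 2 != 0) ? grad_buf1 : grad_buf2;
    *cur_xdim = is_bidirec ? 2 * hidden_size : hidden_size;
  }
}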