From a66b9fba3b5ada77ef5c3cc1b8e398395676a730 Mon Sep 17 00:00:00 2001 From: Aganlengzi Date: Thu, 30 Sep 2021 14:18:24 +0800 Subject: [PATCH] [NPU] modify transpose2 and index_select_grad kernels for model xlnet (#36214) * [NPU] modify transpose2 and index_select_grad kernels for model xlnet * add transpose2 int64_t unit test * add more transpose2 unit tests * update test_transpose_op_npu.py --- paddle/fluid/operators/index_select_op_npu.cc | 17 ++-- paddle/fluid/operators/transpose_op_npu.cc | 21 +++- .../unittests/npu/test_transpose_op_npu.py | 98 +++++++++++++++---- 3 files changed, 107 insertions(+), 29 deletions(-) diff --git a/paddle/fluid/operators/index_select_op_npu.cc b/paddle/fluid/operators/index_select_op_npu.cc index b624d03cc85..825229282f3 100644 --- a/paddle/fluid/operators/index_select_op_npu.cc +++ b/paddle/fluid/operators/index_select_op_npu.cc @@ -99,10 +99,11 @@ class IndexSelectGradNPUKernel : public framework::OpKernel { transed_out_dims[i] = out_dims[in_trans_perm[i]]; } transed_out_grad.mutable_data(transed_out_dims, ctx.GetPlace()); - framework::NPUAttributeMap in_trans_attr = {{"perm", in_trans_perm}}; - - const auto& in_trans_runner = NpuOpRunner( - "TransposeD", {*out_grad}, {transed_out_grad}, in_trans_attr); + NpuOpRunner in_trans_runner; + in_trans_runner.SetType("Transpose") + .AddInput(*out_grad) + .AddInput(std::move(in_trans_perm)) + .AddOutput(transed_out_grad); in_trans_runner.Run(stream); Tensor sum_out; @@ -133,10 +134,12 @@ class IndexSelectGradNPUKernel : public framework::OpKernel { for (int i = 1 + dim; i < x_dims.size(); ++i) { out_trans_perm.push_back(i); } - framework::NPUAttributeMap out_trans_attr = {{"perm", out_trans_perm}}; x_grad->mutable_data(ctx.GetPlace()); - const auto& out_trans_runner = - NpuOpRunner("TransposeD", {sum_out}, {*x_grad}, out_trans_attr); + NpuOpRunner out_trans_runner; + out_trans_runner.SetType("Transpose") + .AddInput(sum_out) + .AddInput(std::move(out_trans_perm)) + .AddOutput(*x_grad); out_trans_runner.Run(stream); } } diff --git a/paddle/fluid/operators/transpose_op_npu.cc b/paddle/fluid/operators/transpose_op_npu.cc index 035ad5f3f31..7cc68e93c5d 100644 --- a/paddle/fluid/operators/transpose_op_npu.cc +++ b/paddle/fluid/operators/transpose_op_npu.cc @@ -27,9 +27,12 @@ class TransposeNPUKernel : public framework::OpKernel { auto* x = ctx.Input("X"); auto* out = ctx.Output("Out"); std::vector axis = ctx.Attr>("axis"); - framework::NPUAttributeMap attr_input = {{"perm", axis}}; out->mutable_data(ctx.device_context().GetPlace()); - const auto& runner = NpuOpRunner("TransposeD", {*x}, {*out}, attr_input); + NpuOpRunner runner; + runner.SetType("Transpose") + .AddInput(*x) + .AddInput(std::move(axis)) + .AddOutput(*out); auto stream = ctx.template device_context() .stream(); @@ -51,9 +54,11 @@ class TransposeGradNPUKernel : public framework::OpKernel { reversed_axis[axis[i]] = i; } x_grad->mutable_data(ctx.GetPlace()); - framework::NPUAttributeMap attr_input = {{"perm", reversed_axis}}; - const auto& runner = - NpuOpRunner("TransposeD", {*out_grad}, {*x_grad}, attr_input); + NpuOpRunner runner; + runner.SetType("Transpose") + .AddInput(*out_grad) + .AddInput(std::move(reversed_axis)) + .AddOutput(*x_grad); auto stream = ctx.template device_context() .stream(); @@ -72,11 +77,17 @@ REGISTER_OP_NPU_KERNEL( ops::TransposeNPUKernel, ops::TransposeNPUKernel, +#ifdef PADDLE_WITH_ASCEND_INT64 + ops::TransposeNPUKernel, +#endif ops::TransposeNPUKernel, ops::TransposeNPUKernel); REGISTER_OP_NPU_KERNEL(transpose2_grad, ops::TransposeGradNPUKernel, ops::TransposeGradNPUKernel, ops::TransposeGradNPUKernel, +#ifdef PADDLE_WITH_ASCEND_INT64 + ops::TransposeGradNPUKernel, +#endif ops::TransposeGradNPUKernel, ops::TransposeGradNPUKernel); diff --git a/python/paddle/fluid/tests/unittests/npu/test_transpose_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_transpose_op_npu.py index e95f3cc83cf..b1a6bfcdaaa 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_transpose_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_transpose_op_npu.py @@ -31,40 +31,104 @@ class TestTransposeOp(OpTest): self.op_type = "transpose2" self.place = paddle.NPUPlace(0) self.init_dtype() - self.init_input_output() - self.init_kernel_type() - self.init_axis() + self.init_shape_axis() - self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(self.x)} - self.attrs = {'axis': [0, 2, 1, 3], 'data_format': 'AnyLayout'} - self.outputs = {'Out': self.out} + self.inputs = {'X': np.random.random(self.shape).astype(self.dtype)} + self.attrs = {'axis': self.axis, 'data_format': 'AnyLayout'} + self.outputs = {'Out': self.inputs['X'].transpose(self.axis)} def set_npu(self): self.__class__.use_npu = True - def init_kernel_type(self): - self.use_mkldnn = False - - def init_input_output(self): - self.x = np.random.uniform(0.1, 1, [8, 512, 12, 64]).astype(self.dtype) - self.out = np.transpose(self.x, [0, 2, 1, 3]) - def init_dtype(self): self.dtype = np.float32 - def init_axis(self): - self.axis = -1 + def init_shape_axis(self): + self.shape = (3, 40) + self.axis = (1, 0) def test_check_output(self): self.check_output_with_place(self.place) + def test_check_grad(self): + self.check_grad_with_place(self.place, ['X'], 'Out') + + +class TestCase0(TestTransposeOp): + def init_shape_axis(self): + self.shape = (100, ) + self.axis = (0, ) + + +class TestCase1(TestTransposeOp): + def init_shape_axis(self): + self.shape = (3, 4, 10) + self.axis = (0, 2, 1) + + +class TestCase2(TestTransposeOp): + def init_shape_axis(self): + self.shape = (2, 3, 4, 5) + self.axis = (0, 2, 3, 1) + + +class TestCase3(TestTransposeOp): + def init_shape_axis(self): + self.shape = (2, 3, 4, 5, 6) + self.axis = (4, 2, 3, 1, 0) + + +class TestCase4(TestTransposeOp): + def init_shape_axis(self): + self.shape = (2, 3, 4, 5, 6, 1) + self.axis = (4, 2, 3, 1, 0, 5) + + +class TestCase5(TestTransposeOp): + def init_shape_axis(self): + self.shape = (2, 16, 96) + self.axis = (0, 2, 1) -class TestTransposeOpFP16(TestTransposeOp): - no_need_check_grad = True +class TestCase6(TestTransposeOp): + def init_shape_axis(self): + self.shape = (2, 10, 12, 16) + self.axis = (3, 1, 2, 0) + + +class TestCase7(TestTransposeOp): + def init_shape_axis(self): + self.shape = (2, 10, 2, 16) + self.axis = (0, 1, 3, 2) + + +class TestCase8(TestTransposeOp): + def init_shape_axis(self): + self.shape = (2, 3, 2, 3, 2, 4, 3, 3) + self.axis = (0, 1, 3, 2, 4, 5, 6, 7) + + +class TestCase9(TestTransposeOp): + def init_shape_axis(self): + self.shape = (2, 3, 2, 3, 2, 4, 3, 3) + self.axis = (6, 1, 3, 5, 0, 2, 4, 7) + + +class TestTransposeOpFP16(TestTransposeOp): def init_dtype(self): self.dtype = np.float16 + def test_check_grad(self): + pass + + +class TestTransposeOpInt64(TestTransposeOp): + def init_dtype(self): + self.dtype = np.int64 + + def test_check_grad(self): + pass + if __name__ == '__main__': unittest.main() -- GitLab