From d746197398c9e1b298b1278b777c799e469e42b6 Mon Sep 17 00:00:00 2001
From: xiayanming <41795079@qq.com>
Date: Fri, 12 Mar 2021 14:15:42 +0800
Subject: [PATCH] [NPU] Support npu kernel for gather op fix bug (#31541)

* add gather npu op
* code review done
* update python new line
* precommit
* fix review
* del commit
* update gather_grad
* fix bug
* fix bug
---
 paddle/fluid/operators/gather_op_npu.cc      |  7 +++++--
 paddle/fluid/operators/gather_op_npu_test.cc | 10 +++++-----
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/paddle/fluid/operators/gather_op_npu.cc b/paddle/fluid/operators/gather_op_npu.cc
index 2d7b5b93ad..cf0d9cda34 100644
--- a/paddle/fluid/operators/gather_op_npu.cc
+++ b/paddle/fluid/operators/gather_op_npu.cc
@@ -51,10 +51,13 @@ class GatherGradOpNPUKernel : public framework::OpKernel {
     auto *dx = ctx.Output(framework::GradVarName("X"));

     // step1: Unsqueeze index
+    framework::Tensor tmp_tensor(index->type());
     const auto index_dims = index->dims();
     if (index_dims.size() == 1) {
-      framework::Tensor tmp_index = UnsqueezeTo(*index, 2);
-      index = &tmp_index;
+      tmp_tensor.ShareDataWith(*index);
+      std::vector new_dim = {index_dims[0], 1};
+      tmp_tensor.Resize(framework::make_ddim(new_dim));
+      index = &tmp_tensor;
     }

     auto stream =
diff --git a/paddle/fluid/operators/gather_op_npu_test.cc b/paddle/fluid/operators/gather_op_npu_test.cc
index 4cd46da6f2..de067e4558 100644
--- a/paddle/fluid/operators/gather_op_npu_test.cc
+++ b/paddle/fluid/operators/gather_op_npu_test.cc
@@ -109,17 +109,17 @@ void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx,
   auto dout = scope->Var("DOut");
   auto tensor_dout = dout->GetMutable();

-  std::vector init_index = {0, 1, 2, 0};
+  std::vector init_index = {0, 1};
   paddle::framework::TensorFromVector(init_index, ctx, tensor_index);
-  tensor_index->Resize(paddle::framework::make_ddim({2, 2}));
+  tensor_index->Resize(paddle::framework::make_ddim({2}));

   std::vector init_x = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
   TensorFromVector(init_x, ctx, tensor_x);
   tensor_x->Resize(paddle::framework::make_ddim({3, 2}));

-  std::vector init_dout = {5.0, 10.0};
+  std::vector init_dout = {5.0, 10.0, 2.0, 3.0};
   TensorFromVector(init_dout, ctx, tensor_dout);
-  tensor_dout->Resize(paddle::framework::make_ddim({2}));
+  tensor_dout->Resize(paddle::framework::make_ddim({2, 2}));

   ctx.Wait();

@@ -143,7 +143,7 @@ void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx,
   uint32_t expected_size = 3 * 2;
   EXPECT_EQ((uint32_t)dx_vec.size(), expected_size);

-  std::vector expected_dx_vec = {0.0, 5.0, 0.0, 0.0, 10.0, 0.0};
+  std::vector expected_dx_vec = {5.0, 10.0, 2.0, 3.0, 0.0, 0.0};
   for (uint32_t i = 0; i < dx_vec.size(); i++) {
     VLOG(3) << "dx_vec[i]=" << dx_vec[i];
     EXPECT_EQ(dx_vec[i], expected_dx_vec[i]);
--
GitLab