From 342252c90206e1fa56cc8ad6e8106632fb827bc3 Mon Sep 17 00:00:00 2001
From: Leo Chen
Date: Fri, 19 Mar 2021 12:36:11 +0800
Subject: [PATCH] [NPU] change transpose to transpose2 (#31734)

* change transpose to transpose2

* fix bug
---
 paddle/fluid/operators/transpose_op_npu.cc    | 74 +++++++++----------
 .../fluid/operators/transpose_op_npu_test.cc  | 70 ++++++++----------
 .../unittests/npu/test_transpose_op_npu.py    |  2 +-
 3 files changed, 69 insertions(+), 77 deletions(-)

diff --git a/paddle/fluid/operators/transpose_op_npu.cc b/paddle/fluid/operators/transpose_op_npu.cc
index 2d71bfdc725..994b8e534f8 100644
--- a/paddle/fluid/operators/transpose_op_npu.cc
+++ b/paddle/fluid/operators/transpose_op_npu.cc
@@ -9,75 +9,73 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#ifdef PADDLE_WITH_ASCEND_CL
+#include <memory>
 #include <string>
 #include <vector>
-#include <memory>
-#include "paddle/fluid/operators/npu_op_runner.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/expand_op.h"
+#include "paddle/fluid/operators/npu_op_runner.h"
 
 namespace paddle {
 namespace operators {
 
 template <typename T>
 class TransposeNPUKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* x = ctx.Input<framework::LoDTensor>("X");
-    auto* out = ctx.Output<framework::LoDTensor>("Out");
-    std::vector<int> axis = ctx.Attr<std::vector<int>>("axis");
-    framework::NPUAttributeMap attr_input = {{"perm", axis}};
-    out->mutable_data<T>(ctx.device_context().GetPlace());
-    auto runner = NpuOpRunner("TransposeD", {*x}, {*out}, attr_input);
-    auto stream = ctx.template device_context<paddle::platform::NPUDeviceContext>().stream();
-    runner.Run(stream);
-
-  }
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* x = ctx.Input<framework::LoDTensor>("X");
+    auto* out = ctx.Output<framework::LoDTensor>("Out");
+    std::vector<int> axis = ctx.Attr<std::vector<int>>("axis");
+    framework::NPUAttributeMap attr_input = {{"perm", axis}};
+    out->mutable_data<T>(ctx.device_context().GetPlace());
+    auto runner = NpuOpRunner("TransposeD", {*x}, {*out}, attr_input);
+    auto stream =
+        ctx.template device_context<paddle::platform::NPUDeviceContext>()
+            .stream();
+    runner.Run(stream);
+  }
 };
 
 template <typename T>
 class TransposeGradNPUKernel : public framework::OpKernel<T> {
  public:
-  void Compute(const framework::ExecutionContext &ctx) const override {
-    auto* out_grad = ctx.Input<framework::LoDTensor>(framework::GradVarName("Out"));
-    auto* x_grad = ctx.Output<framework::LoDTensor>(framework::GradVarName("X"));
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* out_grad =
+        ctx.Input<framework::LoDTensor>(framework::GradVarName("Out"));
+    auto* x_grad =
+        ctx.Output<framework::LoDTensor>(framework::GradVarName("X"));
     std::vector<int> axis = ctx.Attr<std::vector<int>>("axis");
     std::vector<int> reversed_axis(axis);
     for (size_t i = 0; i < axis.size(); i++) {
       reversed_axis[axis[i]] = i;
    }
-
+
     x_grad->mutable_data<T>(ctx.GetPlace());
     framework::NPUAttributeMap attr_input = {{"perm", reversed_axis}};
     auto runner =
         NpuOpRunner("TransposeD", {*out_grad}, {*x_grad}, attr_input);
-    auto stream = ctx.template device_context<paddle::platform::NPUDeviceContext>().stream();
+    auto stream =
+        ctx.template device_context<paddle::platform::NPUDeviceContext>()
+            .stream();
     runner.Run(stream);
   }
 };
 
-}
-}
+}  // namespace operators
+}  // namespace paddle
 
 namespace ops = paddle::operators;
 
-REGISTER_OP_NPU_KERNEL(transpose,
+REGISTER_OP_NPU_KERNEL(
+    transpose2,
     ops::TransposeNPUKernel<float>,
-    ops::TransposeNPUKernel<paddle::platform::float16>,
+    ops::TransposeNPUKernel<paddle::platform::float16>,
     ops::TransposeNPUKernel<int>,
     ops::TransposeNPUKernel<uint8_t>,
-    ops::TransposeNPUKernel<int8_t>
-);
-
-REGISTER_OP_NPU_KERNEL(transpose_grad,
-    ops::TransposeGradNPUKernel<float>,
-    ops::TransposeGradNPUKernel<paddle::platform::float16>,
-    ops::TransposeGradNPUKernel<int>,
-    ops::TransposeGradNPUKernel<uint8_t>,
-    ops::TransposeGradNPUKernel<int8_t>
-);
-
-
-
-#endif
+    ops::TransposeNPUKernel<int8_t>);
+REGISTER_OP_NPU_KERNEL(transpose2_grad, ops::TransposeGradNPUKernel<float>,
+                       ops::TransposeGradNPUKernel<paddle::platform::float16>,
+                       ops::TransposeGradNPUKernel<int>,
+                       ops::TransposeGradNPUKernel<uint8_t>,
+                       ops::TransposeGradNPUKernel<int8_t>);
diff --git a/paddle/fluid/operators/transpose_op_npu_test.cc b/paddle/fluid/operators/transpose_op_npu_test.cc
index c7a791956fb..36f7a695358 100644
--- a/paddle/fluid/operators/transpose_op_npu_test.cc
+++ b/paddle/fluid/operators/transpose_op_npu_test.cc
@@ -13,12 +13,12 @@ limitations under the License. */
 #include <unistd.h>
 #endif
 
-#include <memory>
 #include <iostream>
+#include <memory>
+#include <numeric>
+#include <string>
 #include <thread>  // NOLINT
 #include <vector>
-#include <string>
-#include <numeric>
 
 #include "gtest/gtest.h"
 #include "paddle/fluid/framework/op_registry.h"
@@ -32,17 +32,18 @@ namespace f = paddle::framework;
 namespace p = paddle::platform;
 namespace m = paddle::operators::math;
 
-USE_OP(transpose);
-USE_OP_DEVICE_KERNEL(transpose, NPU);
-
+USE_OP(transpose2);
+USE_OP_DEVICE_KERNEL(transpose2, NPU);
 
 template <typename T>
 void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
-  // init
+  // init
   auto x = scope->Var("X");
   auto out = scope->Var("Out");
+  auto xshape = scope->Var("XShape");
   auto* x_t = x->GetMutable<f::LoDTensor>();
   auto* out_t = out->GetMutable<f::LoDTensor>();
+  auto* xshape_t = xshape->GetMutable<f::LoDTensor>();
   auto place = ctx.GetPlace();
 
   int dim0 = 2;
@@ -54,12 +55,13 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
   ctx.Wait();
   out_t->mutable_data<T>(place);
   ctx.Wait();
-  f::AttributeMap attrs = {
-      {"axis", std::vector<int>({1, 0})},
-      {"data_format", std::string("AnyLayout")}
-  };
-  auto op = f::OpRegistry::CreateOp("transpose", {{"X", {"X"}}},
-                                    {{"Out", {"Out"}}}, attrs);
+  xshape_t->Resize({dim0, dim1});
+  xshape_t->mutable_data<T>(place);
+  f::AttributeMap attrs = {{"axis", std::vector<int>({1, 0})},
+                           {"data_format", std::string("AnyLayout")}};
+  auto op = f::OpRegistry::CreateOp("transpose2", {{"X", {"X"}}},
+                                    {{"Out", {"Out"}}, {"XShape", {"XShape"}}},
+                                    attrs);
   ctx.Wait();
   op->Run(*scope, place);
   ctx.Wait();
@@ -76,47 +78,42 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
   EXPECT_EQ(out_v[5], 5);
 }
 
-
 template <typename T>
 void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx) {
-  // init
-  auto x = scope->Var("X");
+  // init
+  auto xshape = scope->Var("XShape");
   auto x_grad = scope->Var("X@GRAD");
-  auto out = scope->Var("Out");
   auto out_grad = scope->Var("Out@GRAD");
 
   auto* x_grad_t = x_grad->GetMutable<f::LoDTensor>();
-  auto* x_t = x->GetMutable<f::LoDTensor>();
+  auto* xshape_t = xshape->GetMutable<f::LoDTensor>();
   auto* out_grad_t = out_grad->GetMutable<f::LoDTensor>();
-  auto* out_t = out->GetMutable<f::LoDTensor>();
+
   int dim0 = 2;
   int dim1 = 3;
   auto place = ctx.GetPlace();
 
   TensorFromVector(std::vector<T>({0, 1, 2, 3, 4, 5}), ctx, out_grad_t);
-  TensorFromVector(std::vector<T>({0, 1, 2, 3, 4, 5}), ctx, x_t);
   ctx.Wait();
+
   x_grad_t->Resize({dim0, dim1});
-  x_t->Resize({dim0, dim1});
+  xshape_t->Resize(
+      {0, dim0,
+       dim1});  // NOTE(zhiqiu): 0 is needed, see its infershape function
   out_grad_t->Resize({dim0, dim1});
-  out_t->Resize({dim0, dim1});
-
   x_grad_t->mutable_data<T>(place);
-  out_t->mutable_data<T>(place);
-  ctx.Wait();
-  f::AttributeMap attrs = {
-      {"axis", std::vector<int>({1, 0})},
-      {"data_format", std::string("AnyLayout")}
-  };
+  f::AttributeMap attrs = {{"axis", std::vector<int>({1, 0})},
+                           {"data_format", std::string("AnyLayout")}};
+
   auto op = f::OpRegistry::CreateOp(
-      "transpose_grad",
-      {{"Out@GRAD", {"Out@GRAD"}}, {"X", {"X"}}, {"Out", {"Out"}}},
+      "transpose2_grad", {{"Out@GRAD", {"Out@GRAD"}}, {"XShape", {"XShape"}}},
{"XShape"}}}, {{"X@GRAD", {"X@GRAD"}}}, attrs); + op->Run(*scope, place); - ctx.Wait(); + ctx.Wait(); std::vector out_v; TensorToVector(*x_grad_t, ctx, &out_v); - ctx.Wait(); + ctx.Wait(); EXPECT_EQ(x_grad_t->numel(), dim0 * dim1); EXPECT_EQ(out_v[0], 0); @@ -125,19 +122,16 @@ void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx) { EXPECT_EQ(out_v[3], 4); EXPECT_EQ(out_v[4], 2); EXPECT_EQ(out_v[5], 5); - } - -TEST(transpose, NPU_fp32) { +TEST(transpose2, NPU_fp32) { f::Scope scope; p::NPUDeviceContext ctx(p::NPUPlace(0)); Compare(&scope, ctx); } -TEST(transpose_grad, NPU_fp32) { +TEST(transpose2_grad, NPU_fp32) { f::Scope scope; p::NPUDeviceContext ctx(p::NPUPlace(0)); CompareGrad(&scope, ctx); } - diff --git a/python/paddle/fluid/tests/unittests/npu/test_transpose_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_transpose_op_npu.py index 797531a6c0f..17f6a0ae1ca 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_transpose_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_transpose_op_npu.py @@ -30,7 +30,7 @@ paddle.enable_static() class TestTransposeOp(OpTest): def setUp(self): self.set_npu() - self.op_type = "transpose" + self.op_type = "transpose2" self.place = paddle.NPUPlace(0) self.init_dtype() self.init_input_output() -- GitLab