From 7875bcb8f76da4c2d7c8ad4099d85a7b60d41591 Mon Sep 17 00:00:00 2001 From: Meiyim Date: Wed, 17 Mar 2021 20:45:35 +0800 Subject: [PATCH] [NPU] npu support `transpose` (#31486) --- paddle/fluid/operators/CMakeLists.txt | 4 + paddle/fluid/operators/transpose_op_npu.cc | 83 ++++++++++ .../fluid/operators/transpose_op_npu_test.cc | 143 ++++++++++++++++++ .../unittests/npu/test_transpose_op_npu.py | 74 +++++++++ 4 files changed, 304 insertions(+) create mode 100644 paddle/fluid/operators/transpose_op_npu.cc create mode 100644 paddle/fluid/operators/transpose_op_npu_test.cc create mode 100644 python/paddle/fluid/tests/unittests/npu/test_transpose_op_npu.py diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt index 78509b14528..6fe18f24794 100644 --- a/paddle/fluid/operators/CMakeLists.txt +++ b/paddle/fluid/operators/CMakeLists.txt @@ -167,6 +167,10 @@ set(GLOB_OP_LIB ${OP_LIBRARY} CACHE INTERNAL "Global OP library") add_subdirectory(benchmark) cc_test(op_debug_string_test SRCS op_debug_string_test.cc DEPS elementwise_add_op) +if (WITH_ASCEND_CL) + cc_test(transpose_op_npu_test SRCS transpose_op_npu_test.cc DEPS op_registry transpose_op scope device_context enforce executor) +endif() + if(WITH_MKLDNN) include(mkldnn/inplace_op_tests.cmake) diff --git a/paddle/fluid/operators/transpose_op_npu.cc b/paddle/fluid/operators/transpose_op_npu.cc new file mode 100644 index 00000000000..2d71bfdc725 --- /dev/null +++ b/paddle/fluid/operators/transpose_op_npu.cc @@ -0,0 +1,83 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef PADDLE_WITH_ASCEND_CL +#include +#include +#include + +#include "paddle/fluid/operators/npu_op_runner.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/expand_op.h" + +namespace paddle { +namespace operators { + +template +class TransposeNPUKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* x = ctx.Input("X"); + auto* out = ctx.Output("Out"); + std::vector axis = ctx.Attr>("axis"); + framework::NPUAttributeMap attr_input = {{"perm", axis}}; + out->mutable_data(ctx.device_context().GetPlace()); + auto runner = NpuOpRunner("TransposeD", {*x}, {*out}, attr_input); + auto stream = ctx.template device_context().stream(); + runner.Run(stream); + + } +}; + +template +class TransposeGradNPUKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &ctx) const override { + auto* out_grad = ctx.Input(framework::GradVarName("Out")); + auto* x_grad = ctx.Output(framework::GradVarName("X")); + std::vector axis = ctx.Attr>("axis"); + std::vector reversed_axis(axis); + for (size_t i = 0; i < axis.size(); i++) { + reversed_axis[axis[i]] = i; + } + + framework::NPUAttributeMap attr_input = {{"perm", reversed_axis}}; + auto runner = NpuOpRunner("TransposeD", {*out_grad}, {*x_grad}, attr_input); + auto stream = ctx.template device_context().stream(); + runner.Run(stream); + } +}; + +} +} + +namespace ops = paddle::operators; + +REGISTER_OP_NPU_KERNEL(transpose, + ops::TransposeNPUKernel, + ops::TransposeNPUKernel, + 
ops::TransposeNPUKernel, + ops::TransposeNPUKernel, + ops::TransposeNPUKernel +); + +REGISTER_OP_NPU_KERNEL(transpose_grad, + ops::TransposeGradNPUKernel, + ops::TransposeGradNPUKernel, + ops::TransposeGradNPUKernel, + ops::TransposeGradNPUKernel, + ops::TransposeGradNPUKernel +); + + + +#endif + diff --git a/paddle/fluid/operators/transpose_op_npu_test.cc b/paddle/fluid/operators/transpose_op_npu_test.cc new file mode 100644 index 00000000000..c7a791956fb --- /dev/null +++ b/paddle/fluid/operators/transpose_op_npu_test.cc @@ -0,0 +1,143 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/
+
+#ifndef _WIN32
+#include
+#endif
+
+#include
+#include
+#include // NOLINT
+#include
+#include
+#include
+
+#include "gtest/gtest.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/operator.h"
+#include "paddle/fluid/framework/program_desc.h"
+#include "paddle/fluid/operators/dropout_op.h"  // NOTE(review): nothing dropout-related is used below - confirm it is needed
+#include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/fluid/string/printf.h"
+
+namespace f = paddle::framework;
+namespace p = paddle::platform;
+namespace m = paddle::operators::math;  // NOTE(review): alias `m` is not referenced in this file
+
+USE_OP(transpose);
+USE_OP_DEVICE_KERNEL(transpose, NPU);
+
+// Runs the "transpose" op with axis {1, 0} on a 2x3 tensor holding 0..5 and checks Out.
+template
+void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
+  // init
+  auto x = scope->Var("X");
+  auto out = scope->Var("Out");
+  auto* x_t = x->GetMutable();
+  auto* out_t = out->GetMutable();
+  auto place = ctx.GetPlace();
+
+  int dim0 = 2;
+  int dim1 = 3;
+  TensorFromVector(std::vector({0, 1, 2, 3, 4, 5}), ctx, x_t);
+  ctx.Wait();
+  x_t->Resize({dim0, dim1});
+  out_t->Resize({dim0, dim1});  // NOTE(review): same 2x3 shape as X; presumably the op's shape inference resizes Out - confirm
+  ctx.Wait();
+  out_t->mutable_data(place);
+  ctx.Wait();
+  f::AttributeMap attrs = {
+    {"axis", std::vector({1, 0})},
+    {"data_format", std::string("AnyLayout")}
+  };
+  auto op = f::OpRegistry::CreateOp("transpose", {{"X", {"X"}}},
+                                    {{"Out", {"Out"}}}, attrs);
+  ctx.Wait();
+  op->Run(*scope, place);
+  ctx.Wait();
+  std::vector out_v;
+  TensorToVector(*out_t, ctx, &out_v);  // copy the result back for the checks below
+  ctx.Wait();
+  // Expect the 0..5 buffer with its two dims swapped: 0, 3, 1, 4, 2, 5.
+  EXPECT_EQ(out_t->numel(), dim0 * dim1);
+  EXPECT_EQ(out_v[0], 0);
+  EXPECT_EQ(out_v[1], 3);
+  EXPECT_EQ(out_v[2], 1);
+  EXPECT_EQ(out_v[3], 4);
+  EXPECT_EQ(out_v[4], 2);
+  EXPECT_EQ(out_v[5], 5);
+}
+
+// Runs "transpose_grad" with axis {1, 0} on Out@GRAD = 0..5 and checks X@GRAD.
+template
+void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx) {
+  // init
+  auto x = scope->Var("X");
+  auto x_grad = scope->Var("X@GRAD");
+  auto out = scope->Var("Out");
+  auto out_grad = scope->Var("Out@GRAD");
+
+  auto* x_grad_t = x_grad->GetMutable();
+  auto* x_t = x->GetMutable();
+  auto* out_grad_t = out_grad->GetMutable();
+  auto* out_t = out->GetMutable();
+  int dim0 = 2;
+  int dim1 = 3;
+  auto place = ctx.GetPlace();
+
+  TensorFromVector(std::vector({0, 1, 2, 3, 4, 5}), ctx, out_grad_t);
+  TensorFromVector(std::vector({0, 1, 2, 3, 4, 5}), ctx, x_t);
+  ctx.Wait();
+  x_grad_t->Resize({dim0, dim1});
+  x_t->Resize({dim0, dim1});
+  out_grad_t->Resize({dim0, dim1});  // NOTE(review): Out@GRAD is given X's 2x3 shape, not the transposed 3x2 - confirm intended
+  out_t->Resize({dim0, dim1});
+
+  x_grad_t->mutable_data(place);  // the test allocates X@GRAD itself before running the op
+  out_t->mutable_data(place);
+  ctx.Wait();
+  f::AttributeMap attrs = {
+    {"axis", std::vector({1, 0})},
+    {"data_format", std::string("AnyLayout")}
+  };
+  auto op = f::OpRegistry::CreateOp(
+      "transpose_grad",
+      {{"Out@GRAD", {"Out@GRAD"}}, {"X", {"X"}}, {"Out", {"Out"}}},
+      {{"X@GRAD", {"X@GRAD"}}}, attrs);
+  op->Run(*scope, place);
+  ctx.Wait();
+  std::vector out_v;
+  TensorToVector(*x_grad_t, ctx, &out_v);  // copy X@GRAD back for the checks below
+  ctx.Wait();
+  // Expect the 0..5 buffer with its two dims swapped: 0, 3, 1, 4, 2, 5.
+  EXPECT_EQ(x_grad_t->numel(), dim0 * dim1);
+  EXPECT_EQ(out_v[0], 0);
+  EXPECT_EQ(out_v[1], 3);
+  EXPECT_EQ(out_v[2], 1);
+  EXPECT_EQ(out_v[3], 4);
+  EXPECT_EQ(out_v[4], 2);
+  EXPECT_EQ(out_v[5], 5);
+
+}
+
+// Test entry points: each builds a fresh scope and an NPU context on NPUPlace(0).
+TEST(transpose, NPU_fp32) {
+  f::Scope scope;
+  p::NPUDeviceContext ctx(p::NPUPlace(0));
+  Compare(&scope, ctx);
+}
+
+TEST(transpose_grad, NPU_fp32) {
+  f::Scope scope;
+  p::NPUDeviceContext ctx(p::NPUPlace(0));
+  CompareGrad(&scope, ctx);
+}
+
diff --git a/python/paddle/fluid/tests/unittests/npu/test_transpose_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_transpose_op_npu.py
new file mode 100644
index 00000000000..797531a6c0f
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/npu/test_transpose_op_npu.py
@@ -0,0 +1,74 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import numpy as np
+import unittest
+import sys
+sys.path.append("..")  # presumably lets `op_test` be imported from the parent directory - confirm
+from op_test import OpTest, _set_use_system_allocator  # NOTE(review): _set_use_system_allocator is unused in this file
+import paddle
+import paddle.fluid as fluid  # NOTE(review): `fluid` is unused in this file
+
+paddle.enable_static()
+
+
+@unittest.skipIf(not paddle.is_compiled_with_npu(),
+                 "core is not compiled with NPU")
+class TestTransposeOp(OpTest):  # forward "transpose" on NPUPlace(0), float32, axis [0, 2, 1, 3]
+    def setUp(self):  # builds the op type, place, inputs, attrs and expected outputs
+        self.set_npu()
+        self.op_type = "transpose"
+        self.place = paddle.NPUPlace(0)
+        self.init_dtype()  # must run before init_input_output, which reads self.dtype
+        self.init_input_output()
+        self.init_kernel_type()
+        self.init_axis()
+
+        self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(self.x)}
+        self.attrs = {'axis': [0, 2, 1, 3], 'data_format': 'AnyLayout'}  # same permutation as in init_input_output
+        self.outputs = {'Out': self.out}
+
+    def set_npu(self):
+        self.__class__.use_npu = True  # class-level flag; presumably read by OpTest - confirm
+
+    def init_kernel_type(self):
+        self.use_mkldnn = False
+
+    def init_input_output(self):  # random [8, 512, 12, 64] input and its NumPy transpose as reference
+        self.x = np.random.uniform(0.1, 1, [8, 512, 12, 64]).astype(self.dtype)
+        self.out = np.transpose(self.x, [0, 2, 1, 3])
+
+    def init_dtype(self):  # overridden by the FP16 subclass
+        self.dtype = np.float32
+
+    def init_axis(self):
+        self.axis = -1  # NOTE(review): not read elsewhere in this file; attrs hard-code the axis
+
+    def test_check_output(self):  # forward output only; this file has no gradient check
+        self.check_output_with_place(self.place, check_dygraph=False)
+
+
+@unittest.skipIf(not paddle.is_compiled_with_npu(),
+                 "core is not compiled with NPU")
+class TestTransposeOpFP16(TestTransposeOp):  # same case with float16 data
+    no_need_check_grad = True  # NOTE(review): presumably tells OpTest to skip gradient checks - confirm
+
+    def init_dtype(self):
+        self.dtype = np.float16
+
+
+if __name__ == '__main__':
+    unittest.main()
-- 
GitLab