From 149f76e636bb0fce69ab1c17b874f8badb32cfc3 Mon Sep 17 00:00:00 2001 From: liym27 <33742067+liym27@users.noreply.github.com> Date: Wed, 24 Mar 2021 15:11:23 +0800 Subject: [PATCH] [NPU] Support npu kernel for op elementwise_floordiv (#31822) --- .../elementwise/elementwise_div_op_npu.cc | 37 +++++----- .../elementwise_floordiv_op_npu.cc | 52 ++++++++++++++ .../npu/test_elementwise_floordiv_op_npu.py | 67 +++++++++++++++++++ 3 files changed, 136 insertions(+), 20 deletions(-) create mode 100644 paddle/fluid/operators/elementwise/elementwise_floordiv_op_npu.cc create mode 100644 python/paddle/fluid/tests/unittests/npu/test_elementwise_floordiv_op_npu.py diff --git a/paddle/fluid/operators/elementwise/elementwise_div_op_npu.cc b/paddle/fluid/operators/elementwise/elementwise_div_op_npu.cc index db44df1e6fd..8852f3a419a 100644 --- a/paddle/fluid/operators/elementwise/elementwise_div_op_npu.cc +++ b/paddle/fluid/operators/elementwise/elementwise_div_op_npu.cc @@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#ifdef PADDLE_WITH_ASCEND_CL #include #include @@ -61,13 +60,13 @@ class ElementwiseDivGradNPUKernel : public framework::OpKernel { auto place = ctx.GetPlace(); auto stream = - ctx.template device_context() - .stream(); + ctx.template device_context() + .stream(); Tensor y_power(y->type()); y_power.mutable_data(y->dims(), place); - auto y_power_runner = NpuOpRunner("Power", {*y}, - {y_power}, {{"power", static_cast(-1)}}); + auto y_power_runner = NpuOpRunner("Power", {*y}, {y_power}, + {{"power", static_cast(-1)}}); y_power_runner.Run(stream); if (dx) { @@ -75,32 +74,33 @@ class ElementwiseDivGradNPUKernel : public framework::OpKernel { Tensor tensor_zeros(x->type()); tensor_zeros.mutable_data(x->dims(), place); - auto tensor_zeros_runner = NpuOpRunner("ZerosLike", {*x}, - {tensor_zeros}, {}); + auto tensor_zeros_runner = + NpuOpRunner("ZerosLike", {*x}, {tensor_zeros}, {}); tensor_zeros_runner.Run(stream); Tensor x_zero(paddle::framework::proto::VarType::BOOL); x_zero.mutable_data(x->dims(), place); - auto x_zero_runner = NpuOpRunner("Equal", {*x, tensor_zeros}, - {x_zero}, {}); + auto x_zero_runner = + NpuOpRunner("Equal", {*x, tensor_zeros}, {x_zero}, {}); x_zero_runner.Run(stream); Tensor x_nozero(paddle::framework::proto::VarType::BOOL); x_nozero.mutable_data(x->dims(), place); - auto x_nozero_runner = NpuOpRunner("LogicalNot", {x_zero}, - {x_nozero}, {}); + auto x_nozero_runner = + NpuOpRunner("LogicalNot", {x_zero}, {x_nozero}, {}); x_nozero_runner.Run(stream); Tensor x_nozero_f(x->type()); x_nozero_f.mutable_data(x->dims(), place); - auto x_nozero_f_runner = NpuOpRunner("Cast", {x_nozero}, - {x_nozero_f}, {{"dst_type", static_cast(0)}}); + auto x_nozero_f_runner = + NpuOpRunner("Cast", {x_nozero}, {x_nozero_f}, + {{"dst_type", static_cast(0)}}); x_nozero_f_runner.Run(stream); Tensor x_grad_w(x->type()); x_grad_w.mutable_data(x->dims(), place); - auto x_grad_w_runner = NpuOpRunner("Mul", {x_nozero_f, y_power}, - {x_grad_w}, {}); + auto x_grad_w_runner = 
+ NpuOpRunner("Mul", {x_nozero_f, y_power}, {x_grad_w}, {}); x_grad_w_runner.Run(stream); auto x_grad_runner = NpuOpRunner("Mul", {x_grad_w, *dout}, {*dx}, {}); @@ -112,14 +112,12 @@ class ElementwiseDivGradNPUKernel : public framework::OpKernel { Tensor neg_out(y->type()); neg_out.mutable_data(y->dims(), place); - auto neg_out_runner = NpuOpRunner("Neg", {*out}, - {neg_out}, {}); + auto neg_out_runner = NpuOpRunner("Neg", {*out}, {neg_out}, {}); neg_out_runner.Run(stream); Tensor y_grad_w(y->type()); y_grad_w.mutable_data(y->dims(), place); - auto y_grad_w_runner = NpuOpRunner("Div", {neg_out, *y}, - {y_grad_w}, {}); + auto y_grad_w_runner = NpuOpRunner("Div", {neg_out, *y}, {y_grad_w}, {}); y_grad_w_runner.Run(stream); auto y_grad_runner = NpuOpRunner("Mul", {y_grad_w, *dout}, {*dy}, {}); @@ -143,4 +141,3 @@ REGISTER_OP_NPU_KERNEL( ops::ElementwiseDivGradNPUKernel, ops::ElementwiseDivGradNPUKernel); -#endif diff --git a/paddle/fluid/operators/elementwise/elementwise_floordiv_op_npu.cc b/paddle/fluid/operators/elementwise/elementwise_floordiv_op_npu.cc new file mode 100644 index 00000000000..da011611474 --- /dev/null +++ b/paddle/fluid/operators/elementwise/elementwise_floordiv_op_npu.cc @@ -0,0 +1,52 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include +#include + +#include "paddle/fluid/operators/elementwise/elementwise_div_op.h" +#include "paddle/fluid/operators/npu_op_runner.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; +/* NPU kernel registered below for elementwise_floordiv: hands inputs X and Y to the NPU "FloorDiv" op, which writes Out. */ +template +class ElementwiseFloorDivNPUKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* x = ctx.Input("X"); + auto* y = ctx.Input("Y"); + auto* out = ctx.Output("Out"); +/* Request Out's buffer at the kernel's place before the op writes to it. */ + out->mutable_data(ctx.GetPlace()); +/* The op is run on the stream obtained from this context's device context. */ + auto stream = + ctx.template device_context() + .stream(); +/* FloorDiv gets an empty attribute list. NOTE(review): X and Y are passed through unchanged and no op attribute is read from ctx - confirm this covers every input-shape combination the op is expected to accept. */ + auto runner = NpuOpRunner("FloorDiv", {*x, *y}, {*out}, {}); + runner.Run(stream); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +REGISTER_OP_NPU_KERNEL(elementwise_floordiv, + ops::ElementwiseFloorDivNPUKernel, + ops::ElementwiseFloorDivNPUKernel); diff --git a/python/paddle/fluid/tests/unittests/npu/test_elementwise_floordiv_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_elementwise_floordiv_op_npu.py new file mode 100644 index 00000000000..93538e93867 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/npu/test_elementwise_floordiv_op_npu.py @@ -0,0 +1,67 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+from __future__ import print_function
+
+import numpy as np
+import unittest
+import sys
+sys.path.append("..")
+from op_test import OpTest
+import paddle
+
+paddle.enable_static()
+
+
+@unittest.skipIf(not paddle.is_compiled_with_npu(),
+                 "core is not compiled with NPU")
+class TestElementwiseFloorDiv(OpTest):  # int64 case; see init_dtype
+    def setUp(self):
+        self.op_type = "elementwise_floordiv"
+        self.set_npu()
+        self.init_dtype()
+        self.init_input_output()
+        # Operands and expected result come from init_input_output() above.
+        self.inputs = {
+            'X': OpTest.np_dtype_to_fluid_dtype(self.x),
+            'Y': OpTest.np_dtype_to_fluid_dtype(self.y)
+        }
+        self.attrs = {}
+        self.outputs = {'Out': self.out}
+
+    def set_npu(self):
+        self.__class__.use_npu = True  # set on the class, not the instance
+        self.place = paddle.NPUPlace(0)
+
+    def init_input_output(self):  # values drawn from [1, 1000): y is never 0
+        self.x = np.random.uniform(1, 1000, [10, 10]).astype(self.dtype)
+        self.y = np.random.uniform(1, 1000, [10, 10]).astype(self.dtype)
+        self.out = np.floor_divide(self.x, self.y)  # reference result
+
+    def init_dtype(self):
+        self.dtype = "int64"  # overridden by TestElementwiseFloorDiv2
+
+    def test_check_output(self):  # forward output only; no gradient check
+        self.check_output_with_place(self.place, check_dygraph=False)
+
+
+@unittest.skipIf(not paddle.is_compiled_with_npu(),
+                 "core is not compiled with NPU")
+class TestElementwiseFloorDiv2(TestElementwiseFloorDiv):  # int32 variant
+    def init_dtype(self):
+        self.dtype = "int32"
+
+
+if __name__ == '__main__':
+    unittest.main()
--
GitLab