diff --git a/paddle/fluid/operators/reduce_ops/reduce_prod_op_npu.cc b/paddle/fluid/operators/reduce_ops/reduce_prod_op_npu.cc
new file mode 100644
index 0000000000000000000000000000000000000000..834b63f199e37dcfe06560da37506b8978ca2249
--- /dev/null
+++ b/paddle/fluid/operators/reduce_ops/reduce_prod_op_npu.cc
@@ -0,0 +1,117 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/operators/reduce_ops/reduce_prod_op.h"
+#include "paddle/fluid/operators/npu_op_runner.h"
+
+namespace paddle {
+namespace operators {
+
+using Tensor = framework::Tensor;
+
+// NPU kernel for the `reduce_prod` operator.
+//
+// Runs the "ReduceProdD" NPU op on input `X` over the axes in the `dim`
+// attribute, or over every axis of `X` when `reduce_all` is set. The result
+// is first written to a temporary tensor with X's element type. If
+// `out_dtype` is not -1 and names a different type, the temporary is then
+// converted into `Out` with the "Cast" NPU op; otherwise `Out` shares the
+// temporary's data.
+template <typename DeviceContext, typename T>
+class ReduceProdNPUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* x = ctx.Input<Tensor>("X");
+    auto* out = ctx.Output<Tensor>("Out");
+    auto dims = ctx.Attr<std::vector<int>>("dim");
+    bool keep_dim = ctx.Attr<bool>("keep_dim");
+    bool reduce_all = ctx.Attr<bool>("reduce_all");
+    int out_dtype = ctx.Attr<int>("out_dtype");
+
+    auto place = ctx.GetPlace();
+
+    // Receives the raw reduction result, in X's element type.
+    framework::Tensor cast_out(x->type());
+    cast_out.Resize(out->dims());
+    cast_out.mutable_data<T>(place);
+
+    // An `out_dtype` of -1 keeps the input type.
+    auto cast_out_dtype = x->type();
+    if (out_dtype != -1) {
+      cast_out_dtype = static_cast<framework::proto::VarType::Type>(out_dtype);
+    }
+    const bool need_cast = x->type() != cast_out_dtype;
+
+    if (need_cast) {
+      // Allocate `Out` with the requested element type for the Cast below.
+      if (cast_out_dtype == framework::proto::VarType::FP32) {
+        out->mutable_data<float>(place);
+      } else if (cast_out_dtype == framework::proto::VarType::FP16) {
+        out->mutable_data<paddle::platform::float16>(place);
+      } else if (cast_out_dtype == framework::proto::VarType::INT16) {
+        out->mutable_data<int16_t>(place);
+      } else if (cast_out_dtype == framework::proto::VarType::INT32) {
+        out->mutable_data<int32_t>(place);
+      } else if (cast_out_dtype == framework::proto::VarType::INT64) {
+        out->mutable_data<int64_t>(place);
+      } else if (cast_out_dtype == framework::proto::VarType::FP64) {
+        out->mutable_data<double>(place);
+      } else if (cast_out_dtype == framework::proto::VarType::BOOL) {
+        out->mutable_data<bool>(place);
+      } else {
+        // Fail loudly: without this branch `Out` would not be allocated here
+        // before the Cast op below writes to it.
+        PADDLE_THROW(platform::errors::Unimplemented(
+            "reduce_prod on NPU does not support out_dtype %d.", out_dtype));
+      }
+    } else {
+      out->ShareDataWith(cast_out);
+    }
+
+    // With `reduce_all`, replace `dim` by the full list of X's axes.
+    if (reduce_all) {
+      dims.clear();
+      for (int i = 0; i < x->dims().size(); i++) {
+        dims.push_back(i);
+      }
+    }
+    framework::NPUAttributeMap attr_input = {{"axes", dims},
+                                             {"keep_dims", keep_dim}};
+
+    auto stream =
+        ctx.template device_context<paddle::platform::NPUDeviceContext>()
+            .stream();
+
+    const auto& runner =
+        NpuOpRunner("ReduceProdD", {*x}, {cast_out}, attr_input);
+    runner.Run(stream);
+
+    if (need_cast) {
+      auto dst_dtype = ConvertToNpuDtype(cast_out_dtype);
+      const auto& runner_cast =
+          NpuOpRunner("Cast", {cast_out}, {*out},
+                      {{"dst_type", static_cast<int>(dst_dtype)}});
+      runner_cast.Run(stream);
+    }
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+namespace plat = paddle::platform;
+REGISTER_OP_NPU_KERNEL(
+    reduce_prod, ops::ReduceProdNPUKernel<plat::NPUDeviceContext, float>,
+    ops::ReduceProdNPUKernel<plat::NPUDeviceContext, plat::float16>);
diff --git a/python/paddle/fluid/tests/unittests/npu/test_reduce_max_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_reduce_max_op_npu.py
index a5d078ced2876757f3258f8a6aea5cf4bd0155da..f6c346159b8bee25e242c128412a0a36c78f4f1f 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_reduce_max_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_reduce_max_op_npu.py
@@ -127,8 +127,6 @@ class TestReduceMaxOpWithOutDtype_int16(TestNPUReduceMaxOp):
             'out_dtype': int(core.VarDesc.VarType.INT16)
         }
 
-        self.out = self.inputs['X'].max(axis=tuple(self.attrs['dim']))
-
         self.outputs = {
             'Out':
             self.inputs['X'].max(axis=tuple(self.attrs['dim'])).astype(np.int16)
@@ -195,9 +193,6 @@ class TestReduceMaxOpWithOutDtype_fp16(TestNPUReduceMaxOp):
             'dim': [-2, -1],
             'out_dtype': int(core.VarDesc.VarType.FP16)
         }
-
-        self.out = self.inputs['X'].max(axis=tuple(self.attrs['dim']))
-
         self.outputs = {
             'Out': self.inputs['X'].max(
                 axis=tuple(self.attrs['dim'])).astype(np.float16)
diff --git a/python/paddle/fluid/tests/unittests/npu/test_reduce_prod_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_reduce_prod_op_npu.py
new file mode 100644
index 0000000000000000000000000000000000000000..59f181be5edacb3d609f6ab827439c4c48860220
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/npu/test_reduce_prod_op_npu.py
@@ -0,0 +1,235 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import unittest
+import numpy as np
+from paddle.fluid.tests.unittests.op_test import OpTest, skip_check_grad_ci
+import paddle
+import paddle.fluid.core as core
+import paddle.fluid as fluid
+from paddle.fluid import compiler, Program, program_guard
+from paddle.fluid.framework import convert_np_dtype_to_dtype_
+
+paddle.enable_static()
+
+
+class TestNPUReduceProd(OpTest):  # base case: float32 input, product over axis 0
+    def setUp(self):
+        self.op_type = "reduce_prod"
+        self.set_npu()
+        self.init_dtype()
+
+        self.inputs = {'X': np.random.random((5, 6, 10)).astype(self.dtype)}
+        self.attrs = {'dim': [0]}
+        self.outputs = {
+            'Out': self.inputs['X'].prod(axis=tuple(self.attrs['dim']))
+        }
+
+    def test_check_output(self):
+        self.check_output_with_place(self.place)
+
+    def set_npu(self):
+        self.__class__.use_npu = True
+        self.place = paddle.NPUPlace(0)
+
+    def init_dtype(self):
+        self.dtype = np.float32
+
+
+class TestNPUReduceProd2(TestNPUReduceProd):  # empty attrs dict
+    def setUp(self):
+        self.op_type = "reduce_prod"
+        self.set_npu()
+        self.init_dtype()
+
+        self.inputs = {'X': np.random.random((5, 6, 10)).astype(self.dtype)}
+        self.attrs = {}  # default 'dim': [0]
+        self.outputs = {'Out': self.inputs['X'].prod(axis=tuple([0]))}
+
+
+class TestNPUReduceProd3(TestNPUReduceProd):  # attrs never assigned
+    def setUp(self):
+        self.op_type = "reduce_prod"
+        self.set_npu()
+        self.init_dtype()
+
+        self.inputs = {'X': np.random.random((5, 6, 10)).astype(self.dtype)}
+        # self.attrs = {'dim': [0]}
+        self.outputs = {'Out': self.inputs['X'].prod(axis=tuple([0]))}
+
+
+class TestNPUReduceProd6D(TestNPUReduceProd):  # 6-D input, axes 2, 3, 4
+    def setUp(self):
+        self.op_type = "reduce_prod"
+        self.set_npu()
+        self.init_dtype()
+
+        self.inputs = {
+            'X': np.random.random((5, 6, 2, 3, 4, 2)).astype(self.dtype)
+        }
+        self.attrs = {'dim': [2, 3, 4]}
+        self.outputs = {
+            'Out': self.inputs['X'].prod(axis=tuple(self.attrs['dim']))
+        }
+
+
+class TestNPUReduceProd8D(TestNPUReduceProd):  # 8-D input, axes 2, 3, 4
+    def setUp(self):
+        self.op_type = "reduce_prod"
+        self.set_npu()
+        self.init_dtype()
+
+        self.inputs = {
+            'X': np.random.random((2, 5, 3, 2, 2, 3, 4, 2)).astype(self.dtype)
+        }
+        self.attrs = {'dim': [2, 3, 4]}
+        self.outputs = {
+            'Out': self.inputs['X'].prod(axis=tuple(self.attrs['dim']))
+        }
+
+
+class TestReduceAll(TestNPUReduceProd):  # reduce_all=True, scalar product
+    def setUp(self):
+        self.op_type = "reduce_prod"
+        self.set_npu()
+        self.init_dtype()
+
+        self.inputs = {'X': np.random.random((5, 6, 10)).astype(self.dtype)}
+        self.attrs = {'reduce_all': True}
+        self.outputs = {'Out': self.inputs['X'].prod()}
+
+
+class TestNPUReduceProdWithOutDtype_bool(TestNPUReduceProd):  # Out as bool
+    def setUp(self):
+        self.op_type = "reduce_prod"
+        self.set_npu()
+        self.init_dtype()
+
+        self.inputs = {'X': np.random.random((5, 6, 10)).astype(self.dtype)}
+        self.attrs = {'dim': [0], 'out_dtype': int(core.VarDesc.VarType.BOOL)}
+        self.outputs = {
+            'Out':
+            self.inputs['X'].prod(axis=tuple(self.attrs['dim'])).astype(bool)
+        }
+
+
+class TestNPUReduceProdWithOutDtype_int16(TestNPUReduceProd):  # Out as int16
+    def setUp(self):
+        self.op_type = "reduce_prod"
+        self.set_npu()
+        self.init_dtype()
+
+        self.inputs = {'X': np.random.random((5, 6, 10)).astype(self.dtype)}
+        self.attrs = {'dim': [0], 'out_dtype': int(core.VarDesc.VarType.INT16)}
+        self.outputs = {
+            'Out': self.inputs['X'].prod(
+                axis=tuple(self.attrs['dim'])).astype(np.int16)
+        }
+
+
+class TestNPUReduceProdWithOutDtype_int32(TestNPUReduceProd):  # Out as int32
+    def setUp(self):
+        self.op_type = "reduce_prod"
+        self.set_npu()
+        self.init_dtype()
+
+        self.inputs = {'X': np.random.random((5, 6, 10)).astype(self.dtype)}
+        self.attrs = {'dim': [0], 'out_dtype': int(core.VarDesc.VarType.INT32)}
+        self.outputs = {
+            'Out': self.inputs['X'].prod(
+                axis=tuple(self.attrs['dim'])).astype(np.int32)
+        }
+
+
+class TestNPUReduceProdWithOutDtype_int64(TestNPUReduceProd):  # Out as int64
+    def setUp(self):
+        self.op_type = "reduce_prod"
+        self.set_npu()
+        self.init_dtype()
+
+        self.inputs = {'X': np.random.random((5, 6, 10)).astype(self.dtype)}
+        self.attrs = {'dim': [0], 'out_dtype': int(core.VarDesc.VarType.INT64)}
+        self.outputs = {
+            'Out': self.inputs['X'].prod(
+                axis=tuple(self.attrs['dim'])).astype(np.int64)
+        }
+
+
+class TestNPUReduceProdWithOutDtype_fp16(TestNPUReduceProd):  # Out as float16
+    def setUp(self):
+        self.op_type = "reduce_prod"
+        self.set_npu()
+        self.init_dtype()
+
+        self.inputs = {'X': np.random.random((5, 6, 10)).astype(self.dtype)}
+        self.attrs = {'dim': [0], 'out_dtype': int(core.VarDesc.VarType.FP16)}
+        self.outputs = {
+            'Out': self.inputs['X'].prod(
+                axis=tuple(self.attrs['dim'])).astype(np.float16)
+        }
+
+    def test_check_output(self):
+        self.check_output_with_place(self.place, atol=1e-3)
+
+
+class TestNPUReduceProdWithOutDtype_fp32(TestNPUReduceProd):  # same in/out dtype
+    def setUp(self):
+        self.op_type = "reduce_prod"
+        self.set_npu()
+        self.init_dtype()
+
+        self.inputs = {'X': np.random.random((5, 6, 10)).astype(self.dtype)}
+        self.attrs = {'dim': [0], 'out_dtype': int(core.VarDesc.VarType.FP32)}
+        self.outputs = {
+            'Out': self.inputs['X'].prod(
+                axis=tuple(self.attrs['dim'])).astype(np.float32)
+        }
+
+
+class TestNPUReduceProdWithOutDtype_fp64(TestNPUReduceProd):  # Out as float64
+    def setUp(self):
+        self.op_type = "reduce_prod"
+        self.set_npu()
+        self.init_dtype()
+
+        self.inputs = {'X': np.random.random((5, 6, 10)).astype(self.dtype)}
+        self.attrs = {'dim': [0], 'out_dtype': int(core.VarDesc.VarType.FP64)}
+        self.outputs = {
+            'Out': self.inputs['X'].prod(
+                axis=tuple(self.attrs['dim'])).astype(np.float64)
+        }
+
+
+@skip_check_grad_ci(reason="right now not implement grad op")
+class TestNPUReduceProdWithOutDtype_fp32_2(TestNPUReduceProd):  # fp16 -> fp32
+    def setUp(self):
+        self.op_type = "reduce_prod"
+        self.set_npu()
+        self.init_dtype()
+
+        self.inputs = {'X': np.random.random((5, 6, 10)).astype(self.dtype)}
+        self.attrs = {'dim': [0], 'out_dtype': int(core.VarDesc.VarType.FP32)}
+        self.outputs = {
+            'Out': self.inputs['X'].prod(
+                axis=tuple(self.attrs['dim'])).astype(np.float32)
+        }
+
+    def init_dtype(self):
+        self.dtype = np.float16
+
+
+if __name__ == '__main__':
+    unittest.main()