From 78836bb726931c8155ad7939a81f0ff63b329510 Mon Sep 17 00:00:00 2001
From: houj04 <35131887+houj04@users.noreply.github.com>
Date: Tue, 28 Dec 2021 13:49:12 +0800
Subject: [PATCH] add reduce_prod_xpu. fix reduce_mean_xpu bug. (#38481)

* add reduce_prod_xpu. fix reduce_mean_xpu bug.

* add reduce_prod_xpu. fix reduce_mean_xpu bug. test=kunlun
---
 .../reduce_ops/reduce_mean_op_xpu.cc          |  18 +-
 .../reduce_ops/reduce_prod_op_xpu.cc          |  78 +++++++++
 .../fluid/platform/device/xpu/xpu1_op_list.h  |   1 +
 .../fluid/platform/device/xpu/xpu2_op_list.h  |   1 +
 .../unittests/xpu/test_reduce_mean_op_xpu.py  |  28 ++--
 .../unittests/xpu/test_reduce_prod_op_xpu.py  | 155 ++++++++++++++++++
 6 files changed, 263 insertions(+), 18 deletions(-)
 create mode 100644 paddle/fluid/operators/reduce_ops/reduce_prod_op_xpu.cc
 create mode 100644 python/paddle/fluid/tests/unittests/xpu/test_reduce_prod_op_xpu.py

diff --git a/paddle/fluid/operators/reduce_ops/reduce_mean_op_xpu.cc b/paddle/fluid/operators/reduce_ops/reduce_mean_op_xpu.cc
index d6c1dc5f02d..6f64a055d34 100644
--- a/paddle/fluid/operators/reduce_ops/reduce_mean_op_xpu.cc
+++ b/paddle/fluid/operators/reduce_ops/reduce_mean_op_xpu.cc
@@ -14,11 +14,12 @@
 
 #ifdef PADDLE_WITH_XPU
 
-#include "paddle/fluid/operators/reduce_ops/reduce_mean_op.h"
 #include <memory>
 #include <string>
 #include <vector>
 
+#include "paddle/fluid/operators/reduce_ops/reduce_mean_op.h"
+
 namespace paddle {
 namespace operators {
 template <typename DeviceContext, typename T>
@@ -41,15 +42,24 @@ class ReduceMeanXPUKernel : public framework::OpKernel<T> {
       xdims.push_back(input->dims()[i]);
     }
     auto rdims = context.Attr<std::vector<int>>("dim");
+    const auto& input_dim_size = input->dims().size();
+    std::vector<int> reduce_dims;
     if (reduce_all) {
-      rdims.clear();
       for (size_t i = 0; i < xdims.size(); i++) {
-        rdims.push_back(static_cast<int>(i));
+        reduce_dims.push_back(static_cast<int>(i));
+      }
+    } else {
+      for (size_t i = 0; i < rdims.size(); ++i) {
+        if (rdims[i] < 0) {
+          reduce_dims.push_back(rdims[i] + input_dim_size);
+        } else {
+          reduce_dims.push_back(rdims[i]);
+        }
       }
     }
     int r = xpu::reduce_mean(
         dev_ctx.x_context(), reinterpret_cast<const XPUType*>(input->data<T>()),
-        reinterpret_cast<XPUType*>(output->data<T>()), xdims, rdims);
+        reinterpret_cast<XPUType*>(output->data<T>()), xdims, reduce_dims);
 
     PADDLE_ENFORCE_EQ(r, XPU_SUCCESS,
                       platform::errors::External(
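The reduce_mean change above fixes two issues: negative entries in the "dim" attribute used to reach xpu::reduce_mean unnormalized, and the reduce_all path cleared and rewrote the attribute vector in place. The patched kernel instead builds a separate reduce_dims vector with negative axes wrapped by the input rank. A minimal Python sketch of the same normalization (the helper name is illustrative, not Paddle API):

    def normalize_reduce_dims(rdims, rank, reduce_all):
        # Mirror of the C++ fix: reduce over every axis when reduce_all is set,
        # otherwise wrap negative axes into the range [0, rank).
        if reduce_all:
            return list(range(rank))
        return [d + rank if d < 0 else d for d in rdims]

    assert normalize_reduce_dims([-2], 3, False) == [1]       # the Test3DReduce2 case
    assert normalize_reduce_dims([0, 3], 8, False) == [0, 3]  # already non-negative
    assert normalize_reduce_dims([], 3, True) == [0, 1, 2]    # reduce_all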
diff --git a/paddle/fluid/operators/reduce_ops/reduce_prod_op_xpu.cc b/paddle/fluid/operators/reduce_ops/reduce_prod_op_xpu.cc
new file mode 100644
index 00000000000..ae7e1317323
--- /dev/null
+++ b/paddle/fluid/operators/reduce_ops/reduce_prod_op_xpu.cc
@@ -0,0 +1,78 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifdef PADDLE_WITH_XPU
+
+#include <memory>
+#include <string>
+
+#include "paddle/fluid/operators/reduce_ops/reduce_prod_op.h"
+
+namespace paddle {
+namespace operators {
+template <typename DeviceContext, typename T>
+class ReduceProdXPUKernel : public framework::OpKernel<T> {
+  using XPUType = typename XPUTypeTrait<T>::Type;
+
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    PADDLE_ENFORCE_EQ(
+        platform::is_xpu_place(context.GetPlace()), true,
+        platform::errors::Unavailable("This kernel only runs on XPU."));
+    bool reduce_all = context.Attr<bool>("reduce_all");
+    auto* input = context.Input<Tensor>("X");
+    auto* output = context.Output<Tensor>("Out");
+    output->mutable_data<T>(context.GetPlace());
+    auto& dev_ctx = context.template device_context<DeviceContext>();
+
+    std::vector<int> xdims;
+    for (int i = 0; i < input->dims().size(); i++) {
+      xdims.push_back(input->dims()[i]);
+    }
+    auto rdims = context.Attr<std::vector<int>>("dim");
+    const auto& input_dim_size = input->dims().size();
+
+    std::vector<int> reduce_dims;
+    if (reduce_all) {
+      for (size_t i = 0; i < xdims.size(); i++) {
+        reduce_dims.push_back(static_cast<int>(i));
+      }
+    } else {
+      for (size_t i = 0; i < rdims.size(); ++i) {
+        if (rdims[i] < 0) {
+          reduce_dims.push_back(rdims[i] + input_dim_size);
+        } else {
+          reduce_dims.push_back(rdims[i]);
+        }
+      }
+    }
+    int r = xpu::reduce_prod(
+        dev_ctx.x_context(), reinterpret_cast<const XPUType*>(input->data<T>()),
+        reinterpret_cast<XPUType*>(output->data<T>()), xdims, reduce_dims);
+
+    PADDLE_ENFORCE_EQ(r, XPU_SUCCESS,
+                      platform::errors::External(
+                          "XPU reduce_prod kernel return wrong value[%d %s]", r,
+                          XPUAPIErrorMsg[r]));
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+REGISTER_OP_XPU_KERNEL(
+    reduce_prod,
+    ops::ReduceProdXPUKernel<paddle::platform::XPUDeviceContext, float>);
+
+#endif
diff --git a/paddle/fluid/platform/device/xpu/xpu1_op_list.h b/paddle/fluid/platform/device/xpu/xpu1_op_list.h
index d4fd42d7a97..b2114afee63 100644
--- a/paddle/fluid/platform/device/xpu/xpu1_op_list.h
+++ b/paddle/fluid/platform/device/xpu/xpu1_op_list.h
@@ -219,6 +219,7 @@ XPUOpMap& get_kl1_ops() {
     {"reduce_mean", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
     {"reduce_mean_grad",
      XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+    {"reduce_prod", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
     {"reduce_sum_grad",
      XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
     {"reduce_sum", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
diff --git a/paddle/fluid/platform/device/xpu/xpu2_op_list.h b/paddle/fluid/platform/device/xpu/xpu2_op_list.h
index 255c818b98c..115250b3db7 100644
--- a/paddle/fluid/platform/device/xpu/xpu2_op_list.h
+++ b/paddle/fluid/platform/device/xpu/xpu2_op_list.h
@@ -242,6 +242,7 @@ XPUOpMap& get_kl2_ops() {
     {"reduce_mean_grad",
      XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
     {"reduce_mean", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+    {"reduce_prod", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
     {"reduce_sum_grad",
      XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
     {"reduce_sum", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
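With reduce_prod registered for FP32 in both the KL1 and KL2 op lists, the operator can dispatch to the new XPU kernel. A usage sketch, assuming a Paddle build with PADDLE_WITH_XPU and an attached XPU device (it will not run on a CPU-only build):

    import numpy as np
    import paddle

    paddle.set_device("xpu:0")  # assumes an XPU device is available
    x = paddle.to_tensor(np.random.rand(5, 6, 10).astype("float32"))
    y = paddle.prod(x, axis=-2)  # negative axis, normalized inside the kernel
    print(y.shape)               # [5, 10]

Note that only float32 is listed, matching the single pOpKernelType(vartype::FP32, XPUPlace()) entry for each kernel above.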
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_reduce_mean_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_reduce_mean_op_xpu.py
index 44c356ca65f..5e866dddbe2 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_reduce_mean_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_reduce_mean_op_xpu.py
@@ -29,7 +29,7 @@ from paddle.fluid.framework import convert_np_dtype_to_dtype_
 class TestMeanOp(OpTest):
     def setUp(self):
         self.op_type = "reduce_mean"
-        self.inputs = {'X': np.random.random((5, 6, 10)).astype("float64")}
+        self.inputs = {'X': np.random.random((5, 6, 10)).astype("float32")}
         self.attrs = {'use_xpu': True}
         self.outputs = {'Out': self.inputs['X'].mean(axis=0)}
 
@@ -46,7 +46,7 @@ class TestMeanOp5D(OpTest):
     def setUp(self):
         self.op_type = "reduce_mean"
         self.inputs = {
-            'X': np.random.random((1, 2, 5, 6, 10)).astype("float64")
+            'X': np.random.random((1, 2, 5, 6, 10)).astype("float32")
         }
         self.attrs = {'use_xpu': True}
         self.outputs = {'Out': self.inputs['X'].mean(axis=0)}
@@ -64,7 +64,7 @@ class TestMeanOp6D(OpTest):
     def setUp(self):
         self.op_type = "reduce_mean"
         self.inputs = {
-            'X': np.random.random((1, 1, 2, 5, 6, 10)).astype("float64")
+            'X': np.random.random((1, 1, 2, 5, 6, 10)).astype("float32")
         }
         self.attrs = {'use_xpu': True}
         self.outputs = {'Out': self.inputs['X'].mean(axis=0)}
@@ -82,7 +82,7 @@ class TestMeanOp8D(OpTest):
     def setUp(self):
         self.op_type = "reduce_mean"
         self.inputs = {
-            'X': np.random.random((1, 3, 1, 2, 1, 4, 3, 10)).astype("float64")
+            'X': np.random.random((1, 3, 1, 2, 1, 4, 3, 10)).astype("float32")
         }
         self.attrs = {'dim': (0, 3), 'use_xpu': True}
         self.outputs = {'Out': self.inputs['X'].mean(axis=(0, 3))}
@@ -99,7 +99,7 @@ class TestMeanOp8D(OpTest):
 class Test1DReduce(OpTest):
     def setUp(self):
         self.op_type = "reduce_mean"
-        self.inputs = {'X': np.random.random(120).astype("float64")}
+        self.inputs = {'X': np.random.random(120).astype("float32")}
         self.attrs = {'use_xpu': True}
         self.outputs = {'Out': self.inputs['X'].mean(axis=0)}
 
@@ -116,7 +116,7 @@ class Test2DReduce0(Test1DReduce):
     def setUp(self):
         self.op_type = "reduce_mean"
         self.attrs = {'dim': [0], 'use_xpu': True}
-        self.inputs = {'X': np.random.random((20, 10)).astype("float64")}
+        self.inputs = {'X': np.random.random((20, 10)).astype("float32")}
         self.outputs = {'Out': self.inputs['X'].mean(axis=0)}
 
 
@@ -124,7 +124,7 @@ class Test2DReduce1(Test1DReduce):
     def setUp(self):
         self.op_type = "reduce_mean"
         self.attrs = {'dim': [1], 'use_xpu': True}
-        self.inputs = {'X': np.random.random((20, 10)).astype("float64")}
+        self.inputs = {'X': np.random.random((20, 10)).astype("float32")}
         self.outputs = {
             'Out': self.inputs['X'].mean(axis=tuple(self.attrs['dim']))
         }
@@ -134,7 +134,7 @@ class Test3DReduce0(Test1DReduce):
     def setUp(self):
         self.op_type = "reduce_mean"
         self.attrs = {'dim': [1], 'use_xpu': True}
-        self.inputs = {'X': np.random.random((5, 6, 7)).astype("float64")}
+        self.inputs = {'X': np.random.random((5, 6, 7)).astype("float32")}
         self.outputs = {
             'Out': self.inputs['X'].mean(axis=tuple(self.attrs['dim']))
         }
@@ -144,7 +144,7 @@ class Test3DReduce1(Test1DReduce):
     def setUp(self):
         self.op_type = "reduce_mean"
         self.attrs = {'dim': [2], 'use_xpu': True}
-        self.inputs = {'X': np.random.random((5, 6, 7)).astype("float64")}
+        self.inputs = {'X': np.random.random((5, 6, 7)).astype("float32")}
         self.outputs = {
             'Out': self.inputs['X'].mean(axis=tuple(self.attrs['dim']))
         }
@@ -154,7 +154,7 @@ class Test3DReduce2(Test1DReduce):
     def setUp(self):
         self.op_type = "reduce_mean"
         self.attrs = {'dim': [-2], 'use_xpu': True}
-        self.inputs = {'X': np.random.random((5, 6, 7)).astype("float64")}
+        self.inputs = {'X': np.random.random((5, 6, 7)).astype("float32")}
         self.outputs = {
             'Out': self.inputs['X'].mean(axis=tuple(self.attrs['dim']))
         }
@@ -164,7 +164,7 @@ class Test3DReduce3(Test1DReduce):
     def setUp(self):
         self.op_type = "reduce_mean"
         self.attrs = {'dim': [1, 2], 'use_xpu': True}
-        self.inputs = {'X': np.random.random((5, 6, 7)).astype("float64")}
+        self.inputs = {'X': np.random.random((5, 6, 7)).astype("float32")}
         self.outputs = {
             'Out': self.inputs['X'].mean(axis=tuple(self.attrs['dim']))
         }
@@ -173,7 +173,7 @@ class Test3DReduce3(Test1DReduce):
 class TestKeepDimReduce(Test1DReduce):
     def setUp(self):
         self.op_type = "reduce_mean"
-        self.inputs = {'X': np.random.random((5, 6, 10)).astype("float64")}
+        self.inputs = {'X': np.random.random((5, 6, 10)).astype("float32")}
         self.attrs = {'dim': [1], 'keep_dim': True, 'use_xpu': True}
         self.outputs = {
             'Out': self.inputs['X'].mean(
@@ -185,7 +185,7 @@ class TestKeepDim8DReduce(Test1DReduce):
     def setUp(self):
         self.op_type = "reduce_mean"
         self.inputs = {
-            'X': np.random.random((2, 5, 3, 2, 2, 3, 4, 2)).astype("float64")
+            'X': np.random.random((2, 5, 3, 2, 2, 3, 4, 2)).astype("float32")
         }
         self.attrs = {'dim': (3, 4, 5), 'keep_dim': True, 'use_xpu': True}
         self.outputs = {
@@ -197,7 +197,7 @@ class TestKeepDim8DReduce(Test1DReduce):
 class TestReduceAll(Test1DReduce):
     def setUp(self):
         self.op_type = "reduce_mean"
-        self.inputs = {'X': np.random.random((5, 6, 2, 10)).astype("float64")}
+        self.inputs = {'X': np.random.random((5, 6, 2, 10)).astype("float32")}
         self.attrs = {'reduce_all': True, 'use_xpu': True}
         self.outputs = {'Out': self.inputs['X'].mean()}
 
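The float64-to-float32 changes above keep the test data consistent with the kernel registration: reduce_mean on XPU is listed only for FP32, so float64 inputs would have no matching XPU kernel. The reference outputs the tests compare against are plain numpy reductions, e.g. for the keep_dim case:

    import numpy as np

    x = np.random.random((5, 6, 10)).astype("float32")  # FP32, as registered on XPU
    expected = x.mean(axis=tuple([1]), keepdims=True)   # TestKeepDimReduce's reference
    print(expected.shape)  # (5, 1, 10)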
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_reduce_prod_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_reduce_prod_op_xpu.py
new file mode 100644
index 00000000000..44686ae418b
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/xpu/test_reduce_prod_op_xpu.py
@@ -0,0 +1,155 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import unittest
+import numpy as np
+import sys
+sys.path.append("..")
+from op_test_xpu import OpTest, XPUOpTest
+from op_test import skip_check_grad_ci
+import paddle
+import paddle.fluid.core as core
+import paddle.fluid as fluid
+from paddle.fluid import compiler, Program, program_guard
+from paddle.fluid.framework import convert_np_dtype_to_dtype_
+
+
+class TestXPUReduceProdOp(XPUOpTest):
+    def setUp(self):
+        self.init_op_type()
+        self.initTestCase()
+        self.use_xpu = True
+        self.use_mkldnn = False
+        self.attrs = {
+            'dim': self.axis,
+            'keep_dim': self.keep_dim,
+            'reduce_all': self.reduce_all
+        }
+        self.inputs = {'X': np.random.random(self.shape).astype("float32")}
+        if self.attrs['reduce_all']:
+            self.outputs = {'Out': self.inputs['X'].prod()}
+        else:
+            self.outputs = {
+                'Out': self.inputs['X'].prod(
+                    axis=self.axis, keepdims=self.attrs['keep_dim'])
+            }
+
+    def test_check_output(self):
+        if paddle.is_compiled_with_xpu():
+            paddle.enable_static()
+            place = paddle.XPUPlace(0)
+            self.check_output_with_place(place)
+
+    def test_check_grad(self):
+        if paddle.is_compiled_with_xpu():
+            paddle.enable_static()
+            place = paddle.XPUPlace(0)
+            self.check_grad_with_place(place, ['X'], 'Out')
+
+    def init_op_type(self):
+        self.op_type = "reduce_prod"
+        self.use_mkldnn = False
+        self.keep_dim = False
+        self.reduce_all = False
+
+    def initTestCase(self):
+        self.shape = (5, 6, 10)
+        self.axis = (0, )
+
+
+class TestProdOp5D(TestXPUReduceProdOp):
+    def initTestCase(self):
+        self.shape = (1, 2, 5, 6, 10)
+        self.axis = (0, )
+
+
+class TestProdOp6D(TestXPUReduceProdOp):
+    def initTestCase(self):
+        self.shape = (1, 1, 2, 5, 6, 10)
+        self.axis = (0, )
+
+
+class TestProdOp8D(TestXPUReduceProdOp):
+    def initTestCase(self):
+        self.shape = (1, 3, 1, 2, 1, 4, 3, 10)
+        self.axis = (0, 3)
+
+
+class Test1DReduce(TestXPUReduceProdOp):
+    def initTestCase(self):
+        self.shape = 120
+        self.axis = (0, )
+
+
+class Test2DReduce0(TestXPUReduceProdOp):
+    def initTestCase(self):
+        self.shape = (20, 10)
+        self.axis = (0, )
+
+
+class Test2DReduce1(TestXPUReduceProdOp):
+    def initTestCase(self):
+        self.shape = (20, 10)
+        self.axis = (1, )
+
+
+class Test3DReduce0(TestXPUReduceProdOp):
+    def initTestCase(self):
+        self.shape = (5, 6, 7)
+        self.axis = (1, )
+
+
+class Test3DReduce1(TestXPUReduceProdOp):
+    def initTestCase(self):
+        self.shape = (5, 6, 7)
+        self.axis = (2, )
+
+
+class Test3DReduce2(TestXPUReduceProdOp):
+    def initTestCase(self):
+        self.shape = (5, 6, 7)
+        self.axis = (-2, )
+
+
+class Test3DReduce3(TestXPUReduceProdOp):
+    def initTestCase(self):
+        self.shape = (5, 6, 7)
+        self.axis = (1, 2)
+
+
+class TestKeepDimReduce(TestXPUReduceProdOp):
+    def initTestCase(self):
+        self.shape = (5, 6, 10)
+        self.axis = (1, )
+        self.keep_dim = True
+
+
+class TestKeepDim8DReduce(TestXPUReduceProdOp):
+    def initTestCase(self):
+        self.shape = (2, 5, 3, 2, 2, 3, 4, 2)
+        self.axis = (3, 4, 5)
+        self.keep_dim = True
+
+
+class TestReduceAll(TestXPUReduceProdOp):
+    def initTestCase(self):
+        self.shape = (5, 6, 2, 10)
+        self.axis = (0, )
+        self.reduce_all = True
+
+
+if __name__ == '__main__':
+    unittest.main()
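The expected-output logic of TestXPUReduceProdOp can be exercised standalone with numpy, which is a quick way to sanity-check a case before running it through OpTest on a device:

    import numpy as np

    def expected_prod(x, axis, keep_dim, reduce_all):
        # Mirrors TestXPUReduceProdOp.setUp: full product when reduce_all is set,
        # otherwise product over the given axes.
        if reduce_all:
            return x.prod()
        return x.prod(axis=axis, keepdims=keep_dim)

    x = np.random.random((5, 6, 10)).astype("float32")
    print(expected_prod(x, (0, ), False, False).shape)   # (6, 10)
    print(expected_prod(x, (1, 2), False, False).shape)  # (5,)
    print(expected_prod(x, None, False, True))           # scalar product of all entries

On an XPU machine the file runs as an ordinary unit test, e.g. python test_reduce_prod_op_xpu.py.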