Unverified commit 78836bb7, authored by houj04, committed by GitHub

add reduce_prod_xpu. fix reduce_mean_xpu bug. (#38481)

* add reduce_prod_xpu. fix reduce_mean_xpu bug.

* add reduce_prod_xpu. fix reduce_mean_xpu bug. test=kunlun
Parent 404a4a6a
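The reduce_mean fix targets negative reduction axes: before this patch, a negative `dim` attribute was handed to `xpu::reduce_mean` unnormalized. A minimal repro sketch, assuming a Paddle build with XPU support (`paddle.set_device` and `paddle.mean` are the public entry points that dispatch to this kernel):

import paddle

# Assumes an XPU build of Paddle; paddle.mean lowers to the reduce_mean
# XPU kernel patched below.
paddle.set_device('xpu')
x = paddle.rand([5, 6, 7], dtype='float32')

# With the fix, axis=-2 is normalized to axis 1 inside the kernel, so the
# result should match the CPU path; before, the raw -2 reached the runtime.
y = paddle.mean(x, axis=-2)
print(y.shape)  # expected: [5, 7]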
@@ -14,11 +14,12 @@
#ifdef PADDLE_WITH_XPU
#include "paddle/fluid/operators/reduce_ops/reduce_mean_op.h"
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/operators/reduce_ops/reduce_mean_op.h"
namespace paddle {
namespace operators {
template <typename DeviceContext, typename T>
@@ -41,15 +42,24 @@ class ReduceMeanXPUKernel : public framework::OpKernel<T> {
      xdims.push_back(input->dims()[i]);
    }
    auto rdims = context.Attr<std::vector<int>>("dim");
    const auto& input_dim_size = input->dims().size();

    // Normalize the reduction axes: reduce over every axis when reduce_all
    // is set; otherwise map negative axes into [0, rank).
    std::vector<int> reduce_dims;
    if (reduce_all) {
      for (size_t i = 0; i < xdims.size(); i++) {
        reduce_dims.push_back(static_cast<int>(i));
      }
    } else {
      for (size_t i = 0; i < rdims.size(); ++i) {
        if (rdims[i] < 0) {
          reduce_dims.push_back(rdims[i] + input_dim_size);
        } else {
          reduce_dims.push_back(rdims[i]);
        }
      }
    }

    int r = xpu::reduce_mean(
        dev_ctx.x_context(), reinterpret_cast<const XPUType*>(input->data<T>()),
        reinterpret_cast<XPUType*>(output->data<T>()), xdims, reduce_dims);
    PADDLE_ENFORCE_EQ(r, XPU_SUCCESS,
                      platform::errors::External(
......
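The crux of the fix is the `reduce_dims` block above: `reduce_all` expands to every axis, and negative entries of `dim` are shifted by the input rank. A minimal Python sketch of the same normalization (the function name is illustrative, not part of the kernel):

def normalize_axes(axes, rank, reduce_all=False):
    # Mirror of the reduce_dims loop above: reduce over every axis when
    # reduce_all is set; otherwise map negative axes into [0, rank).
    if reduce_all:
        return list(range(rank))
    return [a + rank if a < 0 else a for a in axes]


assert normalize_axes([-2], 3) == [1]       # dim=[-2] on a 3-D input -> axis 1
assert normalize_axes([0, 3], 8) == [0, 3]  # non-negative axes pass through
assert normalize_axes([], 3, reduce_all=True) == [0, 1, 2]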
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifdef PADDLE_WITH_XPU

#include <memory>
#include <vector>

#include "paddle/fluid/operators/reduce_ops/reduce_prod_op.h"

namespace paddle {
namespace operators {

template <typename DeviceContext, typename T>
class ReduceProdXPUKernel : public framework::OpKernel<T> {
  using XPUType = typename XPUTypeTrait<T>::Type;

 public:
  void Compute(const framework::ExecutionContext& context) const override {
    PADDLE_ENFORCE_EQ(
        platform::is_xpu_place(context.GetPlace()), true,
        platform::errors::Unavailable("This kernel only runs on XPU."));
    bool reduce_all = context.Attr<bool>("reduce_all");
    auto* input = context.Input<Tensor>("X");
    auto* output = context.Output<Tensor>("Out");
    output->mutable_data<T>(context.GetPlace());
    auto& dev_ctx = context.template device_context<DeviceContext>();

    // Collect the input shape as plain ints for the XPU runtime.
    std::vector<int> xdims;
    for (int i = 0; i < input->dims().size(); i++) {
      xdims.push_back(input->dims()[i]);
    }
    auto rdims = context.Attr<std::vector<int>>("dim");
    const auto& input_dim_size = input->dims().size();

    // Same axis normalization as the reduce_mean fix above: reduce over
    // every axis when reduce_all is set; otherwise map negative axes
    // into [0, rank).
    std::vector<int> reduce_dims;
    if (reduce_all) {
      for (size_t i = 0; i < xdims.size(); i++) {
        reduce_dims.push_back(static_cast<int>(i));
      }
    } else {
      for (size_t i = 0; i < rdims.size(); ++i) {
        if (rdims[i] < 0) {
          reduce_dims.push_back(rdims[i] + input_dim_size);
        } else {
          reduce_dims.push_back(rdims[i]);
        }
      }
    }

    int r = xpu::reduce_prod(
        dev_ctx.x_context(), reinterpret_cast<const XPUType*>(input->data<T>()),
        reinterpret_cast<XPUType*>(output->data<T>()), xdims, reduce_dims);
    PADDLE_ENFORCE_EQ(r, XPU_SUCCESS,
                      platform::errors::External(
                          "XPU reduce_prod kernel return wrong value[%d %s]", r,
                          XPUAPIErrorMsg[r]));
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
REGISTER_OP_XPU_KERNEL(
    reduce_prod,
    ops::ReduceProdXPUKernel<paddle::platform::XPUDeviceContext, float>);

#endif
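With the kernel registered above for float, product reductions can run on XPU through the regular Python API. A hedged usage sketch, again assuming an XPU build (`paddle.prod` dispatches to reduce_prod):

import paddle

# Assumes an XPU build of Paddle; paddle.prod lowers to the new
# reduce_prod XPU kernel.
paddle.set_device('xpu')
x = paddle.rand([5, 6, 10], dtype='float32')

y_axis = paddle.prod(x, axis=-2)  # negative axis, normalized by the kernel
y_all = paddle.prod(x)            # reduce_all path, scalar output
print(y_axis.shape, y_all.shape)  # expected: [5, 10] []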
@@ -219,6 +219,7 @@ XPUOpMap& get_kl1_ops() {
{"reduce_mean", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"reduce_mean_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"reduce_prod", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"reduce_sum_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"reduce_sum", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
......
@@ -242,6 +242,7 @@ XPUOpMap& get_kl2_ops() {
{"reduce_mean_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"reduce_mean", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"reduce_prod", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"reduce_sum_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"reduce_sum", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
......
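Note that both the KL1 and KL2 op maps register reduce_prod, like reduce_mean, for FP32 only; this is also why the reduce_mean tests below use float32 inputs rather than float64.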
@@ -29,7 +29,7 @@ from paddle.fluid.framework import convert_np_dtype_to_dtype_
class TestMeanOp(OpTest):
    def setUp(self):
        self.op_type = "reduce_mean"
        self.inputs = {'X': np.random.random((5, 6, 10)).astype("float32")}
        self.attrs = {'use_xpu': True}
        self.outputs = {'Out': self.inputs['X'].mean(axis=0)}
@@ -46,7 +46,7 @@ class TestMeanOp5D(OpTest):
    def setUp(self):
        self.op_type = "reduce_mean"
        self.inputs = {
            'X': np.random.random((1, 2, 5, 6, 10)).astype("float32")
        }
        self.attrs = {'use_xpu': True}
        self.outputs = {'Out': self.inputs['X'].mean(axis=0)}
@@ -64,7 +64,7 @@ class TestMeanOp6D(OpTest):
    def setUp(self):
        self.op_type = "reduce_mean"
        self.inputs = {
            'X': np.random.random((1, 1, 2, 5, 6, 10)).astype("float32")
        }
        self.attrs = {'use_xpu': True}
        self.outputs = {'Out': self.inputs['X'].mean(axis=0)}
@@ -82,7 +82,7 @@ class TestMeanOp8D(OpTest):
    def setUp(self):
        self.op_type = "reduce_mean"
        self.inputs = {
            'X': np.random.random((1, 3, 1, 2, 1, 4, 3, 10)).astype("float32")
        }
        self.attrs = {'dim': (0, 3), 'use_xpu': True}
        self.outputs = {'Out': self.inputs['X'].mean(axis=(0, 3))}
@@ -99,7 +99,7 @@ class TestMeanOp8D(OpTest):
class Test1DReduce(OpTest):
    def setUp(self):
        self.op_type = "reduce_mean"
        self.inputs = {'X': np.random.random(120).astype("float32")}
        self.attrs = {'use_xpu': True}
        self.outputs = {'Out': self.inputs['X'].mean(axis=0)}
@@ -116,7 +116,7 @@ class Test2DReduce0(Test1DReduce):
    def setUp(self):
        self.op_type = "reduce_mean"
        self.attrs = {'dim': [0], 'use_xpu': True}
        self.inputs = {'X': np.random.random((20, 10)).astype("float32")}
        self.outputs = {'Out': self.inputs['X'].mean(axis=0)}
@@ -124,7 +124,7 @@ class Test2DReduce1(Test1DReduce):
    def setUp(self):
        self.op_type = "reduce_mean"
        self.attrs = {'dim': [1], 'use_xpu': True}
        self.inputs = {'X': np.random.random((20, 10)).astype("float32")}
        self.outputs = {
            'Out': self.inputs['X'].mean(axis=tuple(self.attrs['dim']))
        }
@@ -134,7 +134,7 @@ class Test3DReduce0(Test1DReduce):
    def setUp(self):
        self.op_type = "reduce_mean"
        self.attrs = {'dim': [1], 'use_xpu': True}
        self.inputs = {'X': np.random.random((5, 6, 7)).astype("float32")}
        self.outputs = {
            'Out': self.inputs['X'].mean(axis=tuple(self.attrs['dim']))
        }
@@ -144,7 +144,7 @@ class Test3DReduce1(Test1DReduce):
    def setUp(self):
        self.op_type = "reduce_mean"
        self.attrs = {'dim': [2], 'use_xpu': True}
        self.inputs = {'X': np.random.random((5, 6, 7)).astype("float32")}
        self.outputs = {
            'Out': self.inputs['X'].mean(axis=tuple(self.attrs['dim']))
        }
@@ -154,7 +154,7 @@ class Test3DReduce2(Test1DReduce):
    def setUp(self):
        self.op_type = "reduce_mean"
        self.attrs = {'dim': [-2], 'use_xpu': True}
        self.inputs = {'X': np.random.random((5, 6, 7)).astype("float32")}
        self.outputs = {
            'Out': self.inputs['X'].mean(axis=tuple(self.attrs['dim']))
        }
@@ -164,7 +164,7 @@ class Test3DReduce3(Test1DReduce):
    def setUp(self):
        self.op_type = "reduce_mean"
        self.attrs = {'dim': [1, 2], 'use_xpu': True}
        self.inputs = {'X': np.random.random((5, 6, 7)).astype("float32")}
        self.outputs = {
            'Out': self.inputs['X'].mean(axis=tuple(self.attrs['dim']))
        }
@@ -173,7 +173,7 @@ class Test3DReduce3(Test1DReduce):
class TestKeepDimReduce(Test1DReduce):
    def setUp(self):
        self.op_type = "reduce_mean"
        self.inputs = {'X': np.random.random((5, 6, 10)).astype("float32")}
        self.attrs = {'dim': [1], 'keep_dim': True, 'use_xpu': True}
        self.outputs = {
            'Out': self.inputs['X'].mean(
@@ -185,7 +185,7 @@ class TestKeepDim8DReduce(Test1DReduce):
    def setUp(self):
        self.op_type = "reduce_mean"
        self.inputs = {
            'X': np.random.random((2, 5, 3, 2, 2, 3, 4, 2)).astype("float32")
        }
        self.attrs = {'dim': (3, 4, 5), 'keep_dim': True, 'use_xpu': True}
        self.outputs = {
@@ -197,7 +197,7 @@ class TestKeepDim8DReduce(Test1DReduce):
class TestReduceAll(Test1DReduce):
    def setUp(self):
        self.op_type = "reduce_mean"
        self.inputs = {'X': np.random.random((5, 6, 2, 10)).astype("float32")}
        self.attrs = {'reduce_all': True, 'use_xpu': True}
        self.outputs = {'Out': self.inputs['X'].mean()}
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function

import unittest
import numpy as np
import sys
sys.path.append("..")
from op_test_xpu import OpTest, XPUOpTest
from op_test import skip_check_grad_ci
import paddle
import paddle.fluid.core as core
import paddle.fluid as fluid
from paddle.fluid import compiler, Program, program_guard
from paddle.fluid.framework import convert_np_dtype_to_dtype_


class TestXPUReduceProdOp(XPUOpTest):
    def setUp(self):
        self.init_op_type()
        self.initTestCase()
        self.use_xpu = True
        self.use_mkldnn = False
        self.attrs = {
            'dim': self.axis,
            'keep_dim': self.keep_dim,
            'reduce_all': self.reduce_all
        }
        self.inputs = {'X': np.random.random(self.shape).astype("float32")}
        if self.attrs['reduce_all']:
            self.outputs = {'Out': self.inputs['X'].prod()}
        else:
            self.outputs = {
                'Out': self.inputs['X'].prod(
                    axis=self.axis, keepdims=self.attrs['keep_dim'])
            }

    def test_check_output(self):
        if paddle.is_compiled_with_xpu():
            paddle.enable_static()
            place = paddle.XPUPlace(0)
            self.check_output_with_place(place)

    def test_check_grad(self):
        if paddle.is_compiled_with_xpu():
            paddle.enable_static()
            place = paddle.XPUPlace(0)
            self.check_grad_with_place(place, ['X'], 'Out')

    def init_op_type(self):
        self.op_type = "reduce_prod"
        self.use_mkldnn = False
        self.keep_dim = False
        self.reduce_all = False

    def initTestCase(self):
        self.shape = (5, 6, 10)
        self.axis = (0, )


class TestProdOp5D(TestXPUReduceProdOp):
    def initTestCase(self):
        self.shape = (1, 2, 5, 6, 10)
        self.axis = (0, )


class TestProdOp6D(TestXPUReduceProdOp):
    def initTestCase(self):
        self.shape = (1, 1, 2, 5, 6, 10)
        self.axis = (0, )


class TestProdOp8D(TestXPUReduceProdOp):
    def initTestCase(self):
        self.shape = (1, 3, 1, 2, 1, 4, 3, 10)
        self.axis = (0, 3)


class Test1DReduce(TestXPUReduceProdOp):
    def initTestCase(self):
        self.shape = 120
        self.axis = (0, )


class Test2DReduce0(TestXPUReduceProdOp):
    def initTestCase(self):
        self.shape = (20, 10)
        self.axis = (0, )


class Test2DReduce1(TestXPUReduceProdOp):
    def initTestCase(self):
        self.shape = (20, 10)
        self.axis = (1, )


class Test3DReduce0(TestXPUReduceProdOp):
    def initTestCase(self):
        self.shape = (5, 6, 7)
        self.axis = (1, )


class Test3DReduce1(TestXPUReduceProdOp):
    def initTestCase(self):
        self.shape = (5, 6, 7)
        self.axis = (2, )


class Test3DReduce2(TestXPUReduceProdOp):
    def initTestCase(self):
        self.shape = (5, 6, 7)
        self.axis = (-2, )


class Test3DReduce3(TestXPUReduceProdOp):
    def initTestCase(self):
        self.shape = (5, 6, 7)
        self.axis = (1, 2)


class TestKeepDimReduce(TestXPUReduceProdOp):
    def initTestCase(self):
        self.shape = (5, 6, 10)
        self.axis = (1, )
        self.keep_dim = True


class TestKeepDim8DReduce(TestXPUReduceProdOp):
    def initTestCase(self):
        self.shape = (2, 5, 3, 2, 2, 3, 4, 2)
        self.axis = (3, 4, 5)
        self.keep_dim = True


class TestReduceAll(TestXPUReduceProdOp):
    def initTestCase(self):
        self.shape = (5, 6, 2, 10)
        self.axis = (0, )
        self.reduce_all = True


if __name__ == '__main__':
    unittest.main()