Unverified commit 22fe4f03, authored by dongfangshenzhu, committed by GitHub

add phi reduce_sum test=kunlun (#46241)

* add phi reduce_sum test=kunlun

* add phi reduce_sum test=kunlun

* add phi reduce_sum test=kunlun
Parent da17953a
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifdef PADDLE_WITH_XPU

#include <memory>
#include <set>
#include <string>
#include <vector>

#include "paddle/fluid/operators/reduce_ops/reduce_op_xpu.h"
#include "paddle/fluid/platform/device/xpu/xpu_header.h"

namespace paddle {
namespace operators {

using Tensor = framework::Tensor;

template <typename DeviceContext, typename T>
class ReduceSumXPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    // Forward pass: delegate to the shared XPUReduce helper with
    // xpu::reduce_sum as the reduction functor.
    XPUReduce<DeviceContext, T>(context, xpu::reduce_sum<T>);
  }
};

template <typename DeviceContext, typename T>
class ReduceSumGradXPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto dims = context.Attr<std::vector<int>>("dim");
    bool reduce_all = context.Attr<bool>("reduce_all");
    auto* x = context.Input<Tensor>("X");
    auto* out = context.Input<Tensor>(framework::GradVarName("Out"));
    auto* x_grad = context.Output<Tensor>(framework::GradVarName("X"));
    int in_dtype = context.Attr<int>("in_dtype");
    PADDLE_ENFORCE_EQ(
        in_dtype == -1,
        true,
        platform::errors::InvalidArgument(
            "XPU only supports in_dtype == -1 in reduce_sum_grad op."));

    auto& dev_ctx = context.template device_context<DeviceContext>();
    x_grad->mutable_data<T>(context.GetPlace());
    const auto* out_data = out->data<T>();
    auto* x_grad_data = x_grad->data<T>();

    // Normalize negative reduce dims to their positive equivalents.
    const auto& input_dim_size = x->dims().size();
    std::vector<int> true_dims;
    for (size_t i = 0; i < dims.size(); ++i) {
      if (dims[i] < 0) {
        true_dims.push_back(dims[i] + input_dim_size);
      } else {
        true_dims.push_back(dims[i]);
      }
    }

    // ydims: the keep-dim reduced shape of out_grad; xdims: the input shape.
    std::vector<int> ydims(input_dim_size);
    std::vector<int> xdims(input_dim_size);
    std::set<int> dims_set(true_dims.begin(), true_dims.end());
    for (auto i = 0; i < input_dim_size; i++) {
      xdims[i] = x->dims()[i];
      if (dims_set.find(i) != dims_set.end() || reduce_all) {
        ydims[i] = 1;
      } else {
        ydims[i] = x->dims()[i];
      }
    }

    // The gradient of a sum is 1 for every summed element, so x_grad is
    // simply out_grad broadcast from ydims back to xdims.
    int r = xpu::broadcast<T>(
        dev_ctx.x_context(), out_data, x_grad_data, ydims, xdims);
    PADDLE_ENFORCE_EQ(
        r == xpu::Error_t::SUCCESS,
        true,
        platform::errors::External("XPU broadcast in reduce_sum_grad op "
                                   "returned wrong value [%d %s].",
                                   r,
                                   XPUAPIErrorMsg[r]));
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;

REGISTER_OP_XPU_KERNEL(
    reduce_sum,
    ops::ReduceSumXPUKernel<paddle::platform::XPUDeviceContext, float>);
REGISTER_OP_XPU_KERNEL(
    reduce_sum_grad,
    ops::ReduceSumGradXPUKernel<paddle::platform::XPUDeviceContext, float>);

#endif
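For reference, the shape bookkeeping that this fluid kernel and the phi kernels below share can be exercised standalone. Below is a minimal sketch in plain C++ with no Paddle dependencies (the ReducedShape helper is hypothetical): negative dims are normalized, then every reduced axis is set to 1 to obtain the keep-dim output shape.

#include <iostream>
#include <set>
#include <vector>

// Hypothetical standalone helper mirroring the loop in the kernels above:
// normalize negative reduce dims, then set each reduced axis to 1.
std::vector<int> ReducedShape(const std::vector<int>& xdims,
                              std::vector<int> dims,
                              bool reduce_all) {
  const int rank = static_cast<int>(xdims.size());
  for (auto& d : dims) {
    if (d < 0) d += rank;  // dim -1 on a rank-3 tensor means axis 2
  }
  std::set<int> dims_set(dims.begin(), dims.end());
  std::vector<int> ydims(rank);
  for (int i = 0; i < rank; ++i) {
    ydims[i] = (reduce_all || dims_set.count(i) > 0) ? 1 : xdims[i];
  }
  return ydims;
}

int main() {
  // Reducing a [2, 3, 4] tensor over dim -1 gives keep-dim shape [2, 3, 1].
  for (int d : ReducedShape({2, 3, 4}, {-1}, /*reduce_all=*/false)) {
    std::cout << d << ' ';  // prints: 2 3 1
  }
  std::cout << '\n';
}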
@@ -73,7 +73,7 @@ PD_REGISTER_KERNEL(sum,
 }
 #endif
 
-#if defined(PADDLE_WITH_XPU_KP)
+#if defined(PADDLE_WITH_XPU_KP) && !defined(PADDLE_WITH_XPU)
 PD_REGISTER_KERNEL(sum, KPS, ALL_LAYOUT, phi::SumKernel, float) {
   kernel->OutputAt(0).SetDataType(paddle::experimental::DataType::UNDEFINED);
 }
@@ -83,3 +83,7 @@ PD_REGISTER_KERNEL(sum, KPS, ALL_LAYOUT, phi::SumKernel, float) {
 PD_REGISTER_KERNEL(
     sum, OneDNN, ALL_LAYOUT, phi::SumKernel, float, phi::dtype::bfloat16) {}
 #endif
+
+#if defined(PADDLE_WITH_XPU)
+PD_REGISTER_KERNEL(sum, XPU, ALL_LAYOUT, phi::SumKernel, float) {}
+#endif
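The new !defined(PADDLE_WITH_XPU) guard keeps the KPS registration and the native XPU registration mutually exclusive, so the sum kernel is registered exactly once per build. A standalone sketch of the guard logic (the macros here are defined locally to mimic a hypothetical build where both backends are enabled):

#include <iostream>

#define PADDLE_WITH_XPU 1     // pretend the native XPU backend is enabled
#define PADDLE_WITH_XPU_KP 1  // ... and the KP backend as well

int main() {
#if defined(PADDLE_WITH_XPU_KP) && !defined(PADDLE_WITH_XPU)
  std::cout << "register sum via KPS\n";  // dead when XPU is also on
#endif
#if defined(PADDLE_WITH_XPU)
  std::cout << "register sum via XPU\n";  // the only line printed here
#endif
}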
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/reduce_sum_grad_kernel.h"
#include <set>
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/core/kernel_registry.h"

namespace phi {

template <typename T, typename Context>
void ReduceSumGradKernel(const Context& dev_ctx,
                         const DenseTensor& x,
                         const DenseTensor& out_grad,
                         const IntArray& dims_arr,
                         bool keep_dim,
                         bool reduce_all,
                         DenseTensor* x_grad) {
  using XPUType = typename XPUTypeTrait<T>::Type;
  auto dims = dims_arr.GetData();
  dev_ctx.template Alloc<XPUType>(x_grad);
  const auto* out_data = out_grad.data<XPUType>();
  auto* x_grad_data = x_grad->data<XPUType>();

  // Normalize negative reduce dims to their positive equivalents.
  const auto& input_dim_size = x.dims().size();
  std::vector<int> true_dims;
  for (size_t i = 0; i < dims.size(); ++i) {
    if (dims[i] < 0) {
      true_dims.push_back(dims[i] + input_dim_size);
    } else {
      true_dims.push_back(dims[i]);
    }
  }

  // ydims: the keep-dim reduced shape of out_grad; xdims: the input shape.
  std::vector<int> ydims(input_dim_size);
  std::vector<int> xdims(input_dim_size);
  std::set<int> dims_set(true_dims.begin(), true_dims.end());
  for (auto i = 0; i < input_dim_size; i++) {
    xdims[i] = x.dims()[i];
    if (dims_set.find(i) != dims_set.end() || reduce_all) {
      ydims[i] = 1;
    } else {
      ydims[i] = x.dims()[i];
    }
  }

  // x_grad is out_grad broadcast from the reduced shape back to x's shape.
  int r = xpu::broadcast<XPUType>(
      dev_ctx.x_context(), out_data, x_grad_data, ydims, xdims);
  PADDLE_ENFORCE_XDNN_SUCCESS(r, "broadcast");
}

}  // namespace phi

PD_REGISTER_KERNEL(
    sum_grad, XPU, ALL_LAYOUT, phi::ReduceSumGradKernel, float) {}
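Because every input element contributes to the sum with weight 1, d(sum)/dx is a tensor of ones, and the whole backward pass reduces to broadcasting out_grad from ydims back to xdims. Below is a CPU sketch of what the xpu::broadcast call computes, restricted to a 2-D reduce-over-columns case (the BroadcastRows helper is hypothetical):

#include <iostream>
#include <vector>

// Hypothetical CPU stand-in for xpu::broadcast with ydims = {rows, 1} and
// xdims = {rows, cols}: every x_grad element is a copy of the out_grad
// value of its row, because d(sum)/dx_i == 1 for each summed element.
std::vector<float> BroadcastRows(const std::vector<float>& out_grad,
                                 int rows, int cols) {
  std::vector<float> x_grad(rows * cols);
  for (int r = 0; r < rows; ++r) {
    for (int c = 0; c < cols; ++c) {
      x_grad[r * cols + c] = out_grad[r];
    }
  }
  return x_grad;
}

int main() {
  // out = sum(x, dim=1, keep_dim=True) on a 2x3 input, so out_grad is 2x1.
  std::vector<float> out_grad = {1.0f, 2.0f};
  for (float g : BroadcastRows(out_grad, /*rows=*/2, /*cols=*/3)) {
    std::cout << g << ' ';  // prints: 1 1 1 2 2 2
  }
  std::cout << '\n';
}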
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/reduce_sum_kernel.h"
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/backends/xpu/xpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/xpu/reduce.h"

namespace phi {

template <typename T, typename Context>
void SumRawKernel(const Context& dev_ctx,
                  const DenseTensor& x,
                  const IntArray& dims,
                  bool keep_dim,
                  bool reduce_all,
                  DataType out_dtype,
                  DenseTensor* out) {
  // The forward pass delegates to the shared XPUReduce helper, passing
  // xpu::reduce_sum as the reduction functor.
  int r = XPUReduce<Context, T>(dev_ctx,
                                x,
                                dims.GetData(),
                                keep_dim,
                                reduce_all,
                                out,
                                xpu::reduce_sum<T>);
  PADDLE_ENFORCE_XDNN_SUCCESS(r, "reduce_sum");
}

}  // namespace phi

PD_REGISTER_KERNEL(sum_raw, XPU, ALL_LAYOUT, phi::SumRawKernel, float) {}
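For completeness, here is a CPU sketch of the forward reduction xpu::reduce_sum performs, in the same 2-D reduce-over-columns case as above (the SumRows helper is hypothetical):

#include <iostream>
#include <vector>

// Hypothetical CPU stand-in for the reduction itself: each keep-dim output
// element is the sum of its input row (xdims = {rows, cols} -> ydims =
// {rows, 1}).
std::vector<float> SumRows(const std::vector<float>& x, int rows, int cols) {
  std::vector<float> out(rows, 0.0f);
  for (int r = 0; r < rows; ++r) {
    for (int c = 0; c < cols; ++c) {
      out[r] += x[r * cols + c];
    }
  }
  return out;
}

int main() {
  std::vector<float> x = {1, 2, 3, 4, 5, 6};  // a 2x3 tensor, row-major
  for (float v : SumRows(x, 2, 3)) {
    std::cout << v << ' ';  // prints: 6 15
  }
  std::cout << '\n';
}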
@@ -67,7 +67,7 @@ class XPUTestReduceMaxOp(XPUOpTestWrapper):
             self.check_output_with_place(self.place)
 
         def test_check_grad(self):
-            pass
+            self.check_grad_with_place(self.place, ['X'], 'Out')
 
 
     class XPUTestReduceMaxCase1(XPUTestReduceMaxBase):
...
@@ -67,7 +67,7 @@ class XPUTestReduceSumOp(XPUOpTestWrapper):
             self.check_output_with_place(self.place)
 
         def test_check_grad(self):
-            pass
+            self.check_grad_with_place(self.place, ['X'], 'Out')
 
 
     class XPUTestReduceSumCase1(XPUTestReduceSumBase):
...