Unverified commit 5829069d authored by ykkk2333, committed by GitHub

[XPU] migrate reduce kernels to phi, test=kunlun (#45973)

Parent: d7e74e63
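
The commit migrates the XPU reduce kernels from fluid to phi: the fluid class-style OpKernels shown first are superseded by phi function-style kernels. A minimal sketch of what changes at the registration level, with both lines taken verbatim from the files in this commit:

// fluid (old style): OpKernel subclass registered per device context
REGISTER_OP_XPU_KERNEL(
    reduce_max,
    ops::ReduceMaxXPUKernel<paddle::platform::XPUDeviceContext, float>);

// phi (new style): free-function kernel registered per backend
PD_REGISTER_KERNEL(max_raw, XPU, ALL_LAYOUT, phi::MaxRawKernel, float) {}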
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifdef PADDLE_WITH_XPU
#include <memory>
#include <string>
#include "paddle/fluid/operators/reduce_ops/reduce_op_xpu.h"
#include "paddle/fluid/platform/device/xpu/xpu_header.h"
namespace paddle {
namespace operators {
template <typename DeviceContext, typename T>
class ReduceMaxXPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
XPUReduce<DeviceContext, T>(context, xpu::reduce_max<T>);
}
};
template <typename DeviceContext, typename T>
class ReduceMaxGradXPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto dims = context.Attr<std::vector<int>>("dim");
bool reduce_all = context.Attr<bool>("reduce_all");
auto* x = context.Input<Tensor>("X");
auto* out = context.Input<Tensor>("Out");
auto* out_grad = context.Input<Tensor>(framework::GradVarName("Out"));
auto* x_grad = context.Output<Tensor>(framework::GradVarName("X"));
int in_dtype = context.Attr<int>("in_dtype");
PADDLE_ENFORCE_EQ(
in_dtype == -1,
true,
platform::errors::InvalidArgument(
"XPU only support in_dtype == -1 in reduce_sum_grad op."));
auto& dev_ctx = context.template device_context<DeviceContext>();
x_grad->mutable_data<T>(context.GetPlace());
const T* x_data = x->data<T>();
const T* out_data = out->data<T>();
const T* out_grad_data = out_grad->data<T>();
auto* x_grad_data = x_grad->data<T>();
const auto& input_dim_size = x->dims().size();
std::vector<int> true_dims;
for (size_t i = 0; i < dims.size(); ++i) {
if (dims[i] < 0) {
true_dims.push_back(dims[i] + input_dim_size);
} else {
true_dims.push_back(dims[i]);
}
}
std::vector<int> ydims(input_dim_size);
std::vector<int> xdims((input_dim_size));
std::set<int> dims_set(true_dims.begin(), true_dims.end());
for (auto i = 0; i < input_dim_size; i++) {
xdims[i] = x->dims()[i];
if (dims_set.find(i) != dims_set.end() || reduce_all) {
ydims[i] = 1;
} else {
ydims[i] = x->dims()[i];
}
}
T* brocast1 = nullptr;
T* brocast2 = nullptr;
bool* equal = nullptr;
PADDLE_ENFORCE_EQ(
xpu_malloc(reinterpret_cast<void**>(&brocast1), x->numel() * sizeof(T)),
XPU_SUCCESS,
platform::errors::ResourceExhausted("XPU has no enough memory"));
PADDLE_ENFORCE_EQ(
xpu_malloc(reinterpret_cast<void**>(&equal), x->numel() * sizeof(bool)),
XPU_SUCCESS,
platform::errors::ResourceExhausted("XPU has no enough memory"));
PADDLE_ENFORCE_EQ(
xpu_malloc(reinterpret_cast<void**>(&brocast2), x->numel() * sizeof(T)),
XPU_SUCCESS,
platform::errors::ResourceExhausted("XPU has no enough memory"));
// step 1. broadcast out and out_grad
int r = xpu::broadcast<T>(
dev_ctx.x_context(), out_data, brocast1, ydims, xdims);
PADDLE_ENFORCE_EQ(
r == xpu::Error_t::SUCCESS,
true,
platform::errors::External("XPU broadcast in reduce_max_grad op return"
" wrong value[%d %s].",
r,
XPUAPIErrorMsg[r]));
r = xpu::broadcast<T>(
dev_ctx.x_context(), out_grad_data, brocast2, ydims, xdims);
PADDLE_ENFORCE_EQ(
r == xpu::Error_t::SUCCESS,
true,
platform::errors::External("XPU broadcast in reduce_max_grad op return"
" wrong value[%d %s].",
r,
XPUAPIErrorMsg[r]));
// step 2. compare the broadcast out with x
r = xpu::equal<T>(dev_ctx.x_context(), x_data, brocast1, equal, x->numel());
PADDLE_ENFORCE_EQ(
r == xpu::Error_t::SUCCESS,
true,
platform::errors::External("XPU equal in reduce_max_grad "
"op return wrong value[%d %s].",
r,
XPUAPIErrorMsg[r]));
// step 3. get x_grad
r = xpu::constant<T>(dev_ctx.x_context(), brocast1, x->numel(), 0);
PADDLE_ENFORCE_EQ(
r == xpu::Error_t::SUCCESS,
true,
platform::errors::External("XPU constant in reduce_max_grad op return"
" wrong value[%d %s].",
r,
XPUAPIErrorMsg[r]));
r = xpu::select<T>(dev_ctx.x_context(),
equal,
brocast2,
brocast1,
x_grad_data,
xdims,
xdims);
PADDLE_ENFORCE_EQ(
r == xpu::Error_t::SUCCESS,
true,
platform::errors::External("XPU select in reduce_max_grad op return"
" wrong value[%d %s].",
r,
XPUAPIErrorMsg[r]));
if (dev_ctx.x_context()->xpu_stream) {
dev_ctx.Wait();
}
xpu_free(brocast1);
xpu_free(brocast2);
xpu_free(equal);
}
};
} // namespace operators
} // namespace paddle
REGISTER_OP_XPU_KERNEL(
reduce_max,
ops::ReduceMaxXPUKernel<paddle::platform::XPUDeviceContext, float>);
REGISTER_OP_XPU_KERNEL(
reduce_max_grad,
ops::ReduceMaxGradXPUKernel<paddle::platform::XPUDeviceContext, float>);
#endif
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifdef PADDLE_WITH_XPU
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/operators/reduce_ops/reduce_mean_op.h"
namespace paddle {
namespace operators {
template <typename DeviceContext, typename T>
class ReduceMeanXPUKernel : public framework::OpKernel<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
public:
void Compute(const framework::ExecutionContext& context) const override {
PADDLE_ENFORCE_EQ(
platform::is_xpu_place(context.GetPlace()),
true,
platform::errors::Unavailable("This kernel only runs on XPU."));
bool reduce_all = context.Attr<bool>("reduce_all");
auto* input = context.Input<Tensor>("X");
auto* output = context.Output<Tensor>("Out");
output->mutable_data<T>(context.GetPlace());
auto& dev_ctx = context.template device_context<DeviceContext>();
std::vector<int> xdims;
for (int i = 0; i < input->dims().size(); i++) {
xdims.push_back(input->dims()[i]);
}
auto rdims = context.Attr<std::vector<int>>("dim");
const auto& input_dim_size = input->dims().size();
std::vector<int> reduce_dims;
if (reduce_all) {
for (size_t i = 0; i < xdims.size(); i++) {
reduce_dims.push_back(static_cast<int>(i));
}
} else {
for (size_t i = 0; i < rdims.size(); ++i) {
if (rdims[i] < 0) {
reduce_dims.push_back(rdims[i] + input_dim_size);
} else {
reduce_dims.push_back(rdims[i]);
}
}
}
int r = xpu::reduce_mean(dev_ctx.x_context(),
reinterpret_cast<const XPUType*>(input->data<T>()),
reinterpret_cast<XPUType*>(output->data<T>()),
xdims,
reduce_dims);
PADDLE_ENFORCE_EQ(r,
XPU_SUCCESS,
platform::errors::External(
"XPU reduce_mean kernel return wrong value[%d %s]",
r,
XPUAPIErrorMsg[r]));
}
};
template <typename DeviceContext, typename T>
class ReduceMeanGradXPUKernel : public framework::OpKernel<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* input = ctx.Input<Tensor>("X");
auto* output_grad = ctx.Input<Tensor>(framework::GradVarName("Out"));
auto* input_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
XPUType* x_data =
reinterpret_cast<XPUType*>(input_grad->mutable_data<T>(ctx.GetPlace()));
const XPUType* dy_data =
reinterpret_cast<const XPUType*>(output_grad->data<T>());
bool reduce_all = ctx.Attr<bool>("reduce_all");
auto reduce_dims = ctx.Attr<std::vector<int>>("dim");
bool keep_dim = ctx.Attr<bool>("keep_dim");
std::vector<int> xdims;
for (int i = 0; i < input->dims().size(); i++) {
xdims.push_back(input->dims()[i]);
}
std::vector<int> ydims;
for (int i = 0; i < output_grad->dims().size(); i++) {
ydims.push_back(output_grad->dims()[i]);
}
int reduce_numel = 1;
if (reduce_all) {
reduce_dims.clear();
for (size_t d = 0; d < xdims.size(); ++d) {
reduce_dims.push_back(static_cast<int>(d));
}
}
for (auto& d : reduce_dims) {
if (d < 0) {
d = d + xdims.size();
}
reduce_numel *= xdims[d];
}
if (keep_dim != true) {
sort(reduce_dims.begin(), reduce_dims.end());
for (auto& d : reduce_dims) {
ydims.insert(ydims.begin() + d, 1);
}
}
float val = 1.0f / static_cast<float>(reduce_numel);
auto& dev_ctx = ctx.template device_context<DeviceContext>();
int r = xpu::constant(
dev_ctx.x_context(), x_data, input->numel(), static_cast<XPUType>(val));
PADDLE_ENFORCE_EQ(r,
XPU_SUCCESS,
platform::errors::External(
"XPU constant kernel return wrong value[%d %s]",
r,
XPUAPIErrorMsg[r]));
r = xpu::broadcast_mul(
dev_ctx.x_context(), x_data, dy_data, x_data, xdims, ydims);
PADDLE_ENFORCE_EQ(r,
XPU_SUCCESS,
platform::errors::External(
"XPU broadcast_mul kernel return wrong value[%d %s]",
r,
XPUAPIErrorMsg[r]));
}
};
} // namespace operators
} // namespace paddle
REGISTER_OP_XPU_KERNEL(
reduce_mean,
ops::ReduceMeanXPUKernel<paddle::platform::XPUDeviceContext, float>);
REGISTER_OP_XPU_KERNEL(
reduce_mean_grad,
ops::ReduceMeanGradXPUKernel<paddle::platform::XPUDeviceContext, float>);
#endif
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifdef PADDLE_WITH_XPU
#include <memory>
#include <vector>
#include "paddle/fluid/operators/reduce_ops/reduce_prod_op.h"
namespace paddle {
namespace operators {
template <typename DeviceContext, typename T>
class ReduceProdXPUKernel : public framework::OpKernel<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
public:
void Compute(const framework::ExecutionContext& context) const override {
PADDLE_ENFORCE_EQ(
platform::is_xpu_place(context.GetPlace()),
true,
platform::errors::Unavailable("This kernel only runs on XPU."));
bool reduce_all = context.Attr<bool>("reduce_all");
auto* input = context.Input<Tensor>("X");
auto* output = context.Output<Tensor>("Out");
output->mutable_data<T>(context.GetPlace());
auto& dev_ctx = context.template device_context<DeviceContext>();
std::vector<int> xdims;
for (int i = 0; i < input->dims().size(); i++) {
xdims.push_back(input->dims()[i]);
}
auto rdims = context.Attr<std::vector<int>>("dim");
const auto& input_dim_size = input->dims().size();
std::vector<int> reduce_dims;
if (reduce_all) {
for (size_t i = 0; i < xdims.size(); i++) {
reduce_dims.push_back(static_cast<int>(i));
}
} else {
for (size_t i = 0; i < rdims.size(); ++i) {
if (rdims[i] < 0) {
reduce_dims.push_back(rdims[i] + input_dim_size);
} else {
reduce_dims.push_back(rdims[i]);
}
}
}
int r = xpu::reduce_prod(dev_ctx.x_context(),
reinterpret_cast<const XPUType*>(input->data<T>()),
reinterpret_cast<XPUType*>(output->data<T>()),
xdims,
reduce_dims);
PADDLE_ENFORCE_EQ(r,
XPU_SUCCESS,
platform::errors::External(
"XPU reduce_prod kernel return wrong value[%d %s]",
r,
XPUAPIErrorMsg[r]));
}
};
} // namespace operators
} // namespace paddle
REGISTER_OP_XPU_KERNEL(
reduce_prod,
ops::ReduceProdXPUKernel<paddle::platform::XPUDeviceContext, float>);
#endif
@@ -42,7 +42,7 @@ PD_REGISTER_KERNEL(
    max, GPU, ALL_LAYOUT, phi::MaxKernel, float, double, int, int64_t) {}
#endif

-#if defined(PADDLE_WITH_XPU_KP)
+#if defined(PADDLE_WITH_XPU_KP) && !defined(PADDLE_WITH_XPU)
PD_REGISTER_KERNEL(max, KPS, ALL_LAYOUT, phi::MaxKernel, float) {}
#endif

@@ -50,3 +50,7 @@ PD_REGISTER_KERNEL(max, KPS, ALL_LAYOUT, phi::MaxKernel, float) {}
PD_REGISTER_KERNEL(
    max, OneDNN, ALL_LAYOUT, phi::MaxKernel, float, phi::dtype::bfloat16) {}
#endif
+
+#if defined(PADDLE_WITH_XPU)
+PD_REGISTER_KERNEL(max, XPU, ALL_LAYOUT, phi::MaxKernel, float) {}
+#endif
@@ -47,7 +47,7 @@ PD_REGISTER_KERNEL(mean,
    phi::dtype::float16) {}
#endif

-#if defined(PADDLE_WITH_XPU_KP)
+#if defined(PADDLE_WITH_XPU_KP) && !defined(PADDLE_WITH_XPU)
PD_REGISTER_KERNEL(mean, KPS, ALL_LAYOUT, phi::MeanKernel, float) {}
#endif

@@ -55,3 +55,7 @@ PD_REGISTER_KERNEL(mean, KPS, ALL_LAYOUT, phi::MeanKernel, float) {}
PD_REGISTER_KERNEL(
    mean, OneDNN, ALL_LAYOUT, phi::MeanKernel, float, phi::dtype::bfloat16) {}
#endif
+
+#if defined(PADDLE_WITH_XPU)
+PD_REGISTER_KERNEL(mean, XPU, ALL_LAYOUT, phi::MeanKernel, float) {}
+#endif
@@ -39,6 +39,10 @@ PD_REGISTER_KERNEL(
    prod, GPU, ALL_LAYOUT, phi::ProdKernel, float, double, int, int64_t) {}
#endif

-#if defined(PADDLE_WITH_XPU_KP)
+#if defined(PADDLE_WITH_XPU_KP) && !defined(PADDLE_WITH_XPU)
PD_REGISTER_KERNEL(prod, KPS, ALL_LAYOUT, phi::ProdKernel, float) {}
#endif
+
+#if defined(PADDLE_WITH_XPU)
+PD_REGISTER_KERNEL(prod, XPU, ALL_LAYOUT, phi::ProdKernel, float) {}
+#endif
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include <functional>
#include <memory>
#include <set>
#include <string>
#include <vector>

#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/core/dense_tensor.h"
namespace phi {
template <typename Context, typename T>
int XPUReduce(const Context& dev_ctx,
const DenseTensor& x,
const std::vector<int64_t>& dims,
bool keep_dim,
bool reduce_all,
DenseTensor* out,
std::function<int(xpu::Context*,
const T*,
T*,
const std::vector<int>&,
const std::vector<int>&)> func) {
dev_ctx.template Alloc<T>(out);
const auto* x_data = x.data<T>();
auto* y_data = out->data<T>();
const auto& input_dim_size = x.dims().size();
std::vector<int> true_dims;
for (size_t i = 0; i < dims.size(); ++i) {
if (dims[i] < 0) {
true_dims.push_back(dims[i] + input_dim_size);
} else {
true_dims.push_back(dims[i]);
}
}
std::vector<int> reduce_dims;
std::vector<int> xdims((input_dim_size));
for (int i = 0; i < input_dim_size; ++i) {
xdims[i] = x.dims()[i];
}
if (reduce_all) {
for (int i = 0; i < input_dim_size; ++i) {
reduce_dims.push_back(i);
}
} else {
std::set<int> dims_set(true_dims.begin(), true_dims.end());
for (auto i = 0; i < input_dim_size; i++) {
if (dims_set.find(i) != dims_set.end()) {
if (x.dims()[i] != 1) {
reduce_dims.push_back(i);
}
}
}
}
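// If no axes are left to reduce (e.g. every requested axis has extent 1, so
// reducing it is a no-op), the reduction degenerates to a plain copy;
// otherwise dispatch to the XPU reduce primitive supplied by the caller.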
int r = xpu::SUCCESS;
if (reduce_dims.size() == 0) {
r = xpu::copy<T>(
dev_ctx.x_context(), x_data, y_data, x.numel() * sizeof(T));
PADDLE_ENFORCE_XDNN_SUCCESS(r, "copy");
} else {
r = func(dev_ctx.x_context(), x_data, y_data, xdims, reduce_dims);
}
return r;
}
} // namespace phi
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/reduce_max_grad_kernel.h"
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/backends/xpu/xpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/xpu/reduce.h"
namespace phi {
template <typename T, typename Context>
void ReduceMaxGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& out,
const DenseTensor& out_grad,
const IntArray& dims_arr,
bool keep_dim,
bool reduce_all,
DenseTensor* x_grad) {
auto dims = dims_arr.GetData();
dev_ctx.template Alloc<T>(x_grad);
const T* x_data = x.data<T>();
const T* out_data = out.data<T>();
const T* out_grad_data = out_grad.data<T>();
auto* x_grad_data = x_grad->data<T>();
const auto& input_dim_size = x.dims().size();
std::vector<int> true_dims;
for (size_t i = 0; i < dims.size(); ++i) {
if (dims[i] < 0) {
true_dims.push_back(dims[i] + input_dim_size);
} else {
true_dims.push_back(dims[i]);
}
}
std::vector<int> ydims(input_dim_size);
std::vector<int> xdims((input_dim_size));
std::set<int> dims_set(true_dims.begin(), true_dims.end());
for (auto i = 0; i < input_dim_size; i++) {
xdims[i] = x.dims()[i];
if (dims_set.find(i) != dims_set.end() || reduce_all) {
ydims[i] = 1;
} else {
ydims[i] = x.dims()[i];
}
}
T* brocast1 = nullptr;
T* brocast2 = nullptr;
bool* equal = nullptr;
PADDLE_ENFORCE_EQ(
xpu_malloc(reinterpret_cast<void**>(&brocast1), x.numel() * sizeof(T)),
XPU_SUCCESS,
errors::ResourceExhausted("XPU has no enough memory"));
PADDLE_ENFORCE_EQ(
xpu_malloc(reinterpret_cast<void**>(&equal), x.numel() * sizeof(bool)),
XPU_SUCCESS,
errors::ResourceExhausted("XPU has no enough memory"));
PADDLE_ENFORCE_EQ(
xpu_malloc(reinterpret_cast<void**>(&brocast2), x.numel() * sizeof(T)),
XPU_SUCCESS,
errors::ResourceExhausted("XPU has no enough memory"));
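// reduce_max backward: dX equals dOut at the positions where X equals the
// (broadcast) maximum and is 0 elsewhere. Steps 1-3 below implement this with
// broadcast, equal, constant and select.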
// step 1. broadcast out and out_grad
int r =
xpu::broadcast<T>(dev_ctx.x_context(), out_data, brocast1, ydims, xdims);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "broadcast");
r = xpu::broadcast<T>(
dev_ctx.x_context(), out_grad_data, brocast2, ydims, xdims);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "broadcast");
// step 2. compare the broadcast out with x
r = xpu::equal<T>(dev_ctx.x_context(), x_data, brocast1, equal, x.numel());
PADDLE_ENFORCE_XDNN_SUCCESS(r, "equal");
// step 3. get x_grad
r = xpu::constant<T>(dev_ctx.x_context(), brocast1, x.numel(), 0);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "constant");
r = xpu::select<T>(dev_ctx.x_context(),
equal,
brocast2,
brocast1,
x_grad_data,
xdims,
xdims);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "select");
if (dev_ctx.x_context()->xpu_stream) {
dev_ctx.Wait();
}
xpu_free(brocast1);
xpu_free(brocast2);
xpu_free(equal);
}
} // namespace phi
PD_REGISTER_KERNEL(max_grad, XPU, ALL_LAYOUT, phi::ReduceMaxGradKernel, float) {
}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/reduce_max_kernel.h"
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/backends/xpu/xpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/xpu/reduce.h"
namespace phi {
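// max_raw is the reduce variant that receives reduce_all explicitly; it simply
// forwards to the shared XPUReduce helper with xpu::reduce_max as the
// reduction functor.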
template <typename T, typename Context>
void MaxRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const IntArray& dims,
bool keep_dim,
bool reduce_all,
DenseTensor* out) {
int r = XPUReduce<Context, T>(dev_ctx,
x,
dims.GetData(),
keep_dim,
reduce_all,
out,
xpu::reduce_max<T>);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "reduce_max");
}
} // namespace phi
PD_REGISTER_KERNEL(max_raw, XPU, ALL_LAYOUT, phi::MaxRawKernel, float) {}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/reduce_mean_grad_kernel.h"
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/backends/xpu/xpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/xpu/reduce.h"
namespace phi {
template <typename T, typename Context>
void ReduceMeanGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& out_grad,
const IntArray& dims_arr,
bool keep_dim,
bool reduce_all,
DenseTensor* x_grad) {
using XPUType = typename XPUTypeTrait<T>::Type;
dev_ctx.template Alloc<T>(x_grad);
const XPUType* dy_data = reinterpret_cast<const XPUType*>(out_grad.data<T>());
XPUType* x_data = reinterpret_cast<XPUType*>(x_grad->data<T>());
auto reduce_dims = dims_arr.GetData();
std::vector<int> xdims;
for (int i = 0; i < x.dims().size(); i++) {
xdims.push_back(x.dims()[i]);
}
std::vector<int> ydims;
for (int i = 0; i < out_grad.dims().size(); i++) {
ydims.push_back(out_grad.dims()[i]);
}
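// reduce_mean backward: dX = broadcast(dOut) / N, where N is the number of
// reduced elements. x_grad is filled with 1/N and then broadcast-multiplied by
// dOut; when keep_dim is false, size-1 axes are re-inserted into ydims so the
// shapes stay broadcast-compatible with xdims.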
int reduce_numel = 1;
if (reduce_all) {
reduce_dims.clear();
for (size_t d = 0; d < xdims.size(); ++d) {
reduce_dims.push_back(static_cast<int>(d));
}
}
for (auto& d : reduce_dims) {
if (d < 0) {
d = d + xdims.size();
}
reduce_numel *= xdims[d];
}
if (keep_dim != true) {
sort(reduce_dims.begin(), reduce_dims.end());
for (auto& d : reduce_dims) {
ydims.insert(ydims.begin() + d, 1);
}
}
float val = 1.0f / static_cast<float>(reduce_numel);
int r = xpu::constant(
dev_ctx.x_context(), x_data, x.numel(), static_cast<XPUType>(val));
PADDLE_ENFORCE_XDNN_SUCCESS(r, "constant");
r = xpu::broadcast_mul(
dev_ctx.x_context(), x_data, dy_data, x_data, xdims, ydims);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "broadcast_mul");
}
} // namespace phi
PD_REGISTER_KERNEL(
mean_grad, XPU, ALL_LAYOUT, phi::ReduceMeanGradKernel, float) {}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/reduce_mean_kernel.h"
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/backends/xpu/xpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/xpu/reduce.h"
namespace phi {
template <typename T, typename Context>
void MeanRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const IntArray& dims,
bool keep_dim,
bool reduce_all,
DenseTensor* out) {
int r = XPUReduce<Context, T>(dev_ctx,
x,
dims.GetData(),
keep_dim,
reduce_all,
out,
xpu::reduce_mean<T>);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "reduce_mean");
}
} // namespace phi
PD_REGISTER_KERNEL(mean_raw, XPU, ALL_LAYOUT, phi::MeanRawKernel, float) {}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/reduce_prod_kernel.h"
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/backends/xpu/xpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/xpu/reduce.h"
namespace phi {
template <typename T, typename Context>
void ProdRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const IntArray& dims,
bool keep_dim,
bool reduce_all,
DenseTensor* out) {
int r = XPUReduce<Context, T>(dev_ctx,
x,
dims.GetData(),
keep_dim,
reduce_all,
out,
xpu::reduce_prod<T>);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "reduce_prod");
}
} // namespace phi
PD_REGISTER_KERNEL(prod_raw, XPU, ALL_LAYOUT, phi::ProdRawKernel, float) {}