未验证 提交 3c44e948 编写于 作者: H Hanchiao 提交者: GitHub

【Hackathon No.32】为 Paddle 优化 expand_as 前向&反向 op 在 GPU 上的计算性能 (#52700)

* Implement optimized kernel for OP-expand_as.

* Support fp16.
Co-authored-by: Timber-Ye <ye_hanqiao@163.com>
Co-authored-by: BrianQian1999 <brianqianhitsz@gmail.com>

* remove fp16 support

* remove MAX_RANK_SUPPORTED

---------
Co-authored-by: BrianQian1999 <brianqianhitsz@gmail.com>
上级 ea04bef8
......@@ -15,8 +15,43 @@
#include "paddle/phi/kernels/expand_as_grad_kernel.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/expand_as_grad_kernel_impl.h"
#include "paddle/phi/kernels/funcs/reduce_function.h"
namespace phi {
// Backward of expand_as: reduces `out_grad` back to the shape of `x` by
// summing over every axis that was broadcast in the forward pass.
//
// context:      device context used for allocation and kernel launch.
// x:            forward input; only its dims are consulted here.
// out_grad:     gradient w.r.t. the expanded output.
// target_shape: shape the forward op expanded to (not needed for the
//               backward computation itself).
// in_grad:      output, gradient w.r.t. `x`; allocated here with x's dims.
template <typename T, typename Context>
void ExpandAsGradKernel(const Context& context,
                        const DenseTensor& x,
                        const DenseTensor& out_grad,
                        const std::vector<int>& target_shape,
                        DenseTensor* in_grad) {
  auto in_dims = x.dims();
  auto out_dims = out_grad.dims();
  int out_rank = out_dims.size();
  PADDLE_ENFORCE_LE(
      out_rank,
      6,
      errors::InvalidArgument("The rank of the input 'Out@GRAD' for "
                              "expand_as_v2_grad op must be less than or equal "
                              "to 6, but the value received is %d.",
                              out_rank));
  context.template Alloc<T>(in_grad);
  if (in_dims == out_dims) {
    // No broadcasting happened in the forward pass, so the gradient passes
    // through unchanged.
    phi::Copy(context, out_grad, context.GetPlace(), false, in_grad);
  } else {
    // Sum out_grad over the broadcast axes (GetReduceDim picks them out by
    // comparing the two shapes).
    std::vector<int> reduce_dims = funcs::GetReduceDim(in_dims, out_dims, -1);
    funcs::ReduceKernel<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
        context, out_grad, in_grad, kps::IdentityFunctor<T>(), reduce_dims);
  }
}
} // namespace phi
PD_REGISTER_KERNEL(expand_as_grad,
GPU,
......
......@@ -15,8 +15,70 @@
#include "paddle/phi/kernels/expand_as_kernel.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/scalar.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/expand_as_kernel_impl.h"
#include "paddle/phi/kernels/funcs/broadcast_function.h"
namespace phi {
// Forward expand_as: broadcasts `x` up to `target_shape` on the GPU.
//
// ctx:          device context used for allocation and kernel launch.
// x:            input tensor to be expanded.
// y:            optional reference tensor (its shape is already folded into
//               `target_shape` by infermeta; unused here).
// target_shape: shape to expand to; must have rank >= rank of `x`, and each
//               entry must be positive or -1 (keep the input dimension).
// out:          output tensor, resized to `target_shape` and filled via
//               broadcast.
template <typename T, typename Context>
void ExpandAsKernel(const Context& ctx,
                    const DenseTensor& x,
                    const paddle::optional<DenseTensor>& y,
                    const std::vector<int>& target_shape,
                    DenseTensor* out) {
  int rank = x.dims().size();
  int target_rank = static_cast<int>(target_shape.size());
  // Guard before the unsigned subtraction below: a target rank smaller than
  // the input rank would underflow `diff` and make the insert() allocate an
  // enormous number of elements.
  PADDLE_ENFORCE_GE(
      target_rank,
      rank,
      errors::InvalidArgument(
          "The rank (%d) of the target shape for expand_as_v2 op must be "
          "greater than or equal to the rank (%d) of the input 'x'.",
          target_rank,
          rank));
  auto vec_in_dims = phi::vectorize<int>(x.dims());
  unsigned int diff = target_rank - rank;
  // Left-pad the input shape with 1s so it aligns with target_shape.
  vec_in_dims.insert(vec_in_dims.begin(), diff, 1);
  for (unsigned int i = 0; i < vec_in_dims.size(); ++i) {
    PADDLE_ENFORCE_NE(
        target_shape[i],
        0,
        errors::InvalidArgument("The value of target shape cannot be zero."));
    if (i < diff) {
      // Dimensions that do not exist in `x` must be given explicitly.
      PADDLE_ENFORCE_GT(
          target_shape[i],
          0,
          errors::InvalidArgument(
              "The expanded size (%d) for non-existing dimensions must be "
              "positive for expand_as_v2 op.",
              target_shape[i]));
    } else if (target_shape[i] > 0) {
      // Non-singleton input dimensions must match the target exactly.
      if (vec_in_dims[i] != 1) {
        PADDLE_ENFORCE_EQ(
            vec_in_dims[i],
            target_shape[i],
            errors::InvalidArgument(
                "The value (%d) of the non-singleton dimension does not match"
                " the corresponding value (%d) in shape for expand_as_v2 op.",
                vec_in_dims[i],
                target_shape[i]));
      }
    } else {
      // The only allowed negative value is -1 ("keep this dimension").
      PADDLE_ENFORCE_EQ(
          target_shape[i],
          -1,
          errors::InvalidArgument(
              "When the value in shape is negative for expand_as_v2 op, "
              "only -1 is supported, but the value received is %d.",
              target_shape[i]));
    }
  }
  out->Resize(phi::make_ddim(target_shape));
  ctx.template Alloc<T>(out);
  std::vector<const DenseTensor*> ins = {&x};
  std::vector<DenseTensor*> outs = {out};
  // BroadcastKernel performs the actual replication on the device; the
  // identity functor just copies each broadcast element through.
  phi::funcs::BroadcastKernel<ElementwiseType::kUnary, T, T>(
      ctx, ins, &outs, -1, kps::IdentityFunctor<T>());
}
} // namespace phi
PD_REGISTER_KERNEL(expand_as,
GPU,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册