未验证 提交 6354f81c 编写于 作者: Y Yiqun Liu 提交者: GitHub

Rename function TensorReduceFunctorImpl to TensorReduceImpl, and CubTensorReduceFunctorImpl to CubTensorReduceImpl. (#39387)

上级 d7dddf94
......@@ -20,15 +20,6 @@
#include <set>
#include <vector>
#ifdef __NVCC__
#include "cub/cub.cuh"
#endif
#ifdef __HIPCC__
#include <hipcub/hipcub.hpp>
namespace cub = hipcub;
#endif
#include "paddle/fluid/framework/tensor.h"
#include "paddle/pten/core/dense_tensor.h"
......@@ -46,7 +37,7 @@ void TensorReduceImpl(const platform::CUDADeviceContext& dev_ctx,
gpuStream_t stream) {
y->mutable_data<Ty>(x.place());
pten::kernels::TensorReduceFunctorImpl<Tx, Ty, ReduceOp, TransformOp>(
pten::kernels::TensorReduceImpl<Tx, Ty, ReduceOp, TransformOp>(
static_cast<const pten::GPUContext&>(dev_ctx), x, y, transform,
origin_reduce_dims, stream);
}
......
......@@ -2016,10 +2016,7 @@ void default_elementwise_add_grad(const GPUContext &ctx,
std::vector<int> reduce_dims =
funcs::GetReduceDim(x.dims(), out.dims(), axis);
gpuStream_t stream = ctx.stream();
kernels::TensorReduceFunctorImpl<T,
T,
kps::AddFunctor,
kps::IdentityFunctor<T>>(
kernels::TensorReduceImpl<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
ctx, dout, dx, kps::IdentityFunctor<T>(), reduce_dims, stream);
}
}
......@@ -2034,10 +2031,7 @@ void default_elementwise_add_grad(const GPUContext &ctx,
std::vector<int> reduce_dims =
funcs::GetReduceDim(y.dims(), out.dims(), axis);
gpuStream_t stream = ctx.stream();
kernels::TensorReduceFunctorImpl<T,
T,
kps::AddFunctor,
kps::IdentityFunctor<T>>(
kernels::TensorReduceImpl<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
ctx, dout, dy, kps::IdentityFunctor<T>(), reduce_dims, stream);
}
}
......@@ -2133,10 +2127,7 @@ void default_elementwise_sub_grad(const GPUContext &ctx,
std::vector<int> reduce_dims =
funcs::GetReduceDim(x.dims(), out.dims(), axis);
gpuStream_t stream = ctx.stream();
kernels::TensorReduceFunctorImpl<T,
T,
kps::AddFunctor,
kps::IdentityFunctor<T>>(
kernels::TensorReduceImpl<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
ctx, dout, dx, kps::IdentityFunctor<T>(), reduce_dims, stream);
}
}
......@@ -2157,10 +2148,7 @@ void default_elementwise_sub_grad(const GPUContext &ctx,
std::vector<int> reduce_dims =
funcs::GetReduceDim(y.dims(), out.dims(), axis);
gpuStream_t stream = ctx.stream();
kernels::TensorReduceFunctorImpl<T,
T,
kps::AddFunctor,
kps::InverseFunctor<T>>(
kernels::TensorReduceImpl<T, T, kps::AddFunctor, kps::InverseFunctor<T>>(
ctx, dout, dy, kps::InverseFunctor<T>(), reduce_dims, stream);
}
}
......
......@@ -1007,7 +1007,7 @@ template <typename Tx,
static
typename std::enable_if<!std::is_same<Tx, paddle::platform::float16>::value,
void>::type
CubTensorReduceFunctorImpl(const Tx* x_data,
CubTensorReduceImpl(const Tx* x_data,
Ty* y_data,
const TransformOp& transform,
int reduce_num,
......@@ -1051,7 +1051,7 @@ template <typename Tx,
static
typename std::enable_if<std::is_same<Tx, paddle::platform::float16>::value,
void>::type
CubTensorReduceFunctorImpl(const Tx* x_data,
CubTensorReduceImpl(const Tx* x_data,
Ty* y_data,
const TransformOp& transform,
int reduce_num,
......@@ -1065,7 +1065,7 @@ template <typename Tx,
typename Ty,
template <typename> class ReduceOp,
typename TransformOp>
void TensorReduceFunctorImpl(const pten::GPUContext& dev_ctx,
void TensorReduceImpl(const pten::GPUContext& dev_ctx,
const pten::DenseTensor& x,
pten::DenseTensor* y,
const TransformOp& transform,
......@@ -1102,7 +1102,7 @@ void TensorReduceFunctorImpl(const pten::GPUContext& dev_ctx,
constexpr bool kIsTxFP16 = std::is_same<Tx, paddle::platform::float16>::value;
bool use_cub_reduce = config.reduce_num == numel && !kIsTxFP16;
if (use_cub_reduce) {
CubTensorReduceFunctorImpl<Tx, Ty, ReduceOp, TransformOp>(
CubTensorReduceImpl<Tx, Ty, ReduceOp, TransformOp>(
x_data, y_data, transform, config.reduce_num, x.place(), stream);
return;
}
......@@ -1239,10 +1239,10 @@ void Reduce(const GPUContext& dev_ctx,
pten::DataType::INT64,
pten::DataType::FLOAT16,
out_dtype,
"TensorReduceFunctorImpl",
"TensorReduceImpl",
([&] {
using MPType = typename kps::details::MPTypeTrait<data_t>::Type;
pten::kernels::TensorReduceFunctorImpl<data_t,
pten::kernels::TensorReduceImpl<data_t,
data_t,
ReduceOp,
TransformOp<data_t, MPType>>(
......@@ -1255,8 +1255,7 @@ void Reduce(const GPUContext& dev_ctx,
}));
} else {
using MPType = typename kps::details::MPTypeTrait<T>::Type;
pten::kernels::
TensorReduceFunctorImpl<T, T, ReduceOp, TransformOp<T, MPType>>(
pten::kernels::TensorReduceImpl<T, T, ReduceOp, TransformOp<T, MPType>>(
dev_ctx,
x,
out,
......
......@@ -60,10 +60,7 @@ struct ReduceSumForMatmulGrad<GPUContext, T> {
DenseTensor* output,
const std::vector<int>& reduce_dims) {
auto stream = dev_ctx.stream();
kernels::TensorReduceFunctorImpl<T,
T,
kps::AddFunctor,
kps::IdentityFunctor<T>>(
kernels::TensorReduceImpl<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
dev_ctx, input, output, kps::IdentityFunctor<T>(), reduce_dims, stream);
}
};
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册