未验证 提交 6354f81c 编写于 作者: Y Yiqun Liu 提交者: GitHub

Rename TensorReduceFunctorImpl to TensorReduceImpl and CubTensorReduceFunctorImpl to CubTensorReduceImpl. (#39387)

上级 d7dddf94
...@@ -20,15 +20,6 @@ ...@@ -20,15 +20,6 @@
#include <set> #include <set>
#include <vector> #include <vector>
#ifdef __NVCC__
#include "cub/cub.cuh"
#endif
#ifdef __HIPCC__
#include <hipcub/hipcub.hpp>
namespace cub = hipcub;
#endif
#include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/framework/tensor.h"
#include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/core/dense_tensor.h"
...@@ -46,7 +37,7 @@ void TensorReduceImpl(const platform::CUDADeviceContext& dev_ctx, ...@@ -46,7 +37,7 @@ void TensorReduceImpl(const platform::CUDADeviceContext& dev_ctx,
gpuStream_t stream) { gpuStream_t stream) {
y->mutable_data<Ty>(x.place()); y->mutable_data<Ty>(x.place());
pten::kernels::TensorReduceFunctorImpl<Tx, Ty, ReduceOp, TransformOp>( pten::kernels::TensorReduceImpl<Tx, Ty, ReduceOp, TransformOp>(
static_cast<const pten::GPUContext&>(dev_ctx), x, y, transform, static_cast<const pten::GPUContext&>(dev_ctx), x, y, transform,
origin_reduce_dims, stream); origin_reduce_dims, stream);
} }
......
...@@ -2016,10 +2016,7 @@ void default_elementwise_add_grad(const GPUContext &ctx, ...@@ -2016,10 +2016,7 @@ void default_elementwise_add_grad(const GPUContext &ctx,
std::vector<int> reduce_dims = std::vector<int> reduce_dims =
funcs::GetReduceDim(x.dims(), out.dims(), axis); funcs::GetReduceDim(x.dims(), out.dims(), axis);
gpuStream_t stream = ctx.stream(); gpuStream_t stream = ctx.stream();
kernels::TensorReduceFunctorImpl<T, kernels::TensorReduceImpl<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
T,
kps::AddFunctor,
kps::IdentityFunctor<T>>(
ctx, dout, dx, kps::IdentityFunctor<T>(), reduce_dims, stream); ctx, dout, dx, kps::IdentityFunctor<T>(), reduce_dims, stream);
} }
} }
...@@ -2034,10 +2031,7 @@ void default_elementwise_add_grad(const GPUContext &ctx, ...@@ -2034,10 +2031,7 @@ void default_elementwise_add_grad(const GPUContext &ctx,
std::vector<int> reduce_dims = std::vector<int> reduce_dims =
funcs::GetReduceDim(y.dims(), out.dims(), axis); funcs::GetReduceDim(y.dims(), out.dims(), axis);
gpuStream_t stream = ctx.stream(); gpuStream_t stream = ctx.stream();
kernels::TensorReduceFunctorImpl<T, kernels::TensorReduceImpl<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
T,
kps::AddFunctor,
kps::IdentityFunctor<T>>(
ctx, dout, dy, kps::IdentityFunctor<T>(), reduce_dims, stream); ctx, dout, dy, kps::IdentityFunctor<T>(), reduce_dims, stream);
} }
} }
...@@ -2133,10 +2127,7 @@ void default_elementwise_sub_grad(const GPUContext &ctx, ...@@ -2133,10 +2127,7 @@ void default_elementwise_sub_grad(const GPUContext &ctx,
std::vector<int> reduce_dims = std::vector<int> reduce_dims =
funcs::GetReduceDim(x.dims(), out.dims(), axis); funcs::GetReduceDim(x.dims(), out.dims(), axis);
gpuStream_t stream = ctx.stream(); gpuStream_t stream = ctx.stream();
kernels::TensorReduceFunctorImpl<T, kernels::TensorReduceImpl<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
T,
kps::AddFunctor,
kps::IdentityFunctor<T>>(
ctx, dout, dx, kps::IdentityFunctor<T>(), reduce_dims, stream); ctx, dout, dx, kps::IdentityFunctor<T>(), reduce_dims, stream);
} }
} }
...@@ -2157,10 +2148,7 @@ void default_elementwise_sub_grad(const GPUContext &ctx, ...@@ -2157,10 +2148,7 @@ void default_elementwise_sub_grad(const GPUContext &ctx,
std::vector<int> reduce_dims = std::vector<int> reduce_dims =
funcs::GetReduceDim(y.dims(), out.dims(), axis); funcs::GetReduceDim(y.dims(), out.dims(), axis);
gpuStream_t stream = ctx.stream(); gpuStream_t stream = ctx.stream();
kernels::TensorReduceFunctorImpl<T, kernels::TensorReduceImpl<T, T, kps::AddFunctor, kps::InverseFunctor<T>>(
T,
kps::AddFunctor,
kps::InverseFunctor<T>>(
ctx, dout, dy, kps::InverseFunctor<T>(), reduce_dims, stream); ctx, dout, dy, kps::InverseFunctor<T>(), reduce_dims, stream);
} }
} }
......
...@@ -1007,12 +1007,12 @@ template <typename Tx, ...@@ -1007,12 +1007,12 @@ template <typename Tx,
static static
typename std::enable_if<!std::is_same<Tx, paddle::platform::float16>::value, typename std::enable_if<!std::is_same<Tx, paddle::platform::float16>::value,
void>::type void>::type
CubTensorReduceFunctorImpl(const Tx* x_data, CubTensorReduceImpl(const Tx* x_data,
Ty* y_data, Ty* y_data,
const TransformOp& transform, const TransformOp& transform,
int reduce_num, int reduce_num,
const paddle::platform::Place& place, const paddle::platform::Place& place,
gpuStream_t stream) { gpuStream_t stream) {
auto reducer = ReduceOp<Ty>(); auto reducer = ReduceOp<Ty>();
cub::TransformInputIterator<Ty, TransformOp, const Tx*> trans_x(x_data, cub::TransformInputIterator<Ty, TransformOp, const Tx*> trans_x(x_data,
transform); transform);
...@@ -1051,12 +1051,12 @@ template <typename Tx, ...@@ -1051,12 +1051,12 @@ template <typename Tx,
static static
typename std::enable_if<std::is_same<Tx, paddle::platform::float16>::value, typename std::enable_if<std::is_same<Tx, paddle::platform::float16>::value,
void>::type void>::type
CubTensorReduceFunctorImpl(const Tx* x_data, CubTensorReduceImpl(const Tx* x_data,
Ty* y_data, Ty* y_data,
const TransformOp& transform, const TransformOp& transform,
int reduce_num, int reduce_num,
const paddle::platform::Place& place, const paddle::platform::Place& place,
gpuStream_t stream) { gpuStream_t stream) {
PADDLE_THROW(pten::errors::InvalidArgument( PADDLE_THROW(pten::errors::InvalidArgument(
"Tx should not be float16 when using cub::DeviceReduce::Reduce().")); "Tx should not be float16 when using cub::DeviceReduce::Reduce()."));
} }
...@@ -1065,12 +1065,12 @@ template <typename Tx, ...@@ -1065,12 +1065,12 @@ template <typename Tx,
typename Ty, typename Ty,
template <typename> class ReduceOp, template <typename> class ReduceOp,
typename TransformOp> typename TransformOp>
void TensorReduceFunctorImpl(const pten::GPUContext& dev_ctx, void TensorReduceImpl(const pten::GPUContext& dev_ctx,
const pten::DenseTensor& x, const pten::DenseTensor& x,
pten::DenseTensor* y, pten::DenseTensor* y,
const TransformOp& transform, const TransformOp& transform,
const std::vector<int>& origin_reduce_dims, const std::vector<int>& origin_reduce_dims,
gpuStream_t stream) { gpuStream_t stream) {
y->mutable_data<Ty>(x.place()); y->mutable_data<Ty>(x.place());
auto x_dim = pten::framework::vectorize<int>(x.dims()); auto x_dim = pten::framework::vectorize<int>(x.dims());
...@@ -1102,7 +1102,7 @@ void TensorReduceFunctorImpl(const pten::GPUContext& dev_ctx, ...@@ -1102,7 +1102,7 @@ void TensorReduceFunctorImpl(const pten::GPUContext& dev_ctx,
constexpr bool kIsTxFP16 = std::is_same<Tx, paddle::platform::float16>::value; constexpr bool kIsTxFP16 = std::is_same<Tx, paddle::platform::float16>::value;
bool use_cub_reduce = config.reduce_num == numel && !kIsTxFP16; bool use_cub_reduce = config.reduce_num == numel && !kIsTxFP16;
if (use_cub_reduce) { if (use_cub_reduce) {
CubTensorReduceFunctorImpl<Tx, Ty, ReduceOp, TransformOp>( CubTensorReduceImpl<Tx, Ty, ReduceOp, TransformOp>(
x_data, y_data, transform, config.reduce_num, x.place(), stream); x_data, y_data, transform, config.reduce_num, x.place(), stream);
return; return;
} }
...@@ -1239,13 +1239,13 @@ void Reduce(const GPUContext& dev_ctx, ...@@ -1239,13 +1239,13 @@ void Reduce(const GPUContext& dev_ctx,
pten::DataType::INT64, pten::DataType::INT64,
pten::DataType::FLOAT16, pten::DataType::FLOAT16,
out_dtype, out_dtype,
"TensorReduceFunctorImpl", "TensorReduceImpl",
([&] { ([&] {
using MPType = typename kps::details::MPTypeTrait<data_t>::Type; using MPType = typename kps::details::MPTypeTrait<data_t>::Type;
pten::kernels::TensorReduceFunctorImpl<data_t, pten::kernels::TensorReduceImpl<data_t,
data_t, data_t,
ReduceOp, ReduceOp,
TransformOp<data_t, MPType>>( TransformOp<data_t, MPType>>(
dev_ctx, dev_ctx,
tmp_tensor, tmp_tensor,
out, out,
...@@ -1255,14 +1255,13 @@ void Reduce(const GPUContext& dev_ctx, ...@@ -1255,14 +1255,13 @@ void Reduce(const GPUContext& dev_ctx,
})); }));
} else { } else {
using MPType = typename kps::details::MPTypeTrait<T>::Type; using MPType = typename kps::details::MPTypeTrait<T>::Type;
pten::kernels:: pten::kernels::TensorReduceImpl<T, T, ReduceOp, TransformOp<T, MPType>>(
TensorReduceFunctorImpl<T, T, ReduceOp, TransformOp<T, MPType>>( dev_ctx,
dev_ctx, x,
x, out,
out, TransformOp<T, MPType>(reduce_num),
TransformOp<T, MPType>(reduce_num), reduce_dims,
reduce_dims, stream);
stream);
} }
} }
} // namespace pten } // namespace pten
......
...@@ -60,10 +60,7 @@ struct ReduceSumForMatmulGrad<GPUContext, T> { ...@@ -60,10 +60,7 @@ struct ReduceSumForMatmulGrad<GPUContext, T> {
DenseTensor* output, DenseTensor* output,
const std::vector<int>& reduce_dims) { const std::vector<int>& reduce_dims) {
auto stream = dev_ctx.stream(); auto stream = dev_ctx.stream();
kernels::TensorReduceFunctorImpl<T, kernels::TensorReduceImpl<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
T,
kps::AddFunctor,
kps::IdentityFunctor<T>>(
dev_ctx, input, output, kps::IdentityFunctor<T>(), reduce_dims, stream); dev_ctx, input, output, kps::IdentityFunctor<T>(), reduce_dims, stream);
} }
}; };
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册