Unverified commit 688743bf, authored by Yiqun Liu, committed by GitHub

Rename phi::funcs::TensorReduceImpl to phi::funcs::ReduceKernel. (#40183)

Parent c1d81ec1
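Apart from the rename, the one signature change is that the explicit gpuStream_t/KPStream argument is dropped: ReduceKernel now obtains the stream from the device context itself (see the added "auto stream = dev_ctx.stream();" in the definition below). A minimal before/after sketch of a typical call site; the names src, dst, and reduce_dims are illustrative, not taken from this diff:

// Before this commit: callers passed the stream explicitly.
phi::funcs::TensorReduceImpl<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
    dev_ctx, src, &dst, kps::IdentityFunctor<T>(), reduce_dims,
    dev_ctx.stream());

// After this commit: the stream is read from dev_ctx inside the kernel.
phi::funcs::ReduceKernel<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
    dev_ctx, src, &dst, kps::IdentityFunctor<T>(), reduce_dims);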
......
@@ -36,9 +36,9 @@ void TensorReduceImpl(const platform::CUDADeviceContext& dev_ctx,
                       gpuStream_t stream) {
   y->mutable_data<Ty>(x.place());
-  phi::funcs::TensorReduceImpl<Tx, Ty, ReduceOp, TransformOp>(
+  phi::funcs::ReduceKernel<Tx, Ty, ReduceOp, TransformOp>(
       static_cast<const phi::GPUContext&>(dev_ctx), x, y, transform,
-      origin_reduce_dims, stream);
+      origin_reduce_dims);
 }
 }  // namespace operators
......
......
@@ -45,13 +45,8 @@ class MatrixReduceSumFunctor<T, GPUContext> {
         out_reduce_dims.push_back(idx);
       }
     }
-    TensorReduceImpl<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
-        dev_ctx,
-        in,
-        out,
-        kps::IdentityFunctor<T>(),
-        out_reduce_dims,
-        dev_ctx.stream());
+    ReduceKernel<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
+        dev_ctx, in, out, kps::IdentityFunctor<T>(), out_reduce_dims);
   }
 };
......
......
@@ -1087,12 +1087,12 @@ template <typename Tx,
           typename Ty,
           template <typename> class ReduceOp,
           typename TransformOp>
-void TensorReduceImpl(const phi::GPUContext& dev_ctx,
-                      const phi::DenseTensor& x,
-                      phi::DenseTensor* y,
-                      const TransformOp& transform,
-                      const std::vector<int>& origin_reduce_dims,
-                      KPStream stream) {
+void ReduceKernel(const phi::GPUContext& dev_ctx,
+                  const phi::DenseTensor& x,
+                  phi::DenseTensor* y,
+                  const TransformOp& transform,
+                  const std::vector<int>& origin_reduce_dims) {
+  auto stream = dev_ctx.stream();
   dev_ctx.Alloc<Ty>(y);
   auto x_dim = phi::vectorize<int>(x.dims());
......
......
@@ -87,13 +87,12 @@ void BroadcastTensorsGradKernel(const Context& ctx,
           *input_tensor, ctx.GetPlace(), ctx, output_tensor);
     } else {
       // reduce_sum implementation on CUDA
-      funcs::TensorReduceImpl<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
+      funcs::ReduceKernel<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
           ctx,
           *input_tensor,
           output_tensor,
           kps::IdentityFunctor<T>(),
-          reduce_dims_vec,
-          ctx.stream());
+          reduce_dims_vec);
     }
   }
 }
......
......
@@ -80,8 +80,8 @@ inline void CompareAllKernelImpl(const Context& ctx,
   for (int i = 0; i < reduce_dims.size(); ++i) {
     reduce_dims[i] = i;
   }
-  funcs::TensorReduceImpl<bool, bool, BitwiseAdd, kps::IdentityFunctor<bool>>(
-      ctx, tmp, out, kps::IdentityFunctor<bool>(), reduce_dims, ctx.stream());
+  funcs::ReduceKernel<bool, bool, BitwiseAdd, kps::IdentityFunctor<bool>>(
+      ctx, tmp, out, kps::IdentityFunctor<bool>(), reduce_dims);
 }
 }  // namespace phi
......
......
@@ -29,13 +29,8 @@ void ReduceWrapper(const GPUContext &dev_ctx,
                    DenseTensor *dst) {
   std::vector<int> reduce_dims =
       funcs::GetReduceDim(dst->dims(), src->dims(), axis);
-  funcs::TensorReduceImpl<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
-      dev_ctx,
-      *src,
-      dst,
-      kps::IdentityFunctor<T>(),
-      reduce_dims,
-      dev_ctx.stream());
+  funcs::ReduceKernel<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
+      dev_ctx, *src, dst, kps::IdentityFunctor<T>(), reduce_dims);
 }
 template <ElementwiseType ET, typename T, typename Functor>
......
@@ -172,9 +167,8 @@ void DefaultElementwiseAddGrad(const GPUContext &ctx,
     }
     std::vector<int> reduce_dims =
         funcs::GetReduceDim(x.dims(), out.dims(), axis);
-    gpuStream_t stream = ctx.stream();
-    funcs::TensorReduceImpl<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
-        ctx, dout, dx, kps::IdentityFunctor<T>(), reduce_dims, stream);
+    funcs::ReduceKernel<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
+        ctx, dout, dx, kps::IdentityFunctor<T>(), reduce_dims);
   }
 }
 // dy
......
@@ -187,9 +181,8 @@ void DefaultElementwiseAddGrad(const GPUContext &ctx,
   } else {
     std::vector<int> reduce_dims =
         funcs::GetReduceDim(y.dims(), out.dims(), axis);
-    gpuStream_t stream = ctx.stream();
-    funcs::TensorReduceImpl<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
-        ctx, dout, dy, kps::IdentityFunctor<T>(), reduce_dims, stream);
+    funcs::ReduceKernel<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
+        ctx, dout, dy, kps::IdentityFunctor<T>(), reduce_dims);
   }
 }
 }
......
@@ -285,9 +278,8 @@ void default_elementwise_sub_grad(const GPUContext &ctx,
     }
     std::vector<int> reduce_dims =
         funcs::GetReduceDim(x.dims(), out.dims(), axis);
-    gpuStream_t stream = ctx.stream();
-    funcs::TensorReduceImpl<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
-        ctx, dout, dx, kps::IdentityFunctor<T>(), reduce_dims, stream);
+    funcs::ReduceKernel<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
+        ctx, dout, dx, kps::IdentityFunctor<T>(), reduce_dims);
   }
 }
// dy
......
@@ -306,9 +298,8 @@ void default_elementwise_sub_grad(const GPUContext &ctx,
   } else {
     std::vector<int> reduce_dims =
         funcs::GetReduceDim(y.dims(), out.dims(), axis);
-    gpuStream_t stream = ctx.stream();
-    funcs::TensorReduceImpl<T, T, kps::AddFunctor, kps::InverseFunctor<T>>(
-        ctx, dout, dy, kps::InverseFunctor<T>(), reduce_dims, stream);
+    funcs::ReduceKernel<T, T, kps::AddFunctor, kps::InverseFunctor<T>>(
+        ctx, dout, dy, kps::InverseFunctor<T>(), reduce_dims);
   }
 }
 }
......
......
@@ -39,8 +39,6 @@ void Reduce(const KPDevice& dev_ctx,
     reduce_num *= (x.dims())[i];
   }
-  KPStream stream = dev_ctx.stream();
   if (out_dtype != phi::DataType::UNDEFINED && out_dtype != x.dtype()) {
     auto tmp_tensor = phi::Cast<T>(dev_ctx, x, out_dtype);
     PD_VISIT_BOOL_AND_FLOATING_AND_COMPLEX_AND_3_TYPES(
......
@@ -48,29 +46,23 @@
         phi::DataType::INT64,
         phi::DataType::FLOAT16,
         out_dtype,
-        "TensorReduceImpl",
+        "ReduceKernel",
         ([&] {
           using MPType = typename kps::details::MPTypeTrait<data_t>::Type;
-          phi::funcs::TensorReduceImpl<data_t,
-                                       data_t,
-                                       ReduceOp,
-                                       TransformOp<data_t, MPType>>(
+          phi::funcs::ReduceKernel<data_t,
+                                   data_t,
+                                   ReduceOp,
+                                   TransformOp<data_t, MPType>>(
               dev_ctx,
               tmp_tensor,
               out,
               TransformOp<data_t, MPType>(reduce_num),
-              reduce_dims,
-              stream);
+              reduce_dims);
         }));
   } else {
     using MPType = typename kps::details::MPTypeTrait<T>::Type;
-    phi::funcs::TensorReduceImpl<T, T, ReduceOp, TransformOp<T, MPType>>(
-        dev_ctx,
-        x,
-        out,
-        TransformOp<T, MPType>(reduce_num),
-        reduce_dims,
-        stream);
+    phi::funcs::ReduceKernel<T, T, ReduceOp, TransformOp<T, MPType>>(
+        dev_ctx, x, out, TransformOp<T, MPType>(reduce_num), reduce_dims);
   }
 }
 }  // namespace phi
......
......
@@ -69,17 +69,12 @@ void SigmoidCrossEntropyWithLogitsGradKernel(const Context &dev_ctx,
   dev_ctx.template Alloc<T>(counts_tensor);
   counts_tensor->Resize(in_grad->dims());
   int limit = in_grad->numel();
-  int blocks = NumBlocks(limit);
-  int threads = kNumCUDAThreads;
   std::vector<const DenseTensor *> ins = {&x, &label, &out_grad};
   std::vector<DenseTensor *> outs = {in_grad, counts_tensor};
   auto functor = SigmoidBwdFunctor<T>(ignore_index);
-  constexpr int Size = 2;
-  phi::funcs::ElementwiseKernel<T, decltype(functor), Size>(
+  phi::funcs::ElementwiseKernel<T, decltype(functor), 2>(
       dev_ctx, ins, &outs, functor);
   if (normalize) {
-    T *counts = dev_ctx.template Alloc<T>(counts_tensor);
     DenseTensor *norm_tensor = new DenseTensor();
     norm_tensor->Resize({sizeof(T)});
     dev_ctx.template Alloc<T>(norm_tensor);
......
@@ -89,13 +84,8 @@ void SigmoidCrossEntropyWithLogitsGradKernel(const Context &dev_ctx,
       reduce_dim.push_back(i);
     }
-    funcs::TensorReduceImpl<T, T, kps::AddFunctor, NonzeroFunctor<T>>(
-        dev_ctx,
-        *counts_tensor,
-        norm_tensor,
-        NonzeroFunctor<T>(),
-        reduce_dim,
-        dev_ctx.stream());
+    funcs::ReduceKernel<T, T, kps::AddFunctor, NonzeroFunctor<T>>(
+        dev_ctx, *counts_tensor, norm_tensor, NonzeroFunctor<T>(), reduce_dim);
     T *norm = dev_ctx.template Alloc<T>(norm_tensor);
     auto norm_cpu_mem = paddle::memory::Alloc(phi::CPUPlace(), sizeof(T));
     T *norm_cpu_ptr = reinterpret_cast<T *>(norm_cpu_mem->ptr());
......
@@ -114,6 +104,7 @@ void SigmoidCrossEntropyWithLogitsGradKernel(const Context &dev_ctx,
     phi::funcs::ElementwiseKernel<T>(dev_ctx, div_ins, &div_outs, div_functor);
     delete norm_tensor;
   }
+  delete counts_tensor;
 }
 }  // namespace phi
......
......
@@ -69,17 +69,12 @@ void SigmoidCrossEntropyWithLogitsKernel(const Context &dev_ctx,
   dev_ctx.template Alloc<T>(counts_tensor);
   counts_tensor->Resize(out->dims());
   int limit = out->numel();
-  int blocks = NumBlocks(limit);
-  int threads = kNumCUDAThreads;
   std::vector<const DenseTensor *> ins = {&x, &label};
   std::vector<DenseTensor *> outs = {out, counts_tensor};
   auto functor = SigmoidFwdFunctor<T>(ignore_index);
-  constexpr int Size = 2;
-  phi::funcs::ElementwiseKernel<T, decltype(functor), Size>(
+  phi::funcs::ElementwiseKernel<T, decltype(functor), 2>(
       dev_ctx, ins, &outs, functor);
   if (normalize) {
-    T *counts = dev_ctx.template Alloc<T>(counts_tensor);
     DenseTensor *norm_tensor = new DenseTensor();
     norm_tensor->Resize({sizeof(T)});
     dev_ctx.template Alloc<T>(norm_tensor);
......
@@ -89,13 +84,8 @@ void SigmoidCrossEntropyWithLogitsKernel(const Context &dev_ctx,
       reduce_dim.push_back(i);
     }
-    funcs::TensorReduceImpl<T, T, kps::AddFunctor, NonzeroFunctor<T>>(
-        dev_ctx,
-        *counts_tensor,
-        norm_tensor,
-        NonzeroFunctor<T>(),
-        reduce_dim,
-        dev_ctx.stream());
+    funcs::ReduceKernel<T, T, kps::AddFunctor, NonzeroFunctor<T>>(
+        dev_ctx, *counts_tensor, norm_tensor, NonzeroFunctor<T>(), reduce_dim);
     T *norm = dev_ctx.template Alloc<T>(norm_tensor);
     auto norm_cpu_mem = paddle::memory::Alloc(phi::CPUPlace(), sizeof(T));
     T *norm_cpu_ptr = reinterpret_cast<T *>(norm_cpu_mem->ptr());
......
@@ -114,8 +104,8 @@ void SigmoidCrossEntropyWithLogitsKernel(const Context &dev_ctx,
     phi::funcs::ElementwiseKernel<T>(dev_ctx, div_ins, &div_outs, div_functor);
     delete norm_tensor;
-    delete counts_tensor;
   }
+  delete counts_tensor;
 }
 }  // namespace phi
......
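Alongside the rename, the two sigmoid_cross_entropy_with_logits kernels above receive small cleanups: the unused blocks, threads, and counts locals are removed, and delete counts_tensor; is moved outside the if (normalize) block so the temporary is freed on both paths. A condensed sketch of the resulting control flow, with the intermediate steps abridged:

DenseTensor *counts_tensor = new DenseTensor();
// ... elementwise kernel writes the output and counts_tensor ...
if (normalize) {
  DenseTensor *norm_tensor = new DenseTensor();
  // ... reduce counts_tensor into norm_tensor, then divide by the norm ...
  delete norm_tensor;
}
delete counts_tensor;  // now executed whether or not normalize is set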
......
@@ -31,11 +31,10 @@ void TraceKernel(const Context& ctx,
   T* out_data = ctx.template Alloc<T>(out);
   auto diag = funcs::Diagonal<T, Context>(ctx, &x, offset, axis1, axis2);
   if (diag.numel() > 0) {
-    auto stream = ctx.stream();
     std::vector<int> reduce_dims;
     reduce_dims.push_back(out->dims().size());
-    funcs::TensorReduceImpl<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
-        ctx, diag, out, kps::IdentityFunctor<T>(), reduce_dims, stream);
+    funcs::ReduceKernel<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
+        ctx, diag, out, kps::IdentityFunctor<T>(), reduce_dims);
   } else {
     phi::funcs::SetConstant<Context, T> functor;
     functor(ctx, out, static_cast<T>(0));
......
......
@@ -59,9 +59,8 @@ struct ReduceSumForMatmulGrad<GPUContext, T> {
                   const DenseTensor& input,
                   DenseTensor* output,
                   const std::vector<int>& reduce_dims) {
-    auto stream = dev_ctx.stream();
-    funcs::TensorReduceImpl<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
-        dev_ctx, input, output, kps::IdentityFunctor<T>(), reduce_dims, stream);
+    funcs::ReduceKernel<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
+        dev_ctx, input, output, kps::IdentityFunctor<T>(), reduce_dims);
   }
 };
 #endif
......