未验证 提交 e2e2d531 编写于 作者: C chentianyu03 提交者: GitHub

[phi]move reduce gpu impl funcs into pten/kernels/funcs (#39990)

* move reduce gpu impl funcs into pten/kernels/funcs

* change reduce header name and namespace

* fix spell word error

* change mutable_data to dev_ctx.Alloc

* modify place to devcontex

* format code style

* fix build error

* fix build error

* fix conflict
上级 3ac9bc95
@@ -191,9 +191,9 @@ void SetConfigForColumnReduce(const int max_threads, const int reduce_num,
   int num_block = (max_threads / left_num);
   if (num_block > 1 && reduce_num >= REDUCE_SPLIT_BOUNDARY) {
-    *blocking_size = phi::kernels::details::GetLastPow2(reduce_num / num_block);
+    *blocking_size = phi::funcs::details::GetLastPow2(reduce_num / num_block);
     if (*blocking_size <= 1) {
-      *blocking_size = phi::kernels::details::GetLastPow2(sqrt(reduce_num));
+      *blocking_size = phi::funcs::details::GetLastPow2(sqrt(reduce_num));
     } else if (*blocking_size * 2 < reduce_num) {
       *blocking_size *= 2;
     }
......
@@ -39,9 +39,9 @@ TEST(test_reduce_rank_check, all) {
       }
       if (is_valid) {
-        phi::kernels::details::CheckReduceRank(reduce_rank, rank);
+        phi::funcs::details::CheckReduceRank(reduce_rank, rank);
       } else {
-        ASSERT_THROW(phi::kernels::details::CheckReduceRank(reduce_rank, rank),
+        ASSERT_THROW(phi::funcs::details::CheckReduceRank(reduce_rank, rank),
                      paddle::platform::EnforceNotMet);
       }
     }
......
...@@ -23,8 +23,7 @@ ...@@ -23,8 +23,7 @@
#include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/framework/tensor.h"
#include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/kernels/gpu/reduce.h" #include "paddle/phi/kernels/funcs/reduce_function.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -37,7 +36,7 @@ void TensorReduceImpl(const platform::CUDADeviceContext& dev_ctx, ...@@ -37,7 +36,7 @@ void TensorReduceImpl(const platform::CUDADeviceContext& dev_ctx,
gpuStream_t stream) { gpuStream_t stream) {
y->mutable_data<Ty>(x.place()); y->mutable_data<Ty>(x.place());
phi::kernels::TensorReduceImpl<Tx, Ty, ReduceOp, TransformOp>( phi::funcs::TensorReduceImpl<Tx, Ty, ReduceOp, TransformOp>(
static_cast<const phi::GPUContext&>(dev_ctx), x, y, transform, static_cast<const phi::GPUContext&>(dev_ctx), x, y, transform,
origin_reduce_dims, stream); origin_reduce_dims, stream);
} }
......
此差异已折叠。
@@ -20,7 +20,7 @@
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/enforce.h"
 #include "paddle/phi/core/kernel_registry.h"
-#include "paddle/phi/kernels/gpu/reduce.h"
+#include "paddle/phi/kernels/funcs/reduce_function.h"
 #include "paddle/phi/kernels/primitive/functor_primitives.h"

 namespace phi {
@@ -87,7 +87,7 @@ void BroadcastTensorsGradKernel(const Context& ctx,
           *input_tensor, ctx.GetPlace(), ctx, output_tensor);
     } else {
       // reduce_sum implementation on CUDA
-      kernels::TensorReduceImpl<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
+      funcs::TensorReduceImpl<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
           ctx,
           *input_tensor,
           output_tensor,
......
@@ -80,7 +80,7 @@ inline void CompareAllKernelImpl(const Context& ctx,
   for (int i = 0; i < reduce_dims.size(); ++i) {
     reduce_dims[i] = i;
   }
-  kernels::TensorReduceImpl<bool, bool, BitwiseAdd, kps::IdentityFunctor<bool>>(
+  funcs::TensorReduceImpl<bool, bool, BitwiseAdd, kps::IdentityFunctor<bool>>(
       ctx, tmp, out, kps::IdentityFunctor<bool>(), reduce_dims, ctx.stream());
 }
......
@@ -16,7 +16,7 @@ limitations under the License. */
 #include "paddle/phi/kernels/copy_kernel.h"
 #include "paddle/phi/kernels/funcs/elementwise_grad_base.h"
-#include "paddle/phi/kernels/gpu/reduce.h"
+#include "paddle/phi/kernels/funcs/reduce_function.h"

 namespace phi {
@@ -84,7 +84,7 @@ void DefaultElementwiseAddGrad(const GPUContext &ctx,
     std::vector<int> reduce_dims =
         funcs::GetReduceDim(x.dims(), out.dims(), axis);
     gpuStream_t stream = ctx.stream();
-    kernels::TensorReduceImpl<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
+    funcs::TensorReduceImpl<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
        ctx, dout, dx, kps::IdentityFunctor<T>(), reduce_dims, stream);
   }
 }
@@ -99,7 +99,7 @@ void DefaultElementwiseAddGrad(const GPUContext &ctx,
     std::vector<int> reduce_dims =
         funcs::GetReduceDim(y.dims(), out.dims(), axis);
     gpuStream_t stream = ctx.stream();
-    kernels::TensorReduceImpl<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
+    funcs::TensorReduceImpl<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
        ctx, dout, dy, kps::IdentityFunctor<T>(), reduce_dims, stream);
   }
 }
@@ -197,7 +197,7 @@ void default_elementwise_sub_grad(const GPUContext &ctx,
     std::vector<int> reduce_dims =
         funcs::GetReduceDim(x.dims(), out.dims(), axis);
     gpuStream_t stream = ctx.stream();
-    kernels::TensorReduceImpl<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
+    funcs::TensorReduceImpl<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
        ctx, dout, dx, kps::IdentityFunctor<T>(), reduce_dims, stream);
   }
 }
@@ -218,7 +218,7 @@ void default_elementwise_sub_grad(const GPUContext &ctx,
     std::vector<int> reduce_dims =
         funcs::GetReduceDim(y.dims(), out.dims(), axis);
     gpuStream_t stream = ctx.stream();
-    kernels::TensorReduceImpl<T, T, kps::AddFunctor, kps::InverseFunctor<T>>(
+    funcs::TensorReduceImpl<T, T, kps::AddFunctor, kps::InverseFunctor<T>>(
        ctx, dout, dy, kps::InverseFunctor<T>(), reduce_dims, stream);
   }
 }
......
此差异已折叠。
@@ -17,7 +17,7 @@
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/funcs/diagonal.h"
-#include "paddle/phi/kernels/gpu/reduce.h"
+#include "paddle/phi/kernels/funcs/reduce_function.h"

 namespace phi {
@@ -34,7 +34,7 @@ void TraceKernel(const Context& ctx,
     auto stream = ctx.stream();
     std::vector<int> reduce_dims;
     reduce_dims.push_back(out->dims().size());
-    kernels::TensorReduceImpl<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
+    funcs::TensorReduceImpl<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
        ctx, diag, out, kps::IdentityFunctor<T>(), reduce_dims, stream);
   } else {
     phi::funcs::SetConstant<Context, T> functor;
......
@@ -60,7 +60,7 @@ struct ReduceSumForMatmulGrad<GPUContext, T> {
                   DenseTensor* output,
                   const std::vector<int>& reduce_dims) {
     auto stream = dev_ctx.stream();
-    kernels::TensorReduceImpl<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
+    funcs::TensorReduceImpl<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
        dev_ctx, input, output, kps::IdentityFunctor<T>(), reduce_dims, stream);
   }
 };
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册