From 40a9b48877a5b6dbfffb88a3bccd69f37b05fbb8 Mon Sep 17 00:00:00 2001 From: PuQing Date: Thu, 10 Nov 2022 17:24:37 +0800 Subject: [PATCH] [PHI decoupling] remove "paddle/fluid/platform/device/gpu/gpu_launch_config.h" in phi (#47808) * rm fluid gpu_launch_config * fix type --- paddle/phi/kernels/funcs/elementwise_grad_base.h | 16 ++++++++-------- paddle/phi/kernels/funcs/gather.cu.h | 5 ++--- paddle/phi/kernels/funcs/reduce_function.h | 4 ++-- paddle/phi/kernels/funcs/scatter.cu.h | 8 ++++---- paddle/phi/kernels/gpu/histogram_kernel.cu | 2 +- paddle/phi/kernels/gpu/index_add_grad_kernel.cu | 4 ++-- paddle/phi/kernels/gpu/index_add_kernel.cu | 4 ++-- .../phi/kernels/gpu/index_sample_grad_kernel.cu | 8 ++++---- paddle/phi/kernels/gpu/index_sample_kernel.cu | 8 ++++---- .../phi/kernels/gpu/index_select_grad_kernel.cu | 4 ++-- paddle/phi/kernels/gpu/index_select_impl.h | 2 +- paddle/phi/kernels/gpu/index_select_kernel.cu | 4 ++-- paddle/phi/kernels/gpu/nanmedian_grad_kernel.cu | 2 +- paddle/phi/kernels/gpu/nanmedian_kernel.cu | 2 +- paddle/phi/kernels/gpu/top_k_kernel.cu | 8 ++++---- 15 files changed, 40 insertions(+), 41 deletions(-) diff --git a/paddle/phi/kernels/funcs/elementwise_grad_base.h b/paddle/phi/kernels/funcs/elementwise_grad_base.h index e52c669c48..f8007319d6 100644 --- a/paddle/phi/kernels/funcs/elementwise_grad_base.h +++ b/paddle/phi/kernels/funcs/elementwise_grad_base.h @@ -25,7 +25,7 @@ limitations under the License. */ // See Note [ Why still include the fluid headers? ] #include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/platform/device/gpu/gpu_device_function.h" -#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h" +#include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/kernels/primitive/kernel_primitives.h" #endif @@ -982,7 +982,7 @@ static void ElemwiseGradBroadcast1CUDA(gpuStream_t stream, auto gplace = phi::GPUPlace(phi::backends::gpu::GetCurrentDeviceId()); auto *ctx = static_cast( paddle::platform::DeviceContextPool::Instance().Get(gplace)); - paddle::platform::LimitGridDim(*ctx, &grid_size); + phi::backends::gpu::LimitGridDim(*ctx, &grid_size); FastElemwiseGradBroadcast1CUDAKernel<<>>( x, y, out, dout, h, w, is_xsize_larger, dx_op, dy_op, dx, dy); } @@ -1007,7 +1007,7 @@ static void ElemwiseGradBroadcast2CUDA(gpuStream_t stream, auto gplace = phi::GPUPlace(phi::backends::gpu::GetCurrentDeviceId()); auto *ctx = static_cast( paddle::platform::DeviceContextPool::Instance().Get(gplace)); - paddle::platform::LimitGridDim(*ctx, &grid_size); + phi::backends::gpu::LimitGridDim(*ctx, &grid_size); ElemwiseGradBroadcast2CUDAKernel<<>>( x, y, out, dout, pre, n, post, is_xsize_larger, dx_op, dy_op, dx, dy); } @@ -1210,7 +1210,7 @@ void CommonGradBroadcastCUDA(const DenseTensor &x, } else { dim3 block_size = dim3(BLOCK_X, BLOCK_Y); dim3 grid_size = dim3((w + BLOCK_X - 1) / BLOCK_X); - paddle::platform::LimitGridDim(ctx, &grid_size); + phi::backends::gpu::LimitGridDim(ctx, &grid_size); FastCommonGradBroadcastCUDAKernelHeight<<>>( x_data, @@ -1387,7 +1387,7 @@ void CommonGradBroadcastCUDA(const DenseTensor &x, std::multiplies()); int block_size = std::min(ELEMWISE_MAX_BLOCK_DIM, mid); dim3 grid_size = dim3(pre * post); - paddle::platform::LimitGridDim(ctx, &grid_size); + phi::backends::gpu::LimitGridDim(ctx, &grid_size); // we need to calc y offset with blockid, so do x_pre/y_pre to get // left size. if (k_pre != pre) k_pre = pre / k_pre; @@ -1418,7 +1418,7 @@ void CommonGradBroadcastCUDA(const DenseTensor &x, std::multiplies()); int block_size = std::min(ELEMWISE_MAX_BLOCK_DIM, mid); dim3 grid_size = dim3(pre * post); - paddle::platform::LimitGridDim(ctx, &grid_size); + phi::backends::gpu::LimitGridDim(ctx, &grid_size); if (k_pre != pre) k_pre = pre / k_pre; FastCommonGradBroadcastOneCUDAKernel<<<<>>( p_src, p_index, p_output, index_size, slice_size); @@ -155,7 +154,7 @@ void GPUGatherNd(const phi::GPUContext& ctx, int block = 512; int64_t n = slice_size * remain_numel; dim3 grid = dim3((n + block - 1) / block); - paddle::platform::LimitGridDim(ctx, &grid); + phi::backends::gpu::LimitGridDim(ctx, &grid); GatherNdCUDAKernel<<>>(p_input, g_input_dims, diff --git a/paddle/phi/kernels/funcs/reduce_function.h b/paddle/phi/kernels/funcs/reduce_function.h index 9138fd85e6..9719fbd881 100644 --- a/paddle/phi/kernels/funcs/reduce_function.h +++ b/paddle/phi/kernels/funcs/reduce_function.h @@ -34,9 +34,9 @@ namespace cub = hipcub; #ifndef PADDLE_WITH_XPU_KP #include "paddle/fluid/platform/device/gpu/gpu_device_function.h" -#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_info.h" +#include "paddle/phi/backends/gpu/gpu_launch_config.h" #endif #include "paddle/phi/kernels/cast_kernel.h" @@ -337,7 +337,7 @@ struct ReduceConfig { SetBlockDim(); #ifndef PADDLE_WITH_XPU_KP // step5: limit the grid to prevent thead overflow - paddle::platform::LimitGridDim(dev_ctx, &grid); + phi::backends::gpu::LimitGridDim(dev_ctx, &grid); #endif } diff --git a/paddle/phi/kernels/funcs/scatter.cu.h b/paddle/phi/kernels/funcs/scatter.cu.h index e10ae3951a..d42538edb7 100644 --- a/paddle/phi/kernels/funcs/scatter.cu.h +++ b/paddle/phi/kernels/funcs/scatter.cu.h @@ -16,8 +16,8 @@ limitations under the License. */ #include #include -#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" +#include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/kernels/funcs/math_function.h" @@ -158,7 +158,7 @@ void GPUScatterAssign(const phi::GPUContext& ctx, int block = 512; int64_t n = slice_size * index_size; dim3 grid = dim3((n + block - 1) / block); - paddle::platform::LimitGridDim(ctx, &grid); + phi::backends::gpu::LimitGridDim(ctx, &grid); // if not overwrite mode, init data if (!overwrite) { @@ -190,7 +190,7 @@ void GPUScatterGradForX(const phi::GPUContext& ctx, int64_t n = slice_size * index_size; int64_t height = (n + block - 1) / block; dim3 grid = dim3((n + block - 1) / block); - paddle::platform::LimitGridDim(ctx, &grid); + phi::backends::gpu::LimitGridDim(ctx, &grid); ScatterInitCUDAKernel<<>>( p_index, p_output, index_size, slice_size); @@ -231,7 +231,7 @@ void GPUScatterNdAdd(const phi::GPUContext& ctx, int block = 512; int64_t n = slice_size * remain_numel; dim3 grid = dim3((n + block - 1) / block); - paddle::platform::LimitGridDim(ctx, &grid); + phi::backends::gpu::LimitGridDim(ctx, &grid); ScatterNdCUDAKernel <<>>(p_update, diff --git a/paddle/phi/kernels/gpu/histogram_kernel.cu b/paddle/phi/kernels/gpu/histogram_kernel.cu index cd89bf98a3..4cc6bc3557 100644 --- a/paddle/phi/kernels/gpu/histogram_kernel.cu +++ b/paddle/phi/kernels/gpu/histogram_kernel.cu @@ -14,9 +14,9 @@ #include "paddle/phi/kernels/histogram_kernel.h" -#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" #include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/kernels/funcs/eigen/eigen_function.h" diff --git a/paddle/phi/kernels/gpu/index_add_grad_kernel.cu b/paddle/phi/kernels/gpu/index_add_grad_kernel.cu index c868843925..ddc8a65ad5 100644 --- a/paddle/phi/kernels/gpu/index_add_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/index_add_grad_kernel.cu @@ -14,9 +14,9 @@ #include "paddle/phi/kernels/index_add_grad_kernel.h" -#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" #include "paddle/phi/backends/gpu/gpu_info.h" +#include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/kernels/funcs/math_function.h" @@ -71,7 +71,7 @@ void IndexAddGradKernel(const Context& ctx, // get add_value_grad: index_select(out_grad, index, axis) unsigned int block_dim = PADDLE_CUDA_NUM_THREADS; dim3 grid_dim = dim3((numel + block_dim - 1) / block_dim); - paddle::platform::LimitGridDim(ctx, &grid_dim); + phi::backends::gpu::LimitGridDim(ctx, &grid_dim); if (index_type == phi::DataType::INT64) { const int64_t* index_data = index.data(); diff --git a/paddle/phi/kernels/gpu/index_add_kernel.cu b/paddle/phi/kernels/gpu/index_add_kernel.cu index ff8fb17020..047e54b99a 100644 --- a/paddle/phi/kernels/gpu/index_add_kernel.cu +++ b/paddle/phi/kernels/gpu/index_add_kernel.cu @@ -14,9 +14,9 @@ #include "paddle/phi/kernels/index_add_kernel.h" -#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" #include "paddle/phi/backends/gpu/gpu_info.h" +#include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/utils/data_type.h" @@ -75,7 +75,7 @@ void IndexAddKernel(const Context& ctx, unsigned int block_dim = PADDLE_CUDA_NUM_THREADS; dim3 grid_dim = dim3((numel + block_dim - 1) / block_dim); - paddle::platform::LimitGridDim(ctx, &grid_dim); + phi::backends::gpu::LimitGridDim(ctx, &grid_dim); // copy input to output. // todo(@limin29): inplace do not need copy. diff --git a/paddle/phi/kernels/gpu/index_sample_grad_kernel.cu b/paddle/phi/kernels/gpu/index_sample_grad_kernel.cu index d2671dff7b..5368d98c56 100644 --- a/paddle/phi/kernels/gpu/index_sample_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/index_sample_grad_kernel.cu @@ -18,9 +18,9 @@ #include #include "paddle/fluid/framework/convert_utils.h" -#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" #include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/math_function.h" @@ -92,16 +92,16 @@ void IndexSampleGradKernel(const Context& ctx, size_t index_length = index_dim[1]; bool same_data_in_index_row = index_length == 1 ? false : true; - auto block_width = paddle::platform::RoundToPowerOfTwo(index_length); + auto block_width = phi::backends::gpu::RoundToPowerOfTwo(index_length); block_width = MIN(block_width, PREDEFINED_BLOCK_SIZE_X); auto block_height = - paddle::platform::RoundToPowerOfTwo(index_length * batch_size) / + phi::backends::gpu::RoundToPowerOfTwo(index_length * batch_size) / block_width; block_height = MIN(block_height, PREDEFINED_BLOCK_SIZE / block_width); dim3 block_dim(block_width, block_height); dim3 grid_dim((index_length + block_dim.x - 1) / block_dim.x, (batch_size + block_dim.y - 1) / block_dim.y); - paddle::platform::LimitGridDim(ctx, &grid_dim); + phi::backends::gpu::LimitGridDim(ctx, &grid_dim); phi::funcs::SetConstant set_zero; set_zero(ctx, x_grad, static_cast(0)); diff --git a/paddle/phi/kernels/gpu/index_sample_kernel.cu b/paddle/phi/kernels/gpu/index_sample_kernel.cu index 9b95d761fc..5e6bd8701a 100644 --- a/paddle/phi/kernels/gpu/index_sample_kernel.cu +++ b/paddle/phi/kernels/gpu/index_sample_kernel.cu @@ -18,8 +18,8 @@ #include #include "paddle/fluid/framework/convert_utils.h" -#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h" #include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/math_function.h" @@ -80,16 +80,16 @@ void IndexSampleKernel(const Context& ctx, size_t input_length = input_dim[1]; size_t index_length = index_dim[1]; - auto block_width = paddle::platform::RoundToPowerOfTwo(index_length); + auto block_width = phi::backends::gpu::RoundToPowerOfTwo(index_length); block_width = MIN(block_width, PREDEFINED_BLOCK_SIZE_X); int block_height = - paddle::platform::RoundToPowerOfTwo(index_length * batch_size) / + phi::backends::gpu::RoundToPowerOfTwo(index_length * batch_size) / block_width; block_height = MIN(block_height, PREDEFINED_BLOCK_SIZE / block_width); dim3 block_dim(block_width, block_height); dim3 grid_dim((index_length + block_dim.x - 1) / block_dim.x, (batch_size + block_dim.y - 1) / block_dim.y); - paddle::platform::LimitGridDim(ctx, &grid_dim); + phi::backends::gpu::LimitGridDim(ctx, &grid_dim); if (index_type == DataType::INT64) { const int64_t* index_data = index.data(); diff --git a/paddle/phi/kernels/gpu/index_select_grad_kernel.cu b/paddle/phi/kernels/gpu/index_select_grad_kernel.cu index 8561744e87..fb9157db55 100644 --- a/paddle/phi/kernels/gpu/index_select_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/index_select_grad_kernel.cu @@ -14,9 +14,9 @@ #include "paddle/phi/kernels/index_select_grad_kernel.h" -#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" #include "paddle/phi/backends/gpu/gpu_info.h" +#include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/kernels/funcs/math_function.h" @@ -87,7 +87,7 @@ void IndexSelectGradKernel(const Context& ctx, unsigned int block_dim = PADDLE_CUDA_NUM_THREADS; dim3 grid_dim = dim3((numel + block_dim - 1) / block_dim); - paddle::platform::LimitGridDim(ctx, &grid_dim); + phi::backends::gpu::LimitGridDim(ctx, &grid_dim); phi::funcs::SetConstant index_select_grad_init; index_select_grad_init(ctx, x_grad, static_cast(0)); diff --git a/paddle/phi/kernels/gpu/index_select_impl.h b/paddle/phi/kernels/gpu/index_select_impl.h index fc631b6515..da9cdbf527 100644 --- a/paddle/phi/kernels/gpu/index_select_impl.h +++ b/paddle/phi/kernels/gpu/index_select_impl.h @@ -14,9 +14,9 @@ #pragma once -#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" #include "paddle/phi/backends/gpu/gpu_info.h" +#include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/utils/data_type.h" diff --git a/paddle/phi/kernels/gpu/index_select_kernel.cu b/paddle/phi/kernels/gpu/index_select_kernel.cu index e9228b54ed..135ae52651 100644 --- a/paddle/phi/kernels/gpu/index_select_kernel.cu +++ b/paddle/phi/kernels/gpu/index_select_kernel.cu @@ -14,9 +14,9 @@ #include "paddle/phi/kernels/index_select_kernel.h" -#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" #include "paddle/phi/backends/gpu/gpu_info.h" +#include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/kernels/gpu/index_select_impl.h" @@ -62,7 +62,7 @@ void IndexSelectKernel(const Context& ctx, unsigned int block_dim = PADDLE_CUDA_NUM_THREADS; dim3 grid_dim = dim3((numel + block_dim - 1) / block_dim); - paddle::platform::LimitGridDim(ctx, &grid_dim); + phi::backends::gpu::LimitGridDim(ctx, &grid_dim); if (index_type == phi::DataType::INT64) { const int64_t* index_data = index.data(); diff --git a/paddle/phi/kernels/gpu/nanmedian_grad_kernel.cu b/paddle/phi/kernels/gpu/nanmedian_grad_kernel.cu index b633311048..30fd93533e 100644 --- a/paddle/phi/kernels/gpu/nanmedian_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/nanmedian_grad_kernel.cu @@ -14,9 +14,9 @@ #include "paddle/phi/kernels/nanmedian_grad_kernel.h" -#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" #include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_meta.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/phi/kernels/gpu/nanmedian_kernel.cu b/paddle/phi/kernels/gpu/nanmedian_kernel.cu index f61e413c98..132b9fa10b 100644 --- a/paddle/phi/kernels/gpu/nanmedian_kernel.cu +++ b/paddle/phi/kernels/gpu/nanmedian_kernel.cu @@ -15,9 +15,9 @@ #include "paddle/phi/kernels/nanmedian_kernel.h" #include "paddle/fluid/memory/memcpy.h" -#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" #include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/full_kernel.h" #include "paddle/phi/kernels/impl/nanmedian_kernel_impl.h" diff --git a/paddle/phi/kernels/gpu/top_k_kernel.cu b/paddle/phi/kernels/gpu/top_k_kernel.cu index 9fc21b19a1..ecca63e52f 100644 --- a/paddle/phi/kernels/gpu/top_k_kernel.cu +++ b/paddle/phi/kernels/gpu/top_k_kernel.cu @@ -173,8 +173,8 @@ void TopkKernel(const Context& dev_ctx, // NOTE: old matrix implementation of stride is different to eigen. const int kMaxHeight = 2048; int gridx = input_height < kMaxHeight ? input_height : kMaxHeight; - paddle::platform::GpuLaunchConfig config = - paddle::platform::GetGpuLaunchConfig1D(dev_ctx, input_width); + auto config = + phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, input_width); switch (config.thread_per_block.x) { #ifdef PADDLE_WITH_HIP FIXED_BLOCK_DIM( @@ -282,8 +282,8 @@ void TopkKernel(const Context& dev_ctx, const int kMaxHeight = 2048; int gridx = input_height < kMaxHeight ? input_height : kMaxHeight; - paddle::platform::GpuLaunchConfig config = - paddle::platform::GetGpuLaunchConfig1D(dev_ctx, input_width); + auto config = + phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, input_width); switch (config.thread_per_block.x) { #ifdef PADDLE_WITH_HIP FIXED_BLOCK_DIM( -- GitLab