diff --git a/paddle/phi/kernels/funcs/elementwise_grad_base.h b/paddle/phi/kernels/funcs/elementwise_grad_base.h index e52c669c48d9897b8387cd086b6118e16bd6f8bf..f8007319d697ce12d6087993e41806760f1a5606 100644 --- a/paddle/phi/kernels/funcs/elementwise_grad_base.h +++ b/paddle/phi/kernels/funcs/elementwise_grad_base.h @@ -25,7 +25,7 @@ limitations under the License. */ // See Note [ Why still include the fluid headers? ] #include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/platform/device/gpu/gpu_device_function.h" -#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h" +#include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/kernels/primitive/kernel_primitives.h" #endif @@ -982,7 +982,7 @@ static void ElemwiseGradBroadcast1CUDA(gpuStream_t stream, auto gplace = phi::GPUPlace(phi::backends::gpu::GetCurrentDeviceId()); auto *ctx = static_cast( paddle::platform::DeviceContextPool::Instance().Get(gplace)); - paddle::platform::LimitGridDim(*ctx, &grid_size); + phi::backends::gpu::LimitGridDim(*ctx, &grid_size); FastElemwiseGradBroadcast1CUDAKernel<<>>( x, y, out, dout, h, w, is_xsize_larger, dx_op, dy_op, dx, dy); } @@ -1007,7 +1007,7 @@ static void ElemwiseGradBroadcast2CUDA(gpuStream_t stream, auto gplace = phi::GPUPlace(phi::backends::gpu::GetCurrentDeviceId()); auto *ctx = static_cast( paddle::platform::DeviceContextPool::Instance().Get(gplace)); - paddle::platform::LimitGridDim(*ctx, &grid_size); + phi::backends::gpu::LimitGridDim(*ctx, &grid_size); ElemwiseGradBroadcast2CUDAKernel<<>>( x, y, out, dout, pre, n, post, is_xsize_larger, dx_op, dy_op, dx, dy); } @@ -1210,7 +1210,7 @@ void CommonGradBroadcastCUDA(const DenseTensor &x, } else { dim3 block_size = dim3(BLOCK_X, BLOCK_Y); dim3 grid_size = dim3((w + BLOCK_X - 1) / BLOCK_X); - paddle::platform::LimitGridDim(ctx, &grid_size); + phi::backends::gpu::LimitGridDim(ctx, &grid_size); FastCommonGradBroadcastCUDAKernelHeight<<>>( x_data, @@ -1387,7 +1387,7 @@ void CommonGradBroadcastCUDA(const DenseTensor &x, std::multiplies()); int block_size = std::min(ELEMWISE_MAX_BLOCK_DIM, mid); dim3 grid_size = dim3(pre * post); - paddle::platform::LimitGridDim(ctx, &grid_size); + phi::backends::gpu::LimitGridDim(ctx, &grid_size); // we need to calc y offset with blockid, so do x_pre/y_pre to get // left size. if (k_pre != pre) k_pre = pre / k_pre; @@ -1418,7 +1418,7 @@ void CommonGradBroadcastCUDA(const DenseTensor &x, std::multiplies()); int block_size = std::min(ELEMWISE_MAX_BLOCK_DIM, mid); dim3 grid_size = dim3(pre * post); - paddle::platform::LimitGridDim(ctx, &grid_size); + phi::backends::gpu::LimitGridDim(ctx, &grid_size); if (k_pre != pre) k_pre = pre / k_pre; FastCommonGradBroadcastOneCUDAKernel<<<<>>( p_src, p_index, p_output, index_size, slice_size); @@ -155,7 +154,7 @@ void GPUGatherNd(const phi::GPUContext& ctx, int block = 512; int64_t n = slice_size * remain_numel; dim3 grid = dim3((n + block - 1) / block); - paddle::platform::LimitGridDim(ctx, &grid); + phi::backends::gpu::LimitGridDim(ctx, &grid); GatherNdCUDAKernel<<>>(p_input, g_input_dims, diff --git a/paddle/phi/kernels/funcs/reduce_function.h b/paddle/phi/kernels/funcs/reduce_function.h index 9138fd85e65aa4ad91d5847024a22c14adfe7465..9719fbd88160e1b94cb74ad9ec5b387612fd8433 100644 --- a/paddle/phi/kernels/funcs/reduce_function.h +++ b/paddle/phi/kernels/funcs/reduce_function.h @@ -34,9 +34,9 @@ namespace cub = hipcub; #ifndef PADDLE_WITH_XPU_KP #include "paddle/fluid/platform/device/gpu/gpu_device_function.h" -#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_info.h" +#include "paddle/phi/backends/gpu/gpu_launch_config.h" #endif #include "paddle/phi/kernels/cast_kernel.h" @@ -337,7 +337,7 @@ struct ReduceConfig { SetBlockDim(); #ifndef PADDLE_WITH_XPU_KP // step5: limit the grid to prevent thead overflow - paddle::platform::LimitGridDim(dev_ctx, &grid); + phi::backends::gpu::LimitGridDim(dev_ctx, &grid); #endif } diff --git a/paddle/phi/kernels/funcs/scatter.cu.h b/paddle/phi/kernels/funcs/scatter.cu.h index e10ae3951ae2f0d3b98c1ceed9aa42438127c1e1..d42538edb7561e49a3ac4ce471252bb48f6b6929 100644 --- a/paddle/phi/kernels/funcs/scatter.cu.h +++ b/paddle/phi/kernels/funcs/scatter.cu.h @@ -16,8 +16,8 @@ limitations under the License. */ #include #include -#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" +#include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/kernels/funcs/math_function.h" @@ -158,7 +158,7 @@ void GPUScatterAssign(const phi::GPUContext& ctx, int block = 512; int64_t n = slice_size * index_size; dim3 grid = dim3((n + block - 1) / block); - paddle::platform::LimitGridDim(ctx, &grid); + phi::backends::gpu::LimitGridDim(ctx, &grid); // if not overwrite mode, init data if (!overwrite) { @@ -190,7 +190,7 @@ void GPUScatterGradForX(const phi::GPUContext& ctx, int64_t n = slice_size * index_size; int64_t height = (n + block - 1) / block; dim3 grid = dim3((n + block - 1) / block); - paddle::platform::LimitGridDim(ctx, &grid); + phi::backends::gpu::LimitGridDim(ctx, &grid); ScatterInitCUDAKernel<<>>( p_index, p_output, index_size, slice_size); @@ -231,7 +231,7 @@ void GPUScatterNdAdd(const phi::GPUContext& ctx, int block = 512; int64_t n = slice_size * remain_numel; dim3 grid = dim3((n + block - 1) / block); - paddle::platform::LimitGridDim(ctx, &grid); + phi::backends::gpu::LimitGridDim(ctx, &grid); ScatterNdCUDAKernel <<>>(p_update, diff --git a/paddle/phi/kernels/gpu/histogram_kernel.cu b/paddle/phi/kernels/gpu/histogram_kernel.cu index cd89bf98a305bba23563f83c3110e1a162611afd..4cc6bc35578d19c54821dc06924232342a78e1bb 100644 --- a/paddle/phi/kernels/gpu/histogram_kernel.cu +++ b/paddle/phi/kernels/gpu/histogram_kernel.cu @@ -14,9 +14,9 @@ #include "paddle/phi/kernels/histogram_kernel.h" -#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" #include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/kernels/funcs/eigen/eigen_function.h" diff --git a/paddle/phi/kernels/gpu/index_add_grad_kernel.cu b/paddle/phi/kernels/gpu/index_add_grad_kernel.cu index c868843925aa75118a95710edc4d687bc11155f8..ddc8a65ad51edd0874914c6d10c0031e234808a8 100644 --- a/paddle/phi/kernels/gpu/index_add_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/index_add_grad_kernel.cu @@ -14,9 +14,9 @@ #include "paddle/phi/kernels/index_add_grad_kernel.h" -#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" #include "paddle/phi/backends/gpu/gpu_info.h" +#include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/kernels/funcs/math_function.h" @@ -71,7 +71,7 @@ void IndexAddGradKernel(const Context& ctx, // get add_value_grad: index_select(out_grad, index, axis) unsigned int block_dim = PADDLE_CUDA_NUM_THREADS; dim3 grid_dim = dim3((numel + block_dim - 1) / block_dim); - paddle::platform::LimitGridDim(ctx, &grid_dim); + phi::backends::gpu::LimitGridDim(ctx, &grid_dim); if (index_type == phi::DataType::INT64) { const int64_t* index_data = index.data(); diff --git a/paddle/phi/kernels/gpu/index_add_kernel.cu b/paddle/phi/kernels/gpu/index_add_kernel.cu index ff8fb1702075cbad562130b93091c03b886ce411..047e54b99aa3b9b78516e1b35e6583b1a5184878 100644 --- a/paddle/phi/kernels/gpu/index_add_kernel.cu +++ b/paddle/phi/kernels/gpu/index_add_kernel.cu @@ -14,9 +14,9 @@ #include "paddle/phi/kernels/index_add_kernel.h" -#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" #include "paddle/phi/backends/gpu/gpu_info.h" +#include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/utils/data_type.h" @@ -75,7 +75,7 @@ void IndexAddKernel(const Context& ctx, unsigned int block_dim = PADDLE_CUDA_NUM_THREADS; dim3 grid_dim = dim3((numel + block_dim - 1) / block_dim); - paddle::platform::LimitGridDim(ctx, &grid_dim); + phi::backends::gpu::LimitGridDim(ctx, &grid_dim); // copy input to output. // todo(@limin29): inplace do not need copy. diff --git a/paddle/phi/kernels/gpu/index_sample_grad_kernel.cu b/paddle/phi/kernels/gpu/index_sample_grad_kernel.cu index d2671dff7b0184f50345bdeeadb59d018a3badf0..5368d98c56a95edd95aee0a308bd924fe4f64cd7 100644 --- a/paddle/phi/kernels/gpu/index_sample_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/index_sample_grad_kernel.cu @@ -18,9 +18,9 @@ #include #include "paddle/fluid/framework/convert_utils.h" -#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" #include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/math_function.h" @@ -92,16 +92,16 @@ void IndexSampleGradKernel(const Context& ctx, size_t index_length = index_dim[1]; bool same_data_in_index_row = index_length == 1 ? false : true; - auto block_width = paddle::platform::RoundToPowerOfTwo(index_length); + auto block_width = phi::backends::gpu::RoundToPowerOfTwo(index_length); block_width = MIN(block_width, PREDEFINED_BLOCK_SIZE_X); auto block_height = - paddle::platform::RoundToPowerOfTwo(index_length * batch_size) / + phi::backends::gpu::RoundToPowerOfTwo(index_length * batch_size) / block_width; block_height = MIN(block_height, PREDEFINED_BLOCK_SIZE / block_width); dim3 block_dim(block_width, block_height); dim3 grid_dim((index_length + block_dim.x - 1) / block_dim.x, (batch_size + block_dim.y - 1) / block_dim.y); - paddle::platform::LimitGridDim(ctx, &grid_dim); + phi::backends::gpu::LimitGridDim(ctx, &grid_dim); phi::funcs::SetConstant set_zero; set_zero(ctx, x_grad, static_cast(0)); diff --git a/paddle/phi/kernels/gpu/index_sample_kernel.cu b/paddle/phi/kernels/gpu/index_sample_kernel.cu index 9b95d761fcbad475d142553741ffd658f0688884..5e6bd8701a9dc7b39f6c0266e53ea49ef0de45ec 100644 --- a/paddle/phi/kernels/gpu/index_sample_kernel.cu +++ b/paddle/phi/kernels/gpu/index_sample_kernel.cu @@ -18,8 +18,8 @@ #include #include "paddle/fluid/framework/convert_utils.h" -#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h" #include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/math_function.h" @@ -80,16 +80,16 @@ void IndexSampleKernel(const Context& ctx, size_t input_length = input_dim[1]; size_t index_length = index_dim[1]; - auto block_width = paddle::platform::RoundToPowerOfTwo(index_length); + auto block_width = phi::backends::gpu::RoundToPowerOfTwo(index_length); block_width = MIN(block_width, PREDEFINED_BLOCK_SIZE_X); int block_height = - paddle::platform::RoundToPowerOfTwo(index_length * batch_size) / + phi::backends::gpu::RoundToPowerOfTwo(index_length * batch_size) / block_width; block_height = MIN(block_height, PREDEFINED_BLOCK_SIZE / block_width); dim3 block_dim(block_width, block_height); dim3 grid_dim((index_length + block_dim.x - 1) / block_dim.x, (batch_size + block_dim.y - 1) / block_dim.y); - paddle::platform::LimitGridDim(ctx, &grid_dim); + phi::backends::gpu::LimitGridDim(ctx, &grid_dim); if (index_type == DataType::INT64) { const int64_t* index_data = index.data(); diff --git a/paddle/phi/kernels/gpu/index_select_grad_kernel.cu b/paddle/phi/kernels/gpu/index_select_grad_kernel.cu index 8561744e87b1f85312867447c5a9672280744d6b..fb9157db557e65062afd27d2da562ab232529585 100644 --- a/paddle/phi/kernels/gpu/index_select_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/index_select_grad_kernel.cu @@ -14,9 +14,9 @@ #include "paddle/phi/kernels/index_select_grad_kernel.h" -#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" #include "paddle/phi/backends/gpu/gpu_info.h" +#include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/kernels/funcs/math_function.h" @@ -87,7 +87,7 @@ void IndexSelectGradKernel(const Context& ctx, unsigned int block_dim = PADDLE_CUDA_NUM_THREADS; dim3 grid_dim = dim3((numel + block_dim - 1) / block_dim); - paddle::platform::LimitGridDim(ctx, &grid_dim); + phi::backends::gpu::LimitGridDim(ctx, &grid_dim); phi::funcs::SetConstant index_select_grad_init; index_select_grad_init(ctx, x_grad, static_cast(0)); diff --git a/paddle/phi/kernels/gpu/index_select_impl.h b/paddle/phi/kernels/gpu/index_select_impl.h index fc631b651540fb1f002c533da8e5b142cdc30ddc..da9cdbf52783b2822f167c264f7be3bd48741847 100644 --- a/paddle/phi/kernels/gpu/index_select_impl.h +++ b/paddle/phi/kernels/gpu/index_select_impl.h @@ -14,9 +14,9 @@ #pragma once -#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" #include "paddle/phi/backends/gpu/gpu_info.h" +#include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/utils/data_type.h" diff --git a/paddle/phi/kernels/gpu/index_select_kernel.cu b/paddle/phi/kernels/gpu/index_select_kernel.cu index e9228b54edf7c171f115a13fe4954d639db862ac..135ae5265189753d4281de71a61aba3eb063f1f2 100644 --- a/paddle/phi/kernels/gpu/index_select_kernel.cu +++ b/paddle/phi/kernels/gpu/index_select_kernel.cu @@ -14,9 +14,9 @@ #include "paddle/phi/kernels/index_select_kernel.h" -#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" #include "paddle/phi/backends/gpu/gpu_info.h" +#include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/kernels/gpu/index_select_impl.h" @@ -62,7 +62,7 @@ void IndexSelectKernel(const Context& ctx, unsigned int block_dim = PADDLE_CUDA_NUM_THREADS; dim3 grid_dim = dim3((numel + block_dim - 1) / block_dim); - paddle::platform::LimitGridDim(ctx, &grid_dim); + phi::backends::gpu::LimitGridDim(ctx, &grid_dim); if (index_type == phi::DataType::INT64) { const int64_t* index_data = index.data(); diff --git a/paddle/phi/kernels/gpu/nanmedian_grad_kernel.cu b/paddle/phi/kernels/gpu/nanmedian_grad_kernel.cu index b6333110485aff6d9cef572de0840a84e29c3684..30fd93533ed54ee80f61acc1e0dc900c442c5809 100644 --- a/paddle/phi/kernels/gpu/nanmedian_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/nanmedian_grad_kernel.cu @@ -14,9 +14,9 @@ #include "paddle/phi/kernels/nanmedian_grad_kernel.h" -#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" #include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_meta.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/phi/kernels/gpu/nanmedian_kernel.cu b/paddle/phi/kernels/gpu/nanmedian_kernel.cu index f61e413c9887d1d31df6686da4d2e120fc400c5c..132b9fa10b7c6fe9e38a2647b8950192f1fe6226 100644 --- a/paddle/phi/kernels/gpu/nanmedian_kernel.cu +++ b/paddle/phi/kernels/gpu/nanmedian_kernel.cu @@ -15,9 +15,9 @@ #include "paddle/phi/kernels/nanmedian_kernel.h" #include "paddle/fluid/memory/memcpy.h" -#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" #include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/full_kernel.h" #include "paddle/phi/kernels/impl/nanmedian_kernel_impl.h" diff --git a/paddle/phi/kernels/gpu/top_k_kernel.cu b/paddle/phi/kernels/gpu/top_k_kernel.cu index 9fc21b19a156c2587df02b3fbcde47a26fe59383..ecca63e52f05d2a59040d574748040bce614dc8d 100644 --- a/paddle/phi/kernels/gpu/top_k_kernel.cu +++ b/paddle/phi/kernels/gpu/top_k_kernel.cu @@ -173,8 +173,8 @@ void TopkKernel(const Context& dev_ctx, // NOTE: old matrix implementation of stride is different to eigen. const int kMaxHeight = 2048; int gridx = input_height < kMaxHeight ? input_height : kMaxHeight; - paddle::platform::GpuLaunchConfig config = - paddle::platform::GetGpuLaunchConfig1D(dev_ctx, input_width); + auto config = + phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, input_width); switch (config.thread_per_block.x) { #ifdef PADDLE_WITH_HIP FIXED_BLOCK_DIM( @@ -282,8 +282,8 @@ void TopkKernel(const Context& dev_ctx, const int kMaxHeight = 2048; int gridx = input_height < kMaxHeight ? input_height : kMaxHeight; - paddle::platform::GpuLaunchConfig config = - paddle::platform::GetGpuLaunchConfig1D(dev_ctx, input_width); + auto config = + phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, input_width); switch (config.thread_per_block.x) { #ifdef PADDLE_WITH_HIP FIXED_BLOCK_DIM(