Unverified · Commit 40a9b488 authored by PuQing, committed by GitHub

[PHI decoupling] remove "paddle/fluid/platform/device/gpu/gpu_launch_config.h" in phi (#47808)

* rm fluid gpu_launch_config

* fix type
Parent 0f3fb562
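For readers outside the Paddle codebase: every call site below swaps `paddle::platform::LimitGridDim` for `phi::backends::gpu::LimitGridDim`, a helper that clamps a launch grid to the device's maximum grid dimensions. Here is a minimal self-contained sketch of that idea, assuming only the CUDA runtime API; the name `LimitGridDimSketch` and the direct device query are illustrative, since phi's helper reads the limits from its `GPUContext` instead.

#include <algorithm>
#include <cuda_runtime.h>

// Sketch only: clamp each grid dimension to the device maximum so the
// kernel launch never exceeds what the GPU supports.
void LimitGridDimSketch(dim3* grid) {
  int device = 0;
  cudaGetDevice(&device);
  cudaDeviceProp prop;
  cudaGetDeviceProperties(&prop, device);
  grid->x = std::min<unsigned int>(grid->x, prop.maxGridSize[0]);
  grid->y = std::min<unsigned int>(grid->y, prop.maxGridSize[1]);
  grid->z = std::min<unsigned int>(grid->z, prop.maxGridSize[2]);
}

A kernel launched with a clamped grid must iterate with a grid-stride loop so every element is still visited; the kernels touched in this diff are written that way.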
@@ -25,7 +25,7 @@ limitations under the License. */
 // See Note [ Why still include the fluid headers? ]
 #include "paddle/fluid/memory/memcpy.h"
 #include "paddle/fluid/platform/device/gpu/gpu_device_function.h"
-#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
+#include "paddle/phi/backends/gpu/gpu_launch_config.h"
 #include "paddle/phi/kernels/primitive/kernel_primitives.h"
 #endif
@@ -982,7 +982,7 @@ static void ElemwiseGradBroadcast1CUDA(gpuStream_t stream,
   auto gplace = phi::GPUPlace(phi::backends::gpu::GetCurrentDeviceId());
   auto *ctx = static_cast<GPUContext *>(
       paddle::platform::DeviceContextPool::Instance().Get(gplace));
-  paddle::platform::LimitGridDim(*ctx, &grid_size);
+  phi::backends::gpu::LimitGridDim(*ctx, &grid_size);
   FastElemwiseGradBroadcast1CUDAKernel<<<grid_size, block_size, 0, stream>>>(
       x, y, out, dout, h, w, is_xsize_larger, dx_op, dy_op, dx, dy);
 }
@@ -1007,7 +1007,7 @@ static void ElemwiseGradBroadcast2CUDA(gpuStream_t stream,
   auto gplace = phi::GPUPlace(phi::backends::gpu::GetCurrentDeviceId());
   auto *ctx = static_cast<GPUContext *>(
       paddle::platform::DeviceContextPool::Instance().Get(gplace));
-  paddle::platform::LimitGridDim(*ctx, &grid_size);
+  phi::backends::gpu::LimitGridDim(*ctx, &grid_size);
   ElemwiseGradBroadcast2CUDAKernel<<<grid_size, block_size, 0, stream>>>(
       x, y, out, dout, pre, n, post, is_xsize_larger, dx_op, dy_op, dx, dy);
 }
@@ -1210,7 +1210,7 @@ void CommonGradBroadcastCUDA(const DenseTensor &x,
   } else {
     dim3 block_size = dim3(BLOCK_X, BLOCK_Y);
     dim3 grid_size = dim3((w + BLOCK_X - 1) / BLOCK_X);
-    paddle::platform::LimitGridDim(ctx, &grid_size);
+    phi::backends::gpu::LimitGridDim(ctx, &grid_size);
     FastCommonGradBroadcastCUDAKernelHeight<<<grid_size,
                                               block_size,
                                               0,
@@ -1247,7 +1247,7 @@ void CommonGradBroadcastCUDA(const DenseTensor &x,
   } else {
     dim3 block_size = dim3(BLOCK_X, BLOCK_Y);
     dim3 grid_size = dim3((w + BLOCK_X - 1) / BLOCK_X);
-    paddle::platform::LimitGridDim(ctx, &grid_size);
+    phi::backends::gpu::LimitGridDim(ctx, &grid_size);
     FastCommonGradBroadcastCUDAKernelHeight<<<grid_size,
                                               block_size,
                                               0,
@@ -1345,7 +1345,7 @@ void CommonGradBroadcastCUDA(const DenseTensor &x,
     int block_size = std::min(ELEMWISE_MAX_BLOCK_DIM, mid);
     dim3 grid_size = dim3(pre * post);
-    paddle::platform::LimitGridDim(ctx, &grid_size);
+    phi::backends::gpu::LimitGridDim(ctx, &grid_size);
     FastCommonGradBroadcastAllCUDAKernel<<<grid_size, block_size, 0, stream>>>(
         x_data,
@@ -1387,7 +1387,7 @@ void CommonGradBroadcastCUDA(const DenseTensor &x,
                         std::multiplies<int>());
     int block_size = std::min(ELEMWISE_MAX_BLOCK_DIM, mid);
     dim3 grid_size = dim3(pre * post);
-    paddle::platform::LimitGridDim(ctx, &grid_size);
+    phi::backends::gpu::LimitGridDim(ctx, &grid_size);
     // we need to calc y offset with blockid, so do x_pre/y_pre to get
     // left size.
     if (k_pre != pre) k_pre = pre / k_pre;
@@ -1418,7 +1418,7 @@ void CommonGradBroadcastCUDA(const DenseTensor &x,
                         std::multiplies<int>());
     int block_size = std::min(ELEMWISE_MAX_BLOCK_DIM, mid);
     dim3 grid_size = dim3(pre * post);
-    paddle::platform::LimitGridDim(ctx, &grid_size);
+    phi::backends::gpu::LimitGridDim(ctx, &grid_size);
     if (k_pre != pre) k_pre = pre / k_pre;
     FastCommonGradBroadcastOneCUDAKernel<<<grid_size,
......
@@ -18,7 +18,6 @@ limitations under the License. */
 #include "paddle/fluid/memory/memcpy.h"
 // TODO(paddle-dev): move gpu_primitives.h to phi
-#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
 #include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
 #include "paddle/phi/backends/gpu/gpu_launch_config.h"
 #include "paddle/phi/common/place.h"
@@ -113,7 +112,7 @@ void GPUGather(const phi::GPUContext& ctx,
   int block = 512;
   int64_t n = slice_size * index_size;
   dim3 grid = dim3((n + block - 1) / block);
-  paddle::platform::LimitGridDim(ctx, &grid);
+  phi::backends::gpu::LimitGridDim(ctx, &grid);
   GatherCUDAKernel<T, IndexT><<<grid, block, 0, ctx.stream()>>>(
       p_src, p_index, p_output, index_size, slice_size);
@@ -155,7 +154,7 @@ void GPUGatherNd(const phi::GPUContext& ctx,
   int block = 512;
   int64_t n = slice_size * remain_numel;
   dim3 grid = dim3((n + block - 1) / block);
-  paddle::platform::LimitGridDim(ctx, &grid);
+  phi::backends::gpu::LimitGridDim(ctx, &grid);
   GatherNdCUDAKernel<T, IndexT><<<grid, block, 0, ctx.stream()>>>(p_input,
                                                                   g_input_dims,
......
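Both gather hunks above use the same launch recipe: a fixed 512-thread block, a ceiling-division grid over n elements, then the grid clamp. A self-contained sketch of that recipe follows; the kernel and function names are illustrative, not Paddle's, and the 65535 cap is an assumed stand-in for the device limit.

#include <algorithm>
#include <cstdint>
#include <cuda_runtime.h>

// Illustrative element-wise kernel with a grid-stride loop, so it stays
// correct even when the grid was clamped below ceil(n / block).
__global__ void CopyKernel(const float* src, float* dst, int64_t n) {
  int64_t stride = static_cast<int64_t>(blockDim.x) * gridDim.x;
  for (int64_t i = static_cast<int64_t>(blockIdx.x) * blockDim.x + threadIdx.x;
       i < n; i += stride) {
    dst[i] = src[i];
  }
}

void LaunchCopy(const float* src, float* dst, int64_t n, cudaStream_t stream) {
  int block = 512;
  dim3 grid((n + block - 1) / block);  // ceiling division: one thread per element
  grid.x = std::min(grid.x, 65535u);   // stand-in for LimitGridDim with an assumed cap
  CopyKernel<<<grid, block, 0, stream>>>(src, dst, n);
}

For example, n = 1000 with block = 512 gives grid = (1000 + 511) / 512 = 2 blocks.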
@@ -34,9 +34,9 @@ namespace cub = hipcub;
 #ifndef PADDLE_WITH_XPU_KP
 #include "paddle/fluid/platform/device/gpu/gpu_device_function.h"
-#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/backends/gpu/gpu_info.h"
+#include "paddle/phi/backends/gpu/gpu_launch_config.h"
 #endif
 #include "paddle/phi/kernels/cast_kernel.h"
@@ -337,7 +337,7 @@ struct ReduceConfig {
     SetBlockDim();
 #ifndef PADDLE_WITH_XPU_KP
     // step5: limit the grid to prevent thead overflow
-    paddle::platform::LimitGridDim(dev_ctx, &grid);
+    phi::backends::gpu::LimitGridDim(dev_ctx, &grid);
 #endif
   }
......
@@ -16,8 +16,8 @@ limitations under the License. */
 #include <unordered_set>
 #include <vector>
-#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
 #include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
+#include "paddle/phi/backends/gpu/gpu_launch_config.h"
 #include "paddle/phi/common/place.h"
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
@@ -158,7 +158,7 @@ void GPUScatterAssign(const phi::GPUContext& ctx,
   int block = 512;
   int64_t n = slice_size * index_size;
   dim3 grid = dim3((n + block - 1) / block);
-  paddle::platform::LimitGridDim(ctx, &grid);
+  phi::backends::gpu::LimitGridDim(ctx, &grid);
   // if not overwrite mode, init data
   if (!overwrite) {
@@ -190,7 +190,7 @@ void GPUScatterGradForX(const phi::GPUContext& ctx,
   int64_t n = slice_size * index_size;
   int64_t height = (n + block - 1) / block;
   dim3 grid = dim3((n + block - 1) / block);
-  paddle::platform::LimitGridDim(ctx, &grid);
+  phi::backends::gpu::LimitGridDim(ctx, &grid);
   ScatterInitCUDAKernel<T, IndexT><<<grid, block, 0, ctx.stream()>>>(
       p_index, p_output, index_size, slice_size);
@@ -231,7 +231,7 @@ void GPUScatterNdAdd(const phi::GPUContext& ctx,
   int block = 512;
   int64_t n = slice_size * remain_numel;
   dim3 grid = dim3((n + block - 1) / block);
-  paddle::platform::LimitGridDim(ctx, &grid);
+  phi::backends::gpu::LimitGridDim(ctx, &grid);
   ScatterNdCUDAKernel<T, IndexT>
       <<<grid, block, 0, ctx.stream()>>>(p_update,
......
@@ -14,9 +14,9 @@
 #include "paddle/phi/kernels/histogram_kernel.h"
-#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
 #include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
+#include "paddle/phi/backends/gpu/gpu_launch_config.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/funcs/eigen/common.h"
 #include "paddle/phi/kernels/funcs/eigen/eigen_function.h"
......
@@ -14,9 +14,9 @@
 #include "paddle/phi/kernels/index_add_grad_kernel.h"
-#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
 #include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
 #include "paddle/phi/backends/gpu/gpu_info.h"
+#include "paddle/phi/backends/gpu/gpu_launch_config.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/core/utils/data_type.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
@@ -71,7 +71,7 @@ void IndexAddGradKernel(const Context& ctx,
   // get add_value_grad: index_select(out_grad, index, axis)
   unsigned int block_dim = PADDLE_CUDA_NUM_THREADS;
   dim3 grid_dim = dim3((numel + block_dim - 1) / block_dim);
-  paddle::platform::LimitGridDim(ctx, &grid_dim);
+  phi::backends::gpu::LimitGridDim(ctx, &grid_dim);
   if (index_type == phi::DataType::INT64) {
     const int64_t* index_data = index.data<int64_t>();
......
@@ -14,9 +14,9 @@
 #include "paddle/phi/kernels/index_add_kernel.h"
-#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
 #include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
 #include "paddle/phi/backends/gpu/gpu_info.h"
+#include "paddle/phi/backends/gpu/gpu_launch_config.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/core/utils/data_type.h"
@@ -75,7 +75,7 @@ void IndexAddKernel(const Context& ctx,
   unsigned int block_dim = PADDLE_CUDA_NUM_THREADS;
   dim3 grid_dim = dim3((numel + block_dim - 1) / block_dim);
-  paddle::platform::LimitGridDim(ctx, &grid_dim);
+  phi::backends::gpu::LimitGridDim(ctx, &grid_dim);
   // copy input to output.
   // todo(@limin29): inplace do not need copy.
......
@@ -18,9 +18,9 @@
 #include <vector>
 #include "paddle/fluid/framework/convert_utils.h"
-#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
 #include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
+#include "paddle/phi/backends/gpu/gpu_launch_config.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
@@ -92,16 +92,16 @@ void IndexSampleGradKernel(const Context& ctx,
   size_t index_length = index_dim[1];
   bool same_data_in_index_row = index_length == 1 ? false : true;
-  auto block_width = paddle::platform::RoundToPowerOfTwo(index_length);
+  auto block_width = phi::backends::gpu::RoundToPowerOfTwo(index_length);
   block_width = MIN(block_width, PREDEFINED_BLOCK_SIZE_X);
   auto block_height =
-      paddle::platform::RoundToPowerOfTwo(index_length * batch_size) /
+      phi::backends::gpu::RoundToPowerOfTwo(index_length * batch_size) /
       block_width;
   block_height = MIN(block_height, PREDEFINED_BLOCK_SIZE / block_width);
   dim3 block_dim(block_width, block_height);
   dim3 grid_dim((index_length + block_dim.x - 1) / block_dim.x,
                 (batch_size + block_dim.y - 1) / block_dim.y);
-  paddle::platform::LimitGridDim(ctx, &grid_dim);
+  phi::backends::gpu::LimitGridDim(ctx, &grid_dim);
   phi::funcs::SetConstant<Context, T> set_zero;
   set_zero(ctx, x_grad, static_cast<T>(0));
......
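The block-shape arithmetic in the hunk above (and again in the IndexSampleKernel hunk below) is easy to misread, so here is a sketch of it in isolation. `RoundToPowerOfTwoSketch` stands in for `phi::backends::gpu::RoundToPowerOfTwo` (the real helper may also cap its result), and the caps 512 and 1024 are assumed values for `PREDEFINED_BLOCK_SIZE_X` and `PREDEFINED_BLOCK_SIZE`.

#include <algorithm>
#include <cstddef>

// Stand-in for phi::backends::gpu::RoundToPowerOfTwo: smallest power of
// two that is >= n.
inline size_t RoundToPowerOfTwoSketch(size_t n) {
  size_t p = 1;
  while (p < n) p <<= 1;
  return p;
}

// Mirrors the launch-shape math in the diff, with assumed caps 512 / 1024.
void BlockShapeExample(size_t index_length, size_t batch_size) {
  size_t block_width =
      std::min(RoundToPowerOfTwoSketch(index_length), size_t{512});
  size_t block_height =
      std::min(RoundToPowerOfTwoSketch(index_length * batch_size) / block_width,
               size_t{1024} / block_width);
  // index_length = 20, batch_size = 8:
  //   block_width  = min(32, 512)           = 32
  //   block_height = min(256 / 32, 1024/32) = min(8, 32) = 8
  // -> a 32 x 8 thread block (256 threads).
  (void)block_width;
  (void)block_height;
}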
@@ -18,8 +18,8 @@
 #include <vector>
 #include "paddle/fluid/framework/convert_utils.h"
-#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
+#include "paddle/phi/backends/gpu/gpu_launch_config.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
@@ -80,16 +80,16 @@ void IndexSampleKernel(const Context& ctx,
   size_t input_length = input_dim[1];
   size_t index_length = index_dim[1];
-  auto block_width = paddle::platform::RoundToPowerOfTwo(index_length);
+  auto block_width = phi::backends::gpu::RoundToPowerOfTwo(index_length);
   block_width = MIN(block_width, PREDEFINED_BLOCK_SIZE_X);
   int block_height =
-      paddle::platform::RoundToPowerOfTwo(index_length * batch_size) /
+      phi::backends::gpu::RoundToPowerOfTwo(index_length * batch_size) /
       block_width;
   block_height = MIN(block_height, PREDEFINED_BLOCK_SIZE / block_width);
   dim3 block_dim(block_width, block_height);
   dim3 grid_dim((index_length + block_dim.x - 1) / block_dim.x,
                 (batch_size + block_dim.y - 1) / block_dim.y);
-  paddle::platform::LimitGridDim(ctx, &grid_dim);
+  phi::backends::gpu::LimitGridDim(ctx, &grid_dim);
   if (index_type == DataType::INT64) {
     const int64_t* index_data = index.data<int64_t>();
......
@@ -14,9 +14,9 @@
 #include "paddle/phi/kernels/index_select_grad_kernel.h"
-#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
 #include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
 #include "paddle/phi/backends/gpu/gpu_info.h"
+#include "paddle/phi/backends/gpu/gpu_launch_config.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/core/utils/data_type.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
@@ -87,7 +87,7 @@ void IndexSelectGradKernel(const Context& ctx,
   unsigned int block_dim = PADDLE_CUDA_NUM_THREADS;
   dim3 grid_dim = dim3((numel + block_dim - 1) / block_dim);
-  paddle::platform::LimitGridDim(ctx, &grid_dim);
+  phi::backends::gpu::LimitGridDim(ctx, &grid_dim);
   phi::funcs::SetConstant<phi::GPUContext, T> index_select_grad_init;
   index_select_grad_init(ctx, x_grad, static_cast<T>(0));
......
@@ -14,9 +14,9 @@
 #pragma once
-#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
 #include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
 #include "paddle/phi/backends/gpu/gpu_info.h"
+#include "paddle/phi/backends/gpu/gpu_launch_config.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/core/utils/data_type.h"
......
@@ -14,9 +14,9 @@
 #include "paddle/phi/kernels/index_select_kernel.h"
-#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
 #include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
 #include "paddle/phi/backends/gpu/gpu_info.h"
+#include "paddle/phi/backends/gpu/gpu_launch_config.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/core/utils/data_type.h"
 #include "paddle/phi/kernels/gpu/index_select_impl.h"
@@ -62,7 +62,7 @@ void IndexSelectKernel(const Context& ctx,
   unsigned int block_dim = PADDLE_CUDA_NUM_THREADS;
   dim3 grid_dim = dim3((numel + block_dim - 1) / block_dim);
-  paddle::platform::LimitGridDim(ctx, &grid_dim);
+  phi::backends::gpu::LimitGridDim(ctx, &grid_dim);
   if (index_type == phi::DataType::INT64) {
     const int64_t* index_data = index.data<int64_t>();
......
@@ -14,9 +14,9 @@
 #include "paddle/phi/kernels/nanmedian_grad_kernel.h"
-#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
 #include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
+#include "paddle/phi/backends/gpu/gpu_launch_config.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/core/tensor_meta.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
......
@@ -15,9 +15,9 @@
 #include "paddle/phi/kernels/nanmedian_kernel.h"
 #include "paddle/fluid/memory/memcpy.h"
-#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
 #include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
+#include "paddle/phi/backends/gpu/gpu_launch_config.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/full_kernel.h"
 #include "paddle/phi/kernels/impl/nanmedian_kernel_impl.h"
......
@@ -173,8 +173,8 @@ void TopkKernel(const Context& dev_ctx,
     // NOTE: old matrix implementation of stride is different to eigen.
     const int kMaxHeight = 2048;
     int gridx = input_height < kMaxHeight ? input_height : kMaxHeight;
-    paddle::platform::GpuLaunchConfig config =
-        paddle::platform::GetGpuLaunchConfig1D(dev_ctx, input_width);
+    auto config =
+        phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, input_width);
     switch (config.thread_per_block.x) {
 #ifdef PADDLE_WITH_HIP
       FIXED_BLOCK_DIM(
@@ -282,8 +282,8 @@ void TopkKernel(const Context& dev_ctx,
     const int kMaxHeight = 2048;
     int gridx = input_height < kMaxHeight ? input_height : kMaxHeight;
-    paddle::platform::GpuLaunchConfig config =
-        paddle::platform::GetGpuLaunchConfig1D(dev_ctx, input_width);
+    auto config =
+        phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, input_width);
     switch (config.thread_per_block.x) {
 #ifdef PADDLE_WITH_HIP
       FIXED_BLOCK_DIM(
......
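The top-k hunks switch on `config.thread_per_block.x`, so what matters here is only that `phi::backends::gpu::GetGpuLaunchConfig1D` picks a power-of-two block for a 1-D workload. A rough sketch of that behaviour, assuming a simple 1024-thread cap and `numel >= 1` (the real helper also weighs SM count and occupancy); the struct and function names here are hypothetical, though the field names follow the diff above:

#include <cstdint>
#include <cuda_runtime.h>

// Hypothetical stand-in for phi's GpuLaunchConfig.
struct LaunchConfigSketch {
  dim3 thread_per_block;
  dim3 block_per_grid;
};

LaunchConfigSketch GetGpuLaunchConfig1DSketch(int64_t numel) {
  // Smallest power of two covering numel, capped at 1024 threads.
  unsigned int threads = 1;
  while (threads < numel && threads < 1024u) threads <<= 1;
  LaunchConfigSketch cfg;
  cfg.thread_per_block = dim3(threads);
  cfg.block_per_grid = dim3((numel + threads - 1) / threads);
  return cfg;
}

With this sketch, numel = 300 yields a 512-thread block; the switch over config.thread_per_block.x then dispatches to the matching fixed-block-size kernel instantiation.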