Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
40a9b488
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
40a9b488
编写于
11月 10, 2022
作者:
P
PuQing
提交者:
GitHub
11月 10, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[PHI decoupling] remove "paddle/fluid/platform/device/gpu/gpu_launch_config.h" in phi (#47808)
* rm fluid gpu_launch_config * fix type
上级
0f3fb562
变更
15
显示空白变更内容
内联
并排
Showing 15 changed files with 40 additions and 41 deletions
+40
-41
paddle/phi/kernels/funcs/elementwise_grad_base.h
paddle/phi/kernels/funcs/elementwise_grad_base.h
+8
-8
paddle/phi/kernels/funcs/gather.cu.h
paddle/phi/kernels/funcs/gather.cu.h
+2
-3
paddle/phi/kernels/funcs/reduce_function.h
paddle/phi/kernels/funcs/reduce_function.h
+2
-2
paddle/phi/kernels/funcs/scatter.cu.h
paddle/phi/kernels/funcs/scatter.cu.h
+4
-4
paddle/phi/kernels/gpu/histogram_kernel.cu
paddle/phi/kernels/gpu/histogram_kernel.cu
+1
-1
paddle/phi/kernels/gpu/index_add_grad_kernel.cu
paddle/phi/kernels/gpu/index_add_grad_kernel.cu
+2
-2
paddle/phi/kernels/gpu/index_add_kernel.cu
paddle/phi/kernels/gpu/index_add_kernel.cu
+2
-2
paddle/phi/kernels/gpu/index_sample_grad_kernel.cu
paddle/phi/kernels/gpu/index_sample_grad_kernel.cu
+4
-4
paddle/phi/kernels/gpu/index_sample_kernel.cu
paddle/phi/kernels/gpu/index_sample_kernel.cu
+4
-4
paddle/phi/kernels/gpu/index_select_grad_kernel.cu
paddle/phi/kernels/gpu/index_select_grad_kernel.cu
+2
-2
paddle/phi/kernels/gpu/index_select_impl.h
paddle/phi/kernels/gpu/index_select_impl.h
+1
-1
paddle/phi/kernels/gpu/index_select_kernel.cu
paddle/phi/kernels/gpu/index_select_kernel.cu
+2
-2
paddle/phi/kernels/gpu/nanmedian_grad_kernel.cu
paddle/phi/kernels/gpu/nanmedian_grad_kernel.cu
+1
-1
paddle/phi/kernels/gpu/nanmedian_kernel.cu
paddle/phi/kernels/gpu/nanmedian_kernel.cu
+1
-1
paddle/phi/kernels/gpu/top_k_kernel.cu
paddle/phi/kernels/gpu/top_k_kernel.cu
+4
-4
未找到文件。
paddle/phi/kernels/funcs/elementwise_grad_base.h
浏览文件 @
40a9b488
...
@@ -25,7 +25,7 @@ limitations under the License. */
...
@@ -25,7 +25,7 @@ limitations under the License. */
// See Note [ Why still include the fluid headers? ]
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/device/gpu/gpu_device_function.h"
#include "paddle/fluid/platform/device/gpu/gpu_device_function.h"
#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
#include "paddle/phi/backends/gpu/gpu_launch_config.h"
#include "paddle/phi/kernels/primitive/kernel_primitives.h"
#include "paddle/phi/kernels/primitive/kernel_primitives.h"
#endif
#endif
...
@@ -982,7 +982,7 @@ static void ElemwiseGradBroadcast1CUDA(gpuStream_t stream,
...
@@ -982,7 +982,7 @@ static void ElemwiseGradBroadcast1CUDA(gpuStream_t stream,
auto
gplace
=
phi
::
GPUPlace
(
phi
::
backends
::
gpu
::
GetCurrentDeviceId
());
auto
gplace
=
phi
::
GPUPlace
(
phi
::
backends
::
gpu
::
GetCurrentDeviceId
());
auto
*
ctx
=
static_cast
<
GPUContext
*>
(
auto
*
ctx
=
static_cast
<
GPUContext
*>
(
paddle
::
platform
::
DeviceContextPool
::
Instance
().
Get
(
gplace
));
paddle
::
platform
::
DeviceContextPool
::
Instance
().
Get
(
gplace
));
paddle::platform::LimitGridDim(*ctx, &grid_size);
phi::backends::gpu::LimitGridDim(*ctx, &grid_size);
FastElemwiseGradBroadcast1CUDAKernel
<<<
grid_size
,
block_size
,
0
,
stream
>>>
(
FastElemwiseGradBroadcast1CUDAKernel
<<<
grid_size
,
block_size
,
0
,
stream
>>>
(
x
,
y
,
out
,
dout
,
h
,
w
,
is_xsize_larger
,
dx_op
,
dy_op
,
dx
,
dy
);
x
,
y
,
out
,
dout
,
h
,
w
,
is_xsize_larger
,
dx_op
,
dy_op
,
dx
,
dy
);
}
}
...
@@ -1007,7 +1007,7 @@ static void ElemwiseGradBroadcast2CUDA(gpuStream_t stream,
...
@@ -1007,7 +1007,7 @@ static void ElemwiseGradBroadcast2CUDA(gpuStream_t stream,
auto
gplace
=
phi
::
GPUPlace
(
phi
::
backends
::
gpu
::
GetCurrentDeviceId
());
auto
gplace
=
phi
::
GPUPlace
(
phi
::
backends
::
gpu
::
GetCurrentDeviceId
());
auto
*
ctx
=
static_cast
<
GPUContext
*>
(
auto
*
ctx
=
static_cast
<
GPUContext
*>
(
paddle
::
platform
::
DeviceContextPool
::
Instance
().
Get
(
gplace
));
paddle
::
platform
::
DeviceContextPool
::
Instance
().
Get
(
gplace
));
paddle::platform::LimitGridDim(*ctx, &grid_size);
phi::backends::gpu::LimitGridDim(*ctx, &grid_size);
ElemwiseGradBroadcast2CUDAKernel
<<<
grid_size
,
block_size
,
0
,
stream
>>>
(
ElemwiseGradBroadcast2CUDAKernel
<<<
grid_size
,
block_size
,
0
,
stream
>>>
(
x
,
y
,
out
,
dout
,
pre
,
n
,
post
,
is_xsize_larger
,
dx_op
,
dy_op
,
dx
,
dy
);
x
,
y
,
out
,
dout
,
pre
,
n
,
post
,
is_xsize_larger
,
dx_op
,
dy_op
,
dx
,
dy
);
}
}
...
@@ -1210,7 +1210,7 @@ void CommonGradBroadcastCUDA(const DenseTensor &x,
...
@@ -1210,7 +1210,7 @@ void CommonGradBroadcastCUDA(const DenseTensor &x,
}
else
{
}
else
{
dim3
block_size
=
dim3
(
BLOCK_X
,
BLOCK_Y
);
dim3
block_size
=
dim3
(
BLOCK_X
,
BLOCK_Y
);
dim3
grid_size
=
dim3
((
w
+
BLOCK_X
-
1
)
/
BLOCK_X
);
dim3
grid_size
=
dim3
((
w
+
BLOCK_X
-
1
)
/
BLOCK_X
);
paddle::platform::LimitGridDim(ctx, &grid_size);
phi::backends::gpu::LimitGridDim(ctx, &grid_size);
FastCommonGradBroadcastCUDAKernelHeight
<<<
grid_size
,
FastCommonGradBroadcastCUDAKernelHeight
<<<
grid_size
,
block_size
,
block_size
,
0
,
0
,
...
@@ -1247,7 +1247,7 @@ void CommonGradBroadcastCUDA(const DenseTensor &x,
...
@@ -1247,7 +1247,7 @@ void CommonGradBroadcastCUDA(const DenseTensor &x,
}
else
{
}
else
{
dim3
block_size
=
dim3
(
BLOCK_X
,
BLOCK_Y
);
dim3
block_size
=
dim3
(
BLOCK_X
,
BLOCK_Y
);
dim3
grid_size
=
dim3
((
w
+
BLOCK_X
-
1
)
/
BLOCK_X
);
dim3
grid_size
=
dim3
((
w
+
BLOCK_X
-
1
)
/
BLOCK_X
);
paddle::platform::LimitGridDim(ctx, &grid_size);
phi::backends::gpu::LimitGridDim(ctx, &grid_size);
FastCommonGradBroadcastCUDAKernelHeight
<<<
grid_size
,
FastCommonGradBroadcastCUDAKernelHeight
<<<
grid_size
,
block_size
,
block_size
,
0
,
0
,
...
@@ -1345,7 +1345,7 @@ void CommonGradBroadcastCUDA(const DenseTensor &x,
...
@@ -1345,7 +1345,7 @@ void CommonGradBroadcastCUDA(const DenseTensor &x,
int
block_size
=
std
::
min
(
ELEMWISE_MAX_BLOCK_DIM
,
mid
);
int
block_size
=
std
::
min
(
ELEMWISE_MAX_BLOCK_DIM
,
mid
);
dim3
grid_size
=
dim3
(
pre
*
post
);
dim3
grid_size
=
dim3
(
pre
*
post
);
paddle::platform::LimitGridDim(ctx, &grid_size);
phi::backends::gpu::LimitGridDim(ctx, &grid_size);
FastCommonGradBroadcastAllCUDAKernel
<<<
grid_size
,
block_size
,
0
,
stream
>>>
(
FastCommonGradBroadcastAllCUDAKernel
<<<
grid_size
,
block_size
,
0
,
stream
>>>
(
x_data
,
x_data
,
...
@@ -1387,7 +1387,7 @@ void CommonGradBroadcastCUDA(const DenseTensor &x,
...
@@ -1387,7 +1387,7 @@ void CommonGradBroadcastCUDA(const DenseTensor &x,
std
::
multiplies
<
int
>
());
std
::
multiplies
<
int
>
());
int
block_size
=
std
::
min
(
ELEMWISE_MAX_BLOCK_DIM
,
mid
);
int
block_size
=
std
::
min
(
ELEMWISE_MAX_BLOCK_DIM
,
mid
);
dim3
grid_size
=
dim3
(
pre
*
post
);
dim3
grid_size
=
dim3
(
pre
*
post
);
paddle::platform::LimitGridDim(ctx, &grid_size);
phi::backends::gpu::LimitGridDim(ctx, &grid_size);
// we need to calc y offset with blockid, so do x_pre/y_pre to get
// we need to calc y offset with blockid, so do x_pre/y_pre to get
// left size.
// left size.
if
(
k_pre
!=
pre
)
k_pre
=
pre
/
k_pre
;
if
(
k_pre
!=
pre
)
k_pre
=
pre
/
k_pre
;
...
@@ -1418,7 +1418,7 @@ void CommonGradBroadcastCUDA(const DenseTensor &x,
...
@@ -1418,7 +1418,7 @@ void CommonGradBroadcastCUDA(const DenseTensor &x,
std
::
multiplies
<
int
>
());
std
::
multiplies
<
int
>
());
int
block_size
=
std
::
min
(
ELEMWISE_MAX_BLOCK_DIM
,
mid
);
int
block_size
=
std
::
min
(
ELEMWISE_MAX_BLOCK_DIM
,
mid
);
dim3
grid_size
=
dim3
(
pre
*
post
);
dim3
grid_size
=
dim3
(
pre
*
post
);
paddle::platform::LimitGridDim(ctx, &grid_size);
phi::backends::gpu::LimitGridDim(ctx, &grid_size);
if
(
k_pre
!=
pre
)
k_pre
=
pre
/
k_pre
;
if
(
k_pre
!=
pre
)
k_pre
=
pre
/
k_pre
;
FastCommonGradBroadcastOneCUDAKernel
<<<
grid_size
,
FastCommonGradBroadcastOneCUDAKernel
<<<
grid_size
,
...
...
paddle/phi/kernels/funcs/gather.cu.h
浏览文件 @
40a9b488
...
@@ -18,7 +18,6 @@ limitations under the License. */
...
@@ -18,7 +18,6 @@ limitations under the License. */
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/memory/memcpy.h"
// TODO(paddle-dev): move gpu_primitives.h to phi
// TODO(paddle-dev): move gpu_primitives.h to phi
#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
#include "paddle/phi/backends/gpu/gpu_launch_config.h"
#include "paddle/phi/backends/gpu/gpu_launch_config.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/common/place.h"
...
@@ -113,7 +112,7 @@ void GPUGather(const phi::GPUContext& ctx,
...
@@ -113,7 +112,7 @@ void GPUGather(const phi::GPUContext& ctx,
int
block
=
512
;
int
block
=
512
;
int64_t
n
=
slice_size
*
index_size
;
int64_t
n
=
slice_size
*
index_size
;
dim3
grid
=
dim3
((
n
+
block
-
1
)
/
block
);
dim3
grid
=
dim3
((
n
+
block
-
1
)
/
block
);
paddle::platform::LimitGridDim(ctx, &grid);
phi::backends::gpu::LimitGridDim(ctx, &grid);
GatherCUDAKernel
<
T
,
IndexT
><<<
grid
,
block
,
0
,
ctx
.
stream
()
>>>
(
GatherCUDAKernel
<
T
,
IndexT
><<<
grid
,
block
,
0
,
ctx
.
stream
()
>>>
(
p_src
,
p_index
,
p_output
,
index_size
,
slice_size
);
p_src
,
p_index
,
p_output
,
index_size
,
slice_size
);
...
@@ -155,7 +154,7 @@ void GPUGatherNd(const phi::GPUContext& ctx,
...
@@ -155,7 +154,7 @@ void GPUGatherNd(const phi::GPUContext& ctx,
int
block
=
512
;
int
block
=
512
;
int64_t
n
=
slice_size
*
remain_numel
;
int64_t
n
=
slice_size
*
remain_numel
;
dim3
grid
=
dim3
((
n
+
block
-
1
)
/
block
);
dim3
grid
=
dim3
((
n
+
block
-
1
)
/
block
);
paddle::platform::LimitGridDim(ctx, &grid);
phi::backends::gpu::LimitGridDim(ctx, &grid);
GatherNdCUDAKernel
<
T
,
IndexT
><<<
grid
,
block
,
0
,
ctx
.
stream
()
>>>
(
p_input
,
GatherNdCUDAKernel
<
T
,
IndexT
><<<
grid
,
block
,
0
,
ctx
.
stream
()
>>>
(
p_input
,
g_input_dims
,
g_input_dims
,
...
...
paddle/phi/kernels/funcs/reduce_function.h
浏览文件 @
40a9b488
...
@@ -34,9 +34,9 @@ namespace cub = hipcub;
...
@@ -34,9 +34,9 @@ namespace cub = hipcub;
#ifndef PADDLE_WITH_XPU_KP
#ifndef PADDLE_WITH_XPU_KP
#include "paddle/fluid/platform/device/gpu/gpu_device_function.h"
#include "paddle/fluid/platform/device/gpu/gpu_device_function.h"
#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/gpu/gpu_info.h"
#include "paddle/phi/backends/gpu/gpu_info.h"
#include "paddle/phi/backends/gpu/gpu_launch_config.h"
#endif
#endif
#include "paddle/phi/kernels/cast_kernel.h"
#include "paddle/phi/kernels/cast_kernel.h"
...
@@ -337,7 +337,7 @@ struct ReduceConfig {
...
@@ -337,7 +337,7 @@ struct ReduceConfig {
SetBlockDim
();
SetBlockDim
();
#ifndef PADDLE_WITH_XPU_KP
#ifndef PADDLE_WITH_XPU_KP
// step5: limit the grid to prevent thread overflow
// step5: limit the grid to prevent thread overflow
paddle::platform::LimitGridDim(dev_ctx, &grid);
phi::backends::gpu::LimitGridDim(dev_ctx, &grid);
#endif
#endif
}
}
...
...
paddle/phi/kernels/funcs/scatter.cu.h
浏览文件 @
40a9b488
...
@@ -16,8 +16,8 @@ limitations under the License. */
...
@@ -16,8 +16,8 @@ limitations under the License. */
#include <unordered_set>
#include <unordered_set>
#include <vector>
#include <vector>
#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
#include "paddle/phi/backends/gpu/gpu_launch_config.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/math_function.h"
...
@@ -158,7 +158,7 @@ void GPUScatterAssign(const phi::GPUContext& ctx,
...
@@ -158,7 +158,7 @@ void GPUScatterAssign(const phi::GPUContext& ctx,
int
block
=
512
;
int
block
=
512
;
int64_t
n
=
slice_size
*
index_size
;
int64_t
n
=
slice_size
*
index_size
;
dim3
grid
=
dim3
((
n
+
block
-
1
)
/
block
);
dim3
grid
=
dim3
((
n
+
block
-
1
)
/
block
);
paddle::platform::LimitGridDim(ctx, &grid);
phi::backends::gpu::LimitGridDim(ctx, &grid);
// if not overwrite mode, init data
// if not overwrite mode, init data
if
(
!
overwrite
)
{
if
(
!
overwrite
)
{
...
@@ -190,7 +190,7 @@ void GPUScatterGradForX(const phi::GPUContext& ctx,
...
@@ -190,7 +190,7 @@ void GPUScatterGradForX(const phi::GPUContext& ctx,
int64_t
n
=
slice_size
*
index_size
;
int64_t
n
=
slice_size
*
index_size
;
int64_t
height
=
(
n
+
block
-
1
)
/
block
;
int64_t
height
=
(
n
+
block
-
1
)
/
block
;
dim3
grid
=
dim3
((
n
+
block
-
1
)
/
block
);
dim3
grid
=
dim3
((
n
+
block
-
1
)
/
block
);
paddle::platform::LimitGridDim(ctx, &grid);
phi::backends::gpu::LimitGridDim(ctx, &grid);
ScatterInitCUDAKernel
<
T
,
IndexT
><<<
grid
,
block
,
0
,
ctx
.
stream
()
>>>
(
ScatterInitCUDAKernel
<
T
,
IndexT
><<<
grid
,
block
,
0
,
ctx
.
stream
()
>>>
(
p_index
,
p_output
,
index_size
,
slice_size
);
p_index
,
p_output
,
index_size
,
slice_size
);
...
@@ -231,7 +231,7 @@ void GPUScatterNdAdd(const phi::GPUContext& ctx,
...
@@ -231,7 +231,7 @@ void GPUScatterNdAdd(const phi::GPUContext& ctx,
int
block
=
512
;
int
block
=
512
;
int64_t
n
=
slice_size
*
remain_numel
;
int64_t
n
=
slice_size
*
remain_numel
;
dim3
grid
=
dim3
((
n
+
block
-
1
)
/
block
);
dim3
grid
=
dim3
((
n
+
block
-
1
)
/
block
);
paddle::platform::LimitGridDim(ctx, &grid);
phi::backends::gpu::LimitGridDim(ctx, &grid);
ScatterNdCUDAKernel
<
T
,
IndexT
>
ScatterNdCUDAKernel
<
T
,
IndexT
>
<<<
grid
,
block
,
0
,
ctx
.
stream
()
>>>
(
p_update
,
<<<
grid
,
block
,
0
,
ctx
.
stream
()
>>>
(
p_update
,
...
...
paddle/phi/kernels/gpu/histogram_kernel.cu
浏览文件 @
40a9b488
...
@@ -14,9 +14,9 @@
...
@@ -14,9 +14,9 @@
#include "paddle/phi/kernels/histogram_kernel.h"
#include "paddle/phi/kernels/histogram_kernel.h"
#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/gpu/gpu_launch_config.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/eigen/eigen_function.h"
#include "paddle/phi/kernels/funcs/eigen/eigen_function.h"
...
...
paddle/phi/kernels/gpu/index_add_grad_kernel.cu
浏览文件 @
40a9b488
...
@@ -14,9 +14,9 @@
...
@@ -14,9 +14,9 @@
#include "paddle/phi/kernels/index_add_grad_kernel.h"
#include "paddle/phi/kernels/index_add_grad_kernel.h"
#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
#include "paddle/phi/backends/gpu/gpu_info.h"
#include "paddle/phi/backends/gpu/gpu_info.h"
#include "paddle/phi/backends/gpu/gpu_launch_config.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/utils/data_type.h"
#include "paddle/phi/core/utils/data_type.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/math_function.h"
...
@@ -71,7 +71,7 @@ void IndexAddGradKernel(const Context& ctx,
...
@@ -71,7 +71,7 @@ void IndexAddGradKernel(const Context& ctx,
// get add_value_grad: index_select(out_grad, index, axis)
// get add_value_grad: index_select(out_grad, index, axis)
unsigned
int
block_dim
=
PADDLE_CUDA_NUM_THREADS
;
unsigned
int
block_dim
=
PADDLE_CUDA_NUM_THREADS
;
dim3
grid_dim
=
dim3
((
numel
+
block_dim
-
1
)
/
block_dim
);
dim3
grid_dim
=
dim3
((
numel
+
block_dim
-
1
)
/
block_dim
);
paddle::platform::LimitGridDim(ctx, &grid_dim);
phi::backends::gpu::LimitGridDim(ctx, &grid_dim);
if
(
index_type
==
phi
::
DataType
::
INT64
)
{
if
(
index_type
==
phi
::
DataType
::
INT64
)
{
const
int64_t
*
index_data
=
index
.
data
<
int64_t
>
();
const
int64_t
*
index_data
=
index
.
data
<
int64_t
>
();
...
...
paddle/phi/kernels/gpu/index_add_kernel.cu
浏览文件 @
40a9b488
...
@@ -14,9 +14,9 @@
...
@@ -14,9 +14,9 @@
#include "paddle/phi/kernels/index_add_kernel.h"
#include "paddle/phi/kernels/index_add_kernel.h"
#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
#include "paddle/phi/backends/gpu/gpu_info.h"
#include "paddle/phi/backends/gpu/gpu_info.h"
#include "paddle/phi/backends/gpu/gpu_launch_config.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/utils/data_type.h"
#include "paddle/phi/core/utils/data_type.h"
...
@@ -75,7 +75,7 @@ void IndexAddKernel(const Context& ctx,
...
@@ -75,7 +75,7 @@ void IndexAddKernel(const Context& ctx,
unsigned
int
block_dim
=
PADDLE_CUDA_NUM_THREADS
;
unsigned
int
block_dim
=
PADDLE_CUDA_NUM_THREADS
;
dim3
grid_dim
=
dim3
((
numel
+
block_dim
-
1
)
/
block_dim
);
dim3
grid_dim
=
dim3
((
numel
+
block_dim
-
1
)
/
block_dim
);
paddle::platform::LimitGridDim(ctx, &grid_dim);
phi::backends::gpu::LimitGridDim(ctx, &grid_dim);
// copy input to output.
// copy input to output.
// todo(@limin29): inplace do not need copy.
// todo(@limin29): inplace do not need copy.
...
...
paddle/phi/kernels/gpu/index_sample_grad_kernel.cu
浏览文件 @
40a9b488
...
@@ -18,9 +18,9 @@
...
@@ -18,9 +18,9 @@
#include <vector>
#include <vector>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/gpu/gpu_launch_config.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/math_function.h"
...
@@ -92,16 +92,16 @@ void IndexSampleGradKernel(const Context& ctx,
...
@@ -92,16 +92,16 @@ void IndexSampleGradKernel(const Context& ctx,
size_t
index_length
=
index_dim
[
1
];
size_t
index_length
=
index_dim
[
1
];
bool
same_data_in_index_row
=
index_length
==
1
?
false
:
true
;
bool
same_data_in_index_row
=
index_length
==
1
?
false
:
true
;
auto block_width = paddle::platform::RoundToPowerOfTwo(index_length);
auto block_width = phi::backends::gpu::RoundToPowerOfTwo(index_length);
block_width
=
MIN
(
block_width
,
PREDEFINED_BLOCK_SIZE_X
);
block_width
=
MIN
(
block_width
,
PREDEFINED_BLOCK_SIZE_X
);
auto
block_height
=
auto
block_height
=
paddle::platform::RoundToPowerOfTwo(index_length * batch_size) /
phi::backends::gpu::RoundToPowerOfTwo(index_length * batch_size) /
block_width
;
block_width
;
block_height
=
MIN
(
block_height
,
PREDEFINED_BLOCK_SIZE
/
block_width
);
block_height
=
MIN
(
block_height
,
PREDEFINED_BLOCK_SIZE
/
block_width
);
dim3
block_dim
(
block_width
,
block_height
);
dim3
block_dim
(
block_width
,
block_height
);
dim3
grid_dim
((
index_length
+
block_dim
.
x
-
1
)
/
block_dim
.
x
,
dim3
grid_dim
((
index_length
+
block_dim
.
x
-
1
)
/
block_dim
.
x
,
(
batch_size
+
block_dim
.
y
-
1
)
/
block_dim
.
y
);
(
batch_size
+
block_dim
.
y
-
1
)
/
block_dim
.
y
);
paddle::platform::LimitGridDim(ctx, &grid_dim);
phi::backends::gpu::LimitGridDim(ctx, &grid_dim);
phi
::
funcs
::
SetConstant
<
Context
,
T
>
set_zero
;
phi
::
funcs
::
SetConstant
<
Context
,
T
>
set_zero
;
set_zero
(
ctx
,
x_grad
,
static_cast
<
T
>
(
0
));
set_zero
(
ctx
,
x_grad
,
static_cast
<
T
>
(
0
));
...
...
paddle/phi/kernels/gpu/index_sample_kernel.cu
浏览文件 @
40a9b488
...
@@ -18,8 +18,8 @@
...
@@ -18,8 +18,8 @@
#include <vector>
#include <vector>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/gpu/gpu_launch_config.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/math_function.h"
...
@@ -80,16 +80,16 @@ void IndexSampleKernel(const Context& ctx,
...
@@ -80,16 +80,16 @@ void IndexSampleKernel(const Context& ctx,
size_t
input_length
=
input_dim
[
1
];
size_t
input_length
=
input_dim
[
1
];
size_t
index_length
=
index_dim
[
1
];
size_t
index_length
=
index_dim
[
1
];
auto block_width = paddle::platform::RoundToPowerOfTwo(index_length);
auto block_width = phi::backends::gpu::RoundToPowerOfTwo(index_length);
block_width
=
MIN
(
block_width
,
PREDEFINED_BLOCK_SIZE_X
);
block_width
=
MIN
(
block_width
,
PREDEFINED_BLOCK_SIZE_X
);
int
block_height
=
int
block_height
=
paddle::platform::RoundToPowerOfTwo(index_length * batch_size) /
phi::backends::gpu::RoundToPowerOfTwo(index_length * batch_size) /
block_width
;
block_width
;
block_height
=
MIN
(
block_height
,
PREDEFINED_BLOCK_SIZE
/
block_width
);
block_height
=
MIN
(
block_height
,
PREDEFINED_BLOCK_SIZE
/
block_width
);
dim3
block_dim
(
block_width
,
block_height
);
dim3
block_dim
(
block_width
,
block_height
);
dim3
grid_dim
((
index_length
+
block_dim
.
x
-
1
)
/
block_dim
.
x
,
dim3
grid_dim
((
index_length
+
block_dim
.
x
-
1
)
/
block_dim
.
x
,
(
batch_size
+
block_dim
.
y
-
1
)
/
block_dim
.
y
);
(
batch_size
+
block_dim
.
y
-
1
)
/
block_dim
.
y
);
paddle::platform::LimitGridDim(ctx, &grid_dim);
phi::backends::gpu::LimitGridDim(ctx, &grid_dim);
if
(
index_type
==
DataType
::
INT64
)
{
if
(
index_type
==
DataType
::
INT64
)
{
const
int64_t
*
index_data
=
index
.
data
<
int64_t
>
();
const
int64_t
*
index_data
=
index
.
data
<
int64_t
>
();
...
...
paddle/phi/kernels/gpu/index_select_grad_kernel.cu
浏览文件 @
40a9b488
...
@@ -14,9 +14,9 @@
...
@@ -14,9 +14,9 @@
#include "paddle/phi/kernels/index_select_grad_kernel.h"
#include "paddle/phi/kernels/index_select_grad_kernel.h"
#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
#include "paddle/phi/backends/gpu/gpu_info.h"
#include "paddle/phi/backends/gpu/gpu_info.h"
#include "paddle/phi/backends/gpu/gpu_launch_config.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/utils/data_type.h"
#include "paddle/phi/core/utils/data_type.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/math_function.h"
...
@@ -87,7 +87,7 @@ void IndexSelectGradKernel(const Context& ctx,
...
@@ -87,7 +87,7 @@ void IndexSelectGradKernel(const Context& ctx,
unsigned
int
block_dim
=
PADDLE_CUDA_NUM_THREADS
;
unsigned
int
block_dim
=
PADDLE_CUDA_NUM_THREADS
;
dim3
grid_dim
=
dim3
((
numel
+
block_dim
-
1
)
/
block_dim
);
dim3
grid_dim
=
dim3
((
numel
+
block_dim
-
1
)
/
block_dim
);
paddle::platform::LimitGridDim(ctx, &grid_dim);
phi::backends::gpu::LimitGridDim(ctx, &grid_dim);
phi
::
funcs
::
SetConstant
<
phi
::
GPUContext
,
T
>
index_select_grad_init
;
phi
::
funcs
::
SetConstant
<
phi
::
GPUContext
,
T
>
index_select_grad_init
;
index_select_grad_init
(
ctx
,
x_grad
,
static_cast
<
T
>
(
0
));
index_select_grad_init
(
ctx
,
x_grad
,
static_cast
<
T
>
(
0
));
...
...
paddle/phi/kernels/gpu/index_select_impl.h
浏览文件 @
40a9b488
...
@@ -14,9 +14,9 @@
...
@@ -14,9 +14,9 @@
#pragma once
#pragma once
#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
#include "paddle/phi/backends/gpu/gpu_info.h"
#include "paddle/phi/backends/gpu/gpu_info.h"
#include "paddle/phi/backends/gpu/gpu_launch_config.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/utils/data_type.h"
#include "paddle/phi/core/utils/data_type.h"
...
...
paddle/phi/kernels/gpu/index_select_kernel.cu
浏览文件 @
40a9b488
...
@@ -14,9 +14,9 @@
...
@@ -14,9 +14,9 @@
#include "paddle/phi/kernels/index_select_kernel.h"
#include "paddle/phi/kernels/index_select_kernel.h"
#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
#include "paddle/phi/backends/gpu/gpu_info.h"
#include "paddle/phi/backends/gpu/gpu_info.h"
#include "paddle/phi/backends/gpu/gpu_launch_config.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/utils/data_type.h"
#include "paddle/phi/core/utils/data_type.h"
#include "paddle/phi/kernels/gpu/index_select_impl.h"
#include "paddle/phi/kernels/gpu/index_select_impl.h"
...
@@ -62,7 +62,7 @@ void IndexSelectKernel(const Context& ctx,
...
@@ -62,7 +62,7 @@ void IndexSelectKernel(const Context& ctx,
unsigned
int
block_dim
=
PADDLE_CUDA_NUM_THREADS
;
unsigned
int
block_dim
=
PADDLE_CUDA_NUM_THREADS
;
dim3
grid_dim
=
dim3
((
numel
+
block_dim
-
1
)
/
block_dim
);
dim3
grid_dim
=
dim3
((
numel
+
block_dim
-
1
)
/
block_dim
);
paddle::platform::LimitGridDim(ctx, &grid_dim);
phi::backends::gpu::LimitGridDim(ctx, &grid_dim);
if
(
index_type
==
phi
::
DataType
::
INT64
)
{
if
(
index_type
==
phi
::
DataType
::
INT64
)
{
const
int64_t
*
index_data
=
index
.
data
<
int64_t
>
();
const
int64_t
*
index_data
=
index
.
data
<
int64_t
>
();
...
...
paddle/phi/kernels/gpu/nanmedian_grad_kernel.cu
浏览文件 @
40a9b488
...
@@ -14,9 +14,9 @@
...
@@ -14,9 +14,9 @@
#include "paddle/phi/kernels/nanmedian_grad_kernel.h"
#include "paddle/phi/kernels/nanmedian_grad_kernel.h"
#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/gpu/gpu_launch_config.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/tensor_meta.h"
#include "paddle/phi/core/tensor_meta.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/math_function.h"
...
...
paddle/phi/kernels/gpu/nanmedian_kernel.cu
浏览文件 @
40a9b488
...
@@ -15,9 +15,9 @@
...
@@ -15,9 +15,9 @@
#include "paddle/phi/kernels/nanmedian_kernel.h"
#include "paddle/phi/kernels/nanmedian_kernel.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/gpu/gpu_launch_config.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/full_kernel.h"
#include "paddle/phi/kernels/full_kernel.h"
#include "paddle/phi/kernels/impl/nanmedian_kernel_impl.h"
#include "paddle/phi/kernels/impl/nanmedian_kernel_impl.h"
...
...
paddle/phi/kernels/gpu/top_k_kernel.cu
浏览文件 @
40a9b488
...
@@ -173,8 +173,8 @@ void TopkKernel(const Context& dev_ctx,
...
@@ -173,8 +173,8 @@ void TopkKernel(const Context& dev_ctx,
// NOTE: old matrix implementation of stride is different to eigen.
// NOTE: old matrix implementation of stride is different to eigen.
const
int
kMaxHeight
=
2048
;
const
int
kMaxHeight
=
2048
;
int
gridx
=
input_height
<
kMaxHeight
?
input_height
:
kMaxHeight
;
int
gridx
=
input_height
<
kMaxHeight
?
input_height
:
kMaxHeight
;
paddle
::
platform
::
GpuLaunchConfig
config
=
auto
config
=
paddle::platform::GetGpuLaunchConfig1D(dev_ctx, input_width);
phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, input_width);
switch
(
config
.
thread_per_block
.
x
)
{
switch
(
config
.
thread_per_block
.
x
)
{
#ifdef PADDLE_WITH_HIP
#ifdef PADDLE_WITH_HIP
FIXED_BLOCK_DIM
(
FIXED_BLOCK_DIM
(
...
@@ -282,8 +282,8 @@ void TopkKernel(const Context& dev_ctx,
...
@@ -282,8 +282,8 @@ void TopkKernel(const Context& dev_ctx,
const
int
kMaxHeight
=
2048
;
const
int
kMaxHeight
=
2048
;
int
gridx
=
input_height
<
kMaxHeight
?
input_height
:
kMaxHeight
;
int
gridx
=
input_height
<
kMaxHeight
?
input_height
:
kMaxHeight
;
paddle
::
platform
::
GpuLaunchConfig
config
=
auto
config
=
paddle::platform::GetGpuLaunchConfig1D(dev_ctx, input_width);
phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, input_width);
switch
(
config
.
thread_per_block
.
x
)
{
switch
(
config
.
thread_per_block
.
x
)
{
#ifdef PADDLE_WITH_HIP
#ifdef PADDLE_WITH_HIP
FIXED_BLOCK_DIM
(
FIXED_BLOCK_DIM
(
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录