Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
a4b30a12
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
a4b30a12
编写于
4月 01, 2021
作者:
Q
Qi Li
提交者:
GitHub
4月 01, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[ROCM] fix depthwise conv failure on ROCM, test=develop (#31998)
上级
68e7de26
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
10 additions
and
1 deletion
+10
-1
paddle/fluid/operators/math/depthwise_conv.cu
paddle/fluid/operators/math/depthwise_conv.cu
+10
-1
未找到文件。
paddle/fluid/operators/math/depthwise_conv.cu
浏览文件 @
a4b30a12
...
@@ -613,6 +613,9 @@ class DepthwiseConvFunctor<platform::CUDADeviceContext, T,
...
@@ -613,6 +613,9 @@ class DepthwiseConvFunctor<platform::CUDADeviceContext, T,
thread
=
(
output_width
-
1
)
/
2
+
1
;
thread
=
(
output_width
-
1
)
/
2
+
1
;
else
if
(
output_width
>
512
&&
output_width
<=
1024
)
else
if
(
output_width
>
512
&&
output_width
<=
1024
)
thread
=
output_width
;
thread
=
output_width
;
#ifdef __HIPCC__
thread
=
std
::
min
(
thread
,
256
);
#endif
int
blocks
=
std
::
min
(
std
::
max
(
thread
/
output_width
,
1
),
output_height
);
int
blocks
=
std
::
min
(
std
::
max
(
thread
/
output_width
,
1
),
output_height
);
dim3
threads
(
std
::
min
(
output_width
,
thread
),
blocks
,
1
);
dim3
threads
(
std
::
min
(
output_width
,
thread
),
blocks
,
1
);
dim3
grid
(
output_channels
,
batch_size
,
1
);
dim3
grid
(
output_channels
,
batch_size
,
1
);
...
@@ -620,7 +623,13 @@ class DepthwiseConvFunctor<platform::CUDADeviceContext, T,
...
@@ -620,7 +623,13 @@ class DepthwiseConvFunctor<platform::CUDADeviceContext, T,
int
nums_output
=
int
nums_output
=
batch_size
*
output_channels
*
output_height
*
output_width
;
batch_size
*
output_channels
*
output_height
*
output_width
;
#ifdef __HIPCC__
int
block_size
=
256
;
int
grid_size
=
std
::
min
((
nums_output
+
block_size
-
1
)
/
block_size
,
256
);
#else
int
block_size
=
512
;
int
block_size
=
512
;
int
grid_size
=
(
nums_output
+
block_size
-
1
)
/
block_size
;
#endif
#define check_case(c_filter_multiplier, c_stride, c_filter) \
#define check_case(c_filter_multiplier, c_stride, c_filter) \
if (c_filter_multiplier == 0 || \
if (c_filter_multiplier == 0 || \
...
@@ -630,7 +639,7 @@ class DepthwiseConvFunctor<platform::CUDADeviceContext, T,
...
@@ -630,7 +639,7 @@ class DepthwiseConvFunctor<platform::CUDADeviceContext, T,
c_filter == -1)) { \
c_filter == -1)) { \
if (c_filter == -1) { \
if (c_filter == -1) { \
threads.x = block_size; \
threads.x = block_size; \
grid.x =
(nums_output + block_size - 1) / block_size;
\
grid.x =
grid_size;
\
threads.y = threads.z = grid.y = grid.z = 1; \
threads.y = threads.z = grid.y = grid.z = 1; \
} \
} \
KernelDepthwiseConvSp< \
KernelDepthwiseConvSp< \
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录