Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
8a5a45f8
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
8a5a45f8
编写于
6月 01, 2021
作者:
W
whs
提交者:
GitHub
6月 01, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Fix cuda kernel launch of grid sampler (#33100) (#33232)
上级
3fe99ad5
变更
3
隐藏空白更改
内联
并排
Showing
3 changed files
with
56 additions
and
14 deletions
+56
-14
paddle/fluid/operators/grid_sampler_op.cu
paddle/fluid/operators/grid_sampler_op.cu
+14
-12
python/paddle/fluid/tests/unittests/test_bilinear_interp_op.py
...n/paddle/fluid/tests/unittests/test_bilinear_interp_op.py
+2
-0
python/paddle/fluid/tests/unittests/test_grid_sampler_op.py
python/paddle/fluid/tests/unittests/test_grid_sampler_op.py
+40
-2
未找到文件。
paddle/fluid/operators/grid_sampler_op.cu
浏览文件 @
8a5a45f8
...
...
@@ -187,7 +187,6 @@ __global__ void grid_sample_cuda_kernel(const int nthreads, int n, int out_c,
int
out_sC
=
out_h
*
out_w
;
int
out_sH
=
out_w
;
int
out_sW
=
1
;
CUDA_KERNEL_LOOP
(
index
,
nthreads
)
{
const
int
w
=
index
%
out_w
;
const
int
h
=
(
index
/
out_w
)
%
out_h
;
...
...
@@ -199,7 +198,6 @@ __global__ void grid_sample_cuda_kernel(const int nthreads, int n, int out_c,
ix
=
compute_positions
(
ix
,
in_w
,
padding_mode
,
align_corners
);
iy
=
compute_positions
(
iy
,
in_h
,
padding_mode
,
align_corners
);
if
(
mode
==
Mode
::
bilinear
)
{
int
ix_nw
=
static_cast
<
int
>
(
floor
(
ix
));
int
iy_nw
=
static_cast
<
int
>
(
floor
(
iy
));
...
...
@@ -216,6 +214,7 @@ __global__ void grid_sample_cuda_kernel(const int nthreads, int n, int out_c,
T
se
=
(
ix
-
ix_nw
)
*
(
iy
-
iy_nw
);
auto
inp_offset_NC
=
n
*
inp_sN
;
auto
out_ptr_NCHW
=
output
+
n
*
out_sN
+
h
*
out_sH
+
w
*
out_sW
;
for
(
int
c
=
0
;
c
<
out_c
;
++
c
,
inp_offset_NC
+=
inp_sC
,
out_ptr_NCHW
+=
out_sC
)
{
...
...
@@ -291,17 +290,17 @@ class GridSampleOpCUDAKernel : public framework::OpKernel<T> {
<<
"; out_w: "
<<
out_w
;
auto
*
output
=
ctx
.
Output
<
Tensor
>
(
"Output"
);
auto
*
output_data
=
output
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
VLOG
(
3
)
<<
"set constant"
;
VLOG
(
3
)
<<
"out dims: "
<<
output
->
dims
()[
0
]
<<
"; "
<<
output
->
dims
()[
1
]
<<
"; "
<<
output
->
dims
()[
2
]
<<
"; "
<<
output
->
dims
()[
3
]
;
math
::
SetConstant
<
paddle
::
platform
::
CUDADeviceContext
,
T
>
()(
dev_ctx
,
output
,
static_cast
<
T
>
(
0
));
int
count
=
static_cast
<
int
>
(
n
*
out_h
*
out_w
);
auto
cu_stream
=
dev_ctx
.
stream
();
int
block
=
512
;
int
grid_size
=
(
count
+
block
-
1
)
/
block
;
grid_sample_cuda_kernel
<
T
><<<
block
,
grid_size
,
0
,
cu_stream
>>>
(
int
block_size
=
512
;
int
grid_size
=
(
count
+
block_size
-
1
)
/
block_size
;
VLOG
(
3
)
<<
"cuda launch - grid dims: "
<<
grid_size
<<
"; block dims"
<<
block_size
;
grid_sample_cuda_kernel
<
T
><<<
grid_size
,
block_size
,
0
,
cu_stream
>>>
(
count
,
n
,
c
,
out_h
,
out_w
,
in_h
,
in_w
,
input
->
data
<
T
>
(),
grid
->
data
<
T
>
(),
output_data
,
mode
,
padding_mode
,
align_corners
);
}
...
...
@@ -475,9 +474,12 @@ class GridSampleGradOpCUDAKernel : public framework::OpKernel<T> {
int
count
=
static_cast
<
int
>
(
n
*
out_h
*
out_w
);
auto
cu_stream
=
dev_ctx
.
stream
();
int
block
=
512
;
int
grid_size
=
(
count
+
block
-
1
)
/
block
;
grid_sampler_cuda_backward_kernel
<
T
><<<
block
,
grid_size
,
0
,
cu_stream
>>>
(
int
block_size
=
512
;
int
grid_size
=
(
count
+
block_size
-
1
)
/
block_size
;
VLOG
(
3
)
<<
"cuda launch grad kernel - grid dims: "
<<
grid_size
<<
"; block dims"
<<
block_size
<<
"; count: "
<<
count
;
grid_sampler_cuda_backward_kernel
<
T
><<<
grid_size
,
block_size
,
0
,
cu_stream
>>>
(
count
,
output_grad
->
data
<
T
>
(),
input
->
data
<
T
>
(),
grid
->
data
<
T
>
(),
n
,
c
,
out_h
,
out_w
,
in_h
,
in_w
,
input_grad
->
data
<
T
>
(),
grid_grad_data
,
mode
,
padding_mode
,
align_corners
);
...
...
python/paddle/fluid/tests/unittests/test_bilinear_interp_op.py
浏览文件 @
8a5a45f8
...
...
@@ -19,6 +19,8 @@ import numpy as np
from
op_test
import
OpTest
import
paddle.fluid.core
as
core
import
paddle.fluid
as
fluid
import
paddle
paddle
.
enable_static
()
def
bilinear_interp_np
(
input
,
...
...
python/paddle/fluid/tests/unittests/test_grid_sampler_op.py
浏览文件 @
8a5a45f8
...
...
@@ -12,9 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import
paddle
import
unittest
import
numpy
as
np
from
op_test
import
OpTest
import
paddle.fluid.core
as
core
from
op_test
import
OpTest
,
skip_check_grad_ci
paddle
.
enable_static
()
def
AffineGrid
(
theta
,
grid_shape
):
...
...
@@ -159,7 +162,6 @@ class TestGridSamplerOp(OpTest):
"padding_mode"
:
self
.
padding_mode
,
"mode"
:
self
.
mode
}
# print("X: {}".format(x))
self
.
outputs
=
{
'Output'
:
GridSampler
(
x
,
grid
,
self
.
align_corners
,
self
.
mode
,
self
.
padding_mode
)
...
...
@@ -236,5 +238,41 @@ class Case4(TestGridSamplerOp):
self
.
numeric_grad_delta
=
0.0001
@skip_check_grad_ci(
    reason=("'check_grad' on large inputs is too slow, "
            "however it is desirable to cover the forward pass"))
class LargeInputCase(TestGridSamplerOp):
    """Grid sampler case with a larger input than the other cases in view.

    Uses x_shape (2, 3, 128, 128) with grid_shape (2, 130, 130, 2),
    bilinear mode, "reflection" padding and align_corners disabled.
    The gradient check is turned into a no-op; see the decorator reason.
    """

    def get_places(self):
        """Return [CUDAPlace(0)] when compiled with CUDA, else an empty list."""
        return [core.CUDAPlace(0)] if core.is_compiled_with_cuda() else []

    def initTestCase(self):
        """Set the shapes and sampling attributes for this case."""
        self.no_need_check_grad = True

        # Tensor shapes.
        self.x_shape = (2, 3, 128, 128)
        self.grid_shape = (2, 130, 130, 2)
        self.theta_shape = (2, 2, 3)

        # Sampling attributes.
        self.mode = "bilinear"
        self.padding_mode = "reflection"
        self.align_corners = False

    def test_check_grad_normal(self):
        """Intentionally empty: this class does not run a gradient check."""
        pass
@skip_check_grad_ci(
    reason=("'check_grad' on large inputs is too slow, "
            "however it is desirable to cover the forward pass"))
class Case5(LargeInputCase):
    """Variant of LargeInputCase with "zeros" padding and align_corners on.

    Shapes and mode match the parent's; additionally sets use_cudnn, which
    is disabled when the build reports ROCm support.
    """

    def initTestCase(self):
        """Set the shapes, sampling attributes and cuDNN flag for this case."""
        self.no_need_check_grad = True

        # Tensor shapes.
        self.x_shape = (2, 3, 128, 128)
        self.grid_shape = (2, 130, 130, 2)
        self.theta_shape = (2, 2, 3)

        # Sampling attributes.
        self.mode = "bilinear"
        self.padding_mode = "zeros"
        self.align_corners = True

        # True unless the build is compiled with ROCm.
        self.use_cudnn = not core.is_compiled_with_rocm()
# Run the test cases via unittest when this file is executed as a script.
if
__name__
==
"__main__"
:
unittest
.
main
()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录