Crayon鑫 / Paddle (forked from PaddlePaddle / Paddle)
Commit 00efd1d8 (unverified), authored by chengjuntao on Sep 17, 2019; committed via GitHub on Sep 17, 2019.
add deformable conv v1 op and cpu version of deformable conv v2 (#18500)
* add deformable conv v1 op, test=develop
Parent commit: 40c66f8d

Showing 15 changed files with 2,627 additions and 90 deletions (+2627 −90):
cmake/operators.cmake (+1 −1)
paddle/fluid/API.spec (+1 −1)
paddle/fluid/operators/CMakeLists.txt (+1 −3)
paddle/fluid/operators/deformable_conv_filter.cu.h (+37 −0)
paddle/fluid/operators/deformable_conv_func.h (+149 −0)
paddle/fluid/operators/deformable_conv_op.cc (+8 −2)
paddle/fluid/operators/deformable_conv_op.cu (+1 −0)
paddle/fluid/operators/deformable_conv_op.h (+613 −0)
paddle/fluid/operators/deformable_conv_v1_op.cc (+272 −0)
paddle/fluid/operators/deformable_conv_v1_op.cu (+609 −0)
paddle/fluid/operators/deformable_conv_v1_op.h (+564 −0)
python/paddle/fluid/layers/nn.py (+63 −24)
python/paddle/fluid/tests/unittests/test_deformable_conv_op.py (+20 −33)
python/paddle/fluid/tests/unittests/test_deformable_conv_v1_op.py (+240 −0)
python/paddle/fluid/tests/unittests/test_layers.py (+48 −26)
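The v1/v2 split follows the two papers: the modulated (v2) operator scales each bilinearly sampled value by a learned mask, while v1 takes no mask input at all. Schematically, with $\Delta p_k$ the learned offset and $\Delta m_k$ the learned modulation scalar for the k-th sampling location (a summary inferred from the kernels below, not text from the patch):

$$
\text{v1:}\quad y(p) = \sum_{k=1}^{K} w_k \cdot x(p + p_k + \Delta p_k), \qquad
\text{v2:}\quad y(p) = \sum_{k=1}^{K} w_k \cdot x(p + p_k + \Delta p_k) \cdot \Delta m_k
$$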
cmake/operators.cmake (+1 −1)

With CPU kernels added in this commit, deformable_conv_op no longer needs its pybind declaration written by hand, so it is dropped from the manual_pybind_op list:

@@ -110,7 +110,7 @@ function(op_library TARGET)
   # Define operators that don't need pybind here.
   foreach(manual_pybind_op "compare_op" "logical_op" "nccl_op"
           "tensor_array_read_write_op" "tensorrt_engine_op" "conv_fusion_op"
-          "fusion_transpose_flatten_concat_op" "fusion_conv_inception_op" "sync_batch_norm_op" "deformable_conv_op" "dgc_op")
+          "fusion_transpose_flatten_concat_op" "fusion_conv_inception_op" "sync_batch_norm_op" "dgc_op")
     if ("${TARGET}" STREQUAL "${manual_pybind_op}")
       set(pybind_flag 1)
     endif()
paddle/fluid/API.spec (+1 −1, mode 100755 → 100644)

The public deformable_conv layer gains a modulated flag (defaulting to True, i.e. the existing v2 behavior), which changes its ArgSpec and doc hash:

@@ -285,7 +285,7 @@
 paddle.fluid.layers.continuous_value_model (ArgSpec(args=['input', 'cvm', 'use_cvm'], varargs=None, keywords=None, defaults=(True,)), ('document', 'c03490ffaa1b78258747157c313db4cd'))
 paddle.fluid.layers.where (ArgSpec(args=['condition'], varargs=None, keywords=None, defaults=None), ('document', 'b1e1487760295e1ff55307b880a99e18'))
 paddle.fluid.layers.sign (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', 'fa2f457a81714430c5677c2d68744728'))
-paddle.fluid.layers.deformable_conv (ArgSpec(args=['input', 'offset', 'mask', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'deformable_groups', 'im2col_step', 'param_attr', 'bias_attr', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, None, None, None)), ('document', '4d83ba6b971cfd590493b0925b3e081e'))
+paddle.fluid.layers.deformable_conv (ArgSpec(args=['input', 'offset', 'mask', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'deformable_groups', 'im2col_step', 'param_attr', 'bias_attr', 'modulated', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, None, None, True, None)), ('document', '335193ac57d41d7199f8d26d30c069b1'))
 paddle.fluid.layers.unfold (ArgSpec(args=['x', 'kernel_sizes', 'strides', 'paddings', 'dilations', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None)), ('document', '3f884662ad443d9ecc2b3734b4f61ad6'))
 paddle.fluid.layers.deformable_roi_pooling (ArgSpec(args=['input', 'rois', 'trans', 'no_trans', 'spatial_scale', 'group_size', 'pooled_height', 'pooled_width', 'part_size', 'sample_per_part', 'trans_std', 'position_sensitive', 'name'], varargs=None, keywords=None, defaults=(False, 1.0, [1, 1], 1, 1, None, 1, 0.1, False, None)), ('document', '99c03e3f249e36854f87dedaa17c8f35'))
 paddle.fluid.layers.match_matrix_tensor (ArgSpec(args=['x', 'y', 'channel_num', 'act', 'param_attr', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, None, 'float32', None)), ('document', 'b6ea7d4ddeacae85e37d1e47d5262948'))
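For orientation, a minimal sketch of calling the updated layer under the 1.x fluid API (variable names and sizes are illustrative, not from the patch; it assumes mask is passed as None when modulated=False, since the v1 op has no mask input). With a 3×3 kernel and deformable_groups=1, the v1 offset input carries 2·3·3 = 18 channels:

import paddle.fluid as fluid

data = fluid.layers.data(name='data', shape=[16, 32, 32], dtype='float32')
offset = fluid.layers.data(name='offset', shape=[18, 32, 32], dtype='float32')

# modulated=False selects the new deformable_conv_v1 op (no mask);
# the default modulated=True keeps the existing v2 behavior.
out = fluid.layers.deformable_conv(
    input=data, offset=offset, mask=None,
    num_filters=32, filter_size=3, padding=1,
    modulated=False)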
paddle/fluid/operators/CMakeLists.txt (+1 −3)

deformable_conv_op leaves the GPU-only build path: it is removed from the register_operators EXCLUDES list and from the WITH_GPU-only op_library/USE_CUDA_ONLY_OP block, so its kernels are now registered like any other operator's:

@@ -55,7 +55,7 @@ if (NOT WITH_MKL)
   endif()
 endif()

-register_operators(EXCLUDES py_func_op warpctc_op dgc_op conv_fusion_op
-        sync_batch_norm_op deformable_conv_op ${OP_ONLY_MKL} DEPS ${OP_HEADER_DEPS} ${OP_PREFETCH_DEPS})
+register_operators(EXCLUDES py_func_op warpctc_op dgc_op conv_fusion_op
+        sync_batch_norm_op ${OP_ONLY_MKL} DEPS ${OP_HEADER_DEPS} ${OP_PREFETCH_DEPS})

 if (WITH_GPU)
   # warpctc_op needs cudnn 7 above
@@ -73,8 +73,6 @@ if (WITH_GPU)
     op_library(sync_batch_norm_op)
     file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(sync_batch_norm);\n")
   endif()
-  op_library(deformable_conv_op)
-  file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(deformable_conv);\n")
 else()
   op_library(warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale)
 endif()
paddle/fluid/operators/deformable_conv_filter.cu.h (new file, 0 → 100644, +37 −0)
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Part of the following code in this file refs to
// https://github.com/msracver/Deformable-ConvNets/blob/master/faster_rcnn/operator_cxx/deformable_convolution.cu
//
// Copyright (c) 2017 Microsoft
// Licensed under The Apache-2.0 License [see LICENSE for details]
// \file deformable_psroi_pooling.cu
// \brief
// \author Yi Li, Guodong Zhang, Jifeng Dai
#pragma once
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/math/math_function.h"

// Element-wise accumulation of the per-group weight gradient dweight_3d into
// filter_grad, using a grid-stride loop so a fixed-size grid covers nthreads.
template <typename T>
__global__ void FilterGradAddupCUDAKernel(const int nthreads, const int n,
                                          const int height, const int width,
                                          const T* dweight_3d,
                                          T* filter_grad) {
  int index = blockIdx.x * blockDim.x + threadIdx.x;
  int offset = blockDim.x * gridDim.x;
  for (size_t i = index; i < nthreads; i += offset) {
    filter_grad[i] = filter_grad[i] + dweight_3d[i];
  }
}
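In the grid-stride pattern above, each thread starts at blockIdx.x * blockDim.x + threadIdx.x and advances by blockDim.x * gridDim.x, so every index in [0, nthreads) is visited exactly once regardless of launch size. A toy Python rendering of that partition (illustrative sizes only, not part of the patch):

nthreads, grid_dim, block_dim = 10, 2, 3   # toy launch: 6 threads, 10 items
stride = grid_dim * block_dim
visited = sorted(i for t in range(stride) for i in range(t, nthreads, stride))
assert visited == list(range(nthreads))    # each index covered exactly once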
paddle/fluid/operators/deformable_conv_func.h (new file, 0 → 100644, +149 −0)
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Part of the following code in this file refs to
// https://github.com/msracver/Deformable-ConvNets/blob/master/faster_rcnn/operator_cxx/deformable_convolution.cu
//
// Copyright (c) 2017 Microsoft
// Licensed under The Apache-2.0 License [see LICENSE for details]
// \file deformable_psroi_pooling.cu
// \brief
// \author Yi Li, Guodong Zhang, Jifeng Dai
#pragma once
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/platform/hostdevice.h"

// Bilinear weight of the integer pixel (h, w) for a fractional sampling point
// (argmax_h, argmax_w); used to scatter output gradients back to input pixels.
template <typename T>
HOSTDEVICE T DmcnGetGradientWeight(T argmax_h, T argmax_w, const int h,
                                   const int w, const int height,
                                   const int width) {
  if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 ||
      argmax_w >= width) {
    return 0;
  }

  int argmax_h_low = floor(argmax_h);
  int argmax_w_low = floor(argmax_w);
  int argmax_h_high = argmax_h_low + 1;
  int argmax_w_high = argmax_w_low + 1;

  T weight = 0;
  weight = (h == argmax_h_low && w == argmax_w_low)
               ? (h + 1 - argmax_h) * (w + 1 - argmax_w)
               : weight;
  weight = (h == argmax_h_low && w == argmax_w_high)
               ? (h + 1 - argmax_h) * (argmax_w + 1 - w)
               : weight;
  weight = (h == argmax_h_high && w == argmax_w_low)
               ? (argmax_h + 1 - h) * (w + 1 - argmax_w)
               : weight;
  weight = (h == argmax_h_high && w == argmax_w_high)
               ? (argmax_h + 1 - h) * (argmax_w + 1 - w)
               : weight;
  return weight;
}

// Derivative of the bilinearly sampled value with respect to the sampling
// coordinate: bp_dir == 0 differentiates in h, bp_dir == 1 in w.
template <typename T>
HOSTDEVICE T DmcnGetCoordinateWeight(T argmax_h, T argmax_w, const int height,
                                     const int width, const T* im_data,
                                     const int data_width, const int bp_dir) {
  if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 ||
      argmax_w >= width) {
    return 0;
  }

  int argmax_h_low = floor(argmax_h);
  int argmax_w_low = floor(argmax_w);
  int argmax_h_high = argmax_h_low + 1;
  int argmax_w_high = argmax_w_low + 1;

  T weight = 0;

  if (bp_dir == 0) {
    weight += (argmax_h_low >= 0 && argmax_w_low >= 0)
                  ? -1 * (argmax_w_low + 1 - argmax_w) *
                        im_data[argmax_h_low * data_width + argmax_w_low]
                  : 0;
    weight += (argmax_h_low >= 0 && argmax_w_high <= width - 1)
                  ? -1 * (argmax_w - argmax_w_low) *
                        im_data[argmax_h_low * data_width + argmax_w_high]
                  : 0;
    weight += (argmax_h_high <= height - 1 && argmax_w_low >= 0)
                  ? (argmax_w_low + 1 - argmax_w) *
                        im_data[argmax_h_high * data_width + argmax_w_low]
                  : 0;
    weight += (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)
                  ? (argmax_w - argmax_w_low) *
                        im_data[argmax_h_high * data_width + argmax_w_high]
                  : 0;
  } else if (bp_dir == 1) {
    weight += (argmax_h_low >= 0 && argmax_w_low >= 0)
                  ? -1 * (argmax_h_low + 1 - argmax_h) *
                        im_data[argmax_h_low * data_width + argmax_w_low]
                  : 0;
    weight += (argmax_h_low >= 0 && argmax_w_high <= width - 1)
                  ? (argmax_h_low + 1 - argmax_h) *
                        im_data[argmax_h_low * data_width + argmax_w_high]
                  : 0;
    weight += (argmax_h_high <= height - 1 && argmax_w_low >= 0)
                  ? -1 * (argmax_h - argmax_h_low) *
                        im_data[argmax_h_high * data_width + argmax_w_low]
                  : 0;
    weight += (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)
                  ? (argmax_h - argmax_h_low) *
                        im_data[argmax_h_high * data_width + argmax_w_high]
                  : 0;
  }
  return weight;
}

// Bilinear interpolation of bottom_data at fractional (h, w); corners that
// fall outside the map contribute zero.
template <typename T>
HOSTDEVICE T DmcnIm2colBilinear(const T* bottom_data, const int data_width,
                                const int height, const int width, T h, T w) {
  int h_low = floor(h);
  int w_low = floor(w);
  int h_high = h_low + 1;
  int w_high = w_low + 1;

  T lh = h - h_low;
  T lw = w - w_low;
  T hh = 1 - lh;
  T hw = 1 - lw;

  T v1 =
      (h_low >= 0 && w_low >= 0) ? bottom_data[h_low * data_width + w_low] : 0;
  T v2 = (h_low >= 0 && w_high <= width - 1)
             ? bottom_data[h_low * data_width + w_high]
             : 0;
  T v3 = (h_high <= height - 1 && w_low >= 0)
             ? bottom_data[h_high * data_width + w_low]
             : 0;
  T v4 = (h_high <= height - 1 && w_high <= width - 1)
             ? bottom_data[h_high * data_width + w_high]
             : 0;

  T w1 = hh * hw;
  T w2 = hh * lw;
  T w3 = lh * hw;
  T w4 = lh * lw;

  return w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4;
}
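DmcnIm2colBilinear is plain bilinear interpolation at a fractional (h, w). A small NumPy check of the same arithmetic (hypothetical helper, not part of the patch):

import numpy as np

def bilinear(img, h, w):
    # Mirrors DmcnIm2colBilinear: corners outside the map contribute 0.
    h_low, w_low = int(np.floor(h)), int(np.floor(w))
    h_high, w_high = h_low + 1, w_low + 1
    lh, lw = h - h_low, w - w_low
    hh, hw = 1 - lh, 1 - lw

    def at(y, x):
        H, W = img.shape
        return img[y, x] if 0 <= y < H and 0 <= x < W else 0.0

    return (hh * hw * at(h_low, w_low) + hh * lw * at(h_low, w_high) +
            lh * hw * at(h_high, w_low) + lh * lw * at(h_high, w_high))

img = np.arange(9, dtype=np.float32).reshape(3, 3)
print(bilinear(img, 0.5, 0.5))  # 2.0: equal-weight average of pixels 0, 1, 3, 4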
paddle/fluid/operators/deformable_conv_op.cc (+8 −2)

The existing (v2) operator now includes the new shared header and registers CPU kernels:

@@ -12,6 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+#include "paddle/fluid/operators/deformable_conv_op.h"
+#include <memory>
 #include "paddle/fluid/operators/conv_op.h"

 namespace paddle {
@@ -197,7 +199,6 @@ class DeformableConvOp : public framework::OperatorWithKernel {
     PADDLE_ENFORCE_EQ(mask_dims[1] / (filter_dims[2] * filter_dims[3]),
                       deformable_groups,
                       "mask filter must divide deformable group size.");
-
     ctx->SetOutputDim("Output", framework::make_ddim(output_shape));
   }
@@ -274,5 +275,10 @@ namespace ops = paddle::operators;
 REGISTER_OPERATOR(deformable_conv, ops::DeformableConvOp,
                   ops::DeformableConvOpMaker,
                   ops::DeformableConvGradOpDescMaker);
 REGISTER_OPERATOR(deformable_conv_grad, ops::DeformableConvGradOp);
+
+REGISTER_OP_CPU_KERNEL(deformable_conv, ops::DeformableConvCPUKernel<float>,
+                       ops::DeformableConvCPUKernel<double>);
+REGISTER_OP_CPU_KERNEL(deformable_conv_grad,
+                       ops::DeformableConvGradCPUKernel<float>,
+                       ops::DeformableConvGradCPUKernel<double>);
paddle/fluid/operators/deformable_conv_op.cu (+1 −0)

@@ -24,6 +24,7 @@
 #include <algorithm>
 #include <vector>
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/deformable_conv_op.h"
 #include "paddle/fluid/operators/math/blas.h"
 #include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/platform/cuda_primitives.h"
paddle/fluid/operators/deformable_conv_op.h (new file, 0 → 100644, +613 −0)
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Part of the following code in this file refs to
// https://github.com/msracver/Deformable-ConvNets/blob/master/faster_rcnn/operator_cxx/deformable_convolution.cu
//
// Copyright (c) 2017 Microsoft
// Licensed under The Apache-2.0 License [see LICENSE for details]
// \file deformable_psroi_pooling.cu
// \brief
// \author Yi Li, Guodong Zhang, Jifeng Dai
#pragma once
#include <algorithm>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/deformable_conv_func.h"
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/math/math_function.h"

namespace paddle {
namespace operators {

using Tensor = framework::Tensor;
using CPUDeviceContext = platform::CPUDeviceContext;

// col2im with offsets and mask: scatter column-buffer gradients back to the
// input image through the bilinear-sampling weights.
template <typename T>
void ModulatedDeformableCol2imCPUKernel(
    const int num_kernels, const T* data_col, const T* data_offset,
    const T* data_mask, const int channels, const int height, const int width,
    const int kernel_h, const int kernel_w, const int pad_h, const int pad_w,
    const int stride_h, const int stride_w, const int dilation_h,
    const int dilation_w, const int channel_per_deformable_group,
    const int batch_size, const int deformable_group, const int height_col,
    const int width_col, T* grad_im) {
  for (size_t thread = 0; thread < num_kernels; thread++) {
    const int j = (thread / width_col / height_col / batch_size) % kernel_w;
    const int i =
        (thread / width_col / height_col / batch_size / kernel_w) % kernel_h;
    const int c =
        thread / width_col / height_col / batch_size / kernel_w / kernel_h;

    const int deformable_group_index = c / channel_per_deformable_group;

    int w_out = thread % width_col;
    int h_out = (thread / width_col) % height_col;
    int b = (thread / width_col / height_col) % batch_size;
    int w_in = w_out * stride_w - pad_w;
    int h_in = h_out * stride_h - pad_h;

    const T* data_offset_ptr =
        data_offset + (b * deformable_group + deformable_group_index) * 2 *
                          kernel_h * kernel_w * height_col * width_col;
    const T* data_mask_ptr =
        data_mask + (b * deformable_group + deformable_group_index) *
                        kernel_h * kernel_w * height_col * width_col;
    const int data_offset_h_ptr =
        ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out;
    const int data_offset_w_ptr =
        ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out;
    const int data_mask_hw_ptr =
        ((i * kernel_w + j) * height_col + h_out) * width_col + w_out;

    const T offset_h = data_offset_ptr[data_offset_h_ptr];
    const T offset_w = data_offset_ptr[data_offset_w_ptr];
    const T mask = data_mask_ptr[data_mask_hw_ptr];
    const T cur_inv_h_data = h_in + i * dilation_h + offset_h;
    const T cur_inv_w_data = w_in + j * dilation_w + offset_w;
    const T cur_top_grad = data_col[thread] * mask;
    const int cur_h = static_cast<int>(cur_inv_h_data);
    const int cur_w = static_cast<int>(cur_inv_w_data);
    // The 5x5 window is filtered by the abs(...) < 1 guard, so only the (at
    // most) four bilinear corners around the sampling point receive gradient.
    for (int dy = -2; dy <= 2; dy++) {
      for (int dx = -2; dx <= 2; dx++) {
        if (cur_h + dy >= 0 && cur_h + dy < height && cur_w + dx >= 0 &&
            cur_w + dx < width && abs(cur_inv_h_data - (cur_h + dy)) < 1 &&
            abs(cur_inv_w_data - (cur_w + dx)) < 1) {
          int cur_bottom_grad_pos =
              ((b * channels + c) * height + cur_h + dy) * width + cur_w + dx;
          T weight =
              DmcnGetGradientWeight(cur_inv_h_data, cur_inv_w_data,
                                    cur_h + dy, cur_w + dx, height, width);

          *(grad_im + cur_bottom_grad_pos) =
              *(grad_im + cur_bottom_grad_pos) + weight * cur_top_grad;
        }
      }
    }
  }
}

template <typename T>
static inline void ModulatedDeformableCol2imCPU(
    const platform::CPUDeviceContext& ctx, const T* data_col,
    const T* data_offset, const T* data_mask,
    const std::vector<int64_t> im_shape, const std::vector<int64_t> col_shape,
    const std::vector<int64_t> kernel_shape, const std::vector<int> pad,
    const std::vector<int> stride, const std::vector<int> dilation,
    const int deformable_group, T* grad_im) {
  int channel_per_deformable_group = im_shape[0] / deformable_group;
  int num_kernels = col_shape[0] * col_shape[1] * col_shape[2] * col_shape[3];

  ModulatedDeformableCol2imCPUKernel(
      num_kernels, data_col, data_offset, data_mask, im_shape[0], im_shape[1],
      im_shape[2], kernel_shape[2], kernel_shape[3], pad[0], pad[1], stride[0],
      stride[1], dilation[0], dilation[1], channel_per_deformable_group,
      col_shape[1], deformable_group, col_shape[2], col_shape[3], grad_im);
}

// Gradients with respect to the offset and mask inputs.
template <typename T>
void ModulatedDeformableCol2imCoordCPUKernel(
    const int num_kernels, const T* data_col, const T* data_im,
    const T* data_offset, const T* data_mask, const int channels,
    const int height, const int width, const int kernel_h, const int kernel_w,
    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
    const int dilation_h, const int dilation_w,
    const int channel_per_deformable_group, const int batch_size,
    const int offset_channels, const int deformable_group,
    const int height_col, const int width_col, T* grad_offset, T* grad_mask) {
  for (size_t i = 0; i < num_kernels; i++) {
    T val = 0, mval = 0;
    const int w = i % width_col;
    const int h = (i / width_col) % height_col;
    const int c = (i / width_col / height_col) % offset_channels;
    const int b = (i / width_col / height_col) / offset_channels;

    const int deformable_group_index = c / (2 * kernel_h * kernel_w);
    const int col_step = kernel_h * kernel_w;
    int cnt = 0;
    const T* data_col_ptr = data_col + deformable_group_index *
                                           channel_per_deformable_group *
                                           batch_size * width_col * height_col;
    const T* data_im_ptr =
        data_im + (b * deformable_group + deformable_group_index) *
                      channel_per_deformable_group / kernel_h / kernel_w *
                      height * width;
    const T* data_offset_ptr =
        data_offset + (b * deformable_group + deformable_group_index) * 2 *
                          kernel_h * kernel_w * height_col * width_col;
    const T* data_mask_ptr =
        data_mask + (b * deformable_group + deformable_group_index) *
                        kernel_h * kernel_w * height_col * width_col;

    const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w;

    for (int col_c = offset_c / 2; col_c < channel_per_deformable_group;
         col_c += col_step) {
      const int col_pos =
          (((col_c * batch_size + b) * height_col) + h) * width_col + w;
      const int bp_dir = offset_c % 2;

      int j = (col_pos / width_col / height_col / batch_size) % kernel_w;
      int i =
          (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h;
      int w_out = col_pos % width_col;
      int h_out = (col_pos / width_col) % height_col;
      int w_in = w_out * stride_w - pad_w;
      int h_in = h_out * stride_h - pad_h;
      const int data_offset_h_ptr =
          (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out);
      const int data_offset_w_ptr =
          (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col +
           w_out);
      const int data_mask_hw_ptr =
          (((i * kernel_w + j) * height_col + h_out) * width_col + w_out);
      const T offset_h = data_offset_ptr[data_offset_h_ptr];
      const T offset_w = data_offset_ptr[data_offset_w_ptr];
      const T mask = data_mask_ptr[data_mask_hw_ptr];
      T inv_h = h_in + i * dilation_h + offset_h;
      T inv_w = w_in + j * dilation_w + offset_w;
      if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width) {
        inv_h = inv_w = -2;
      } else {
        mval += data_col_ptr[col_pos] *
                DmcnIm2colBilinear(data_im_ptr + cnt * height * width, width,
                                   height, width, inv_h, inv_w);
      }
      const T weight = DmcnGetCoordinateWeight(
          inv_h, inv_w, height, width, data_im_ptr + cnt * height * width,
          width, bp_dir);
      val += weight * data_col_ptr[col_pos] * mask;
      cnt += 1;
    }
    grad_offset[i] = val;
    if (offset_c % 2 == 0)
      grad_mask[(((b * deformable_group + deformable_group_index) * kernel_h *
                      kernel_w +
                  offset_c / 2) *
                     height_col +
                 h) *
                    width_col +
                w] = mval;
  }
}

template <typename T>
static inline void ModulatedDeformableCol2imCoordCPU(
    const platform::CPUDeviceContext& ctx, const T* data_col, const T* data_im,
    const T* data_offset, const T* data_mask,
    const std::vector<int64_t> im_shape, const std::vector<int64_t> col_shape,
    const std::vector<int64_t> kernel_shape, const std::vector<int> paddings,
    const std::vector<int> strides, const std::vector<int> dilations,
    const int deformable_groups, T* grad_offset, T* grad_mask) {
  int num_kernels = 2 * kernel_shape[2] * kernel_shape[3] * col_shape[1] *
                    col_shape[2] * col_shape[3] * deformable_groups;
  int channel_per_deformable_group = col_shape[0] / deformable_groups;

  ModulatedDeformableCol2imCoordCPUKernel(
      num_kernels, data_col, data_im, data_offset, data_mask, im_shape[0],
      im_shape[1], im_shape[2], kernel_shape[2], kernel_shape[3], paddings[0],
      paddings[1], strides[0], strides[1], dilations[0], dilations[1],
      channel_per_deformable_group, col_shape[1],
      2 * kernel_shape[2] * kernel_shape[3] * deformable_groups,
      deformable_groups, col_shape[2], col_shape[3], grad_offset, grad_mask);
}

// im2col with learned offsets: every sampled value is bilinearly interpolated
// at its shifted location and scaled by its mask.
template <typename T>
void ModulatedDeformableIm2colCPUKernel(
    const int num_kernels, const T* data_im, const T* data_offset,
    const T* data_mask, const int height, const int width, const int kernel_h,
    const int kernel_w, const int pad_h, const int pad_w, const int stride_h,
    const int stride_w, const int dilation_h, const int dilation_w,
    const int channel_per_deformable_group, const int batch_size,
    const int num_channels, const int deformable_group, const int height_col,
    const int width_col, T* data_col) {
  for (size_t i = 0; i < num_kernels; i++) {
    const int w_col = i % width_col;
    const int h_col = (i / width_col) % height_col;
    const int b_col = (i / width_col) / height_col % batch_size;
    const int c_im = (i / width_col / height_col) / batch_size;
    const int c_col = c_im * kernel_h * kernel_w;

    const int deformable_group_index = c_im / channel_per_deformable_group;

    const int h_in = h_col * stride_h - pad_h;
    const int w_in = w_col * stride_w - pad_w;

    T* data_col_ptr =
        data_col +
        ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col;
    const T* data_im_ptr =
        data_im + (b_col * num_channels + c_im) * height * width;
    const T* data_offset_ptr =
        data_offset + (b_col * deformable_group + deformable_group_index) * 2 *
                          kernel_h * kernel_w * height_col * width_col;
    const T* data_mask_ptr =
        data_mask + (b_col * deformable_group + deformable_group_index) *
                        kernel_h * kernel_w * height_col * width_col;

    for (int i = 0; i < kernel_h; ++i) {
      for (int j = 0; j < kernel_w; ++j) {
        const int data_offset_h_ptr =
            ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col;
        const int data_offset_w_ptr =
            ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col +
            w_col;
        const int data_mask_hw_ptr =
            ((i * kernel_w + j) * height_col + h_col) * width_col + w_col;

        const T offset_h = data_offset_ptr[data_offset_h_ptr];
        const T offset_w = data_offset_ptr[data_offset_w_ptr];
        const T mask = data_mask_ptr[data_mask_hw_ptr];
        T val = static_cast<T>(0);
        const T h_im = h_in + i * dilation_h + offset_h;
        const T w_im = w_in + j * dilation_w + offset_w;
        if (h_im > -1 && w_im > -1 && h_im < height && w_im < width) {
          val = DmcnIm2colBilinear(data_im_ptr, width, height, width, h_im,
                                   w_im);
        }
        *data_col_ptr = val * mask;
        data_col_ptr += batch_size * height_col * width_col;
      }
    }
  }
}

template <typename T>
static inline void ModulatedDeformableIm2colCPU(
    const platform::CPUDeviceContext& ctx, const T* data_im,
    const T* data_offset, const T* data_mask,
    const std::vector<int64_t> im_shape, const std::vector<int64_t> col_shape,
    const std::vector<int64_t> filter_shape, const std::vector<int> paddings,
    const std::vector<int> strides, const std::vector<int> dilations,
    const int deformable_groups, T* data_col) {
  int channel_per_deformable_group = im_shape[0] / deformable_groups;
  int num_kernels = im_shape[0] * col_shape[1] * col_shape[2] * col_shape[3];

  // get outputs of im2col with offset by bilinear interpolation
  ModulatedDeformableIm2colCPUKernel(
      num_kernels, data_im, data_offset, data_mask, im_shape[1], im_shape[2],
      filter_shape[2], filter_shape[3], paddings[0], paddings[1], strides[0],
      strides[1], dilations[0], dilations[1], channel_per_deformable_group,
      col_shape[1], im_shape[0], deformable_groups, col_shape[2], col_shape[3],
      data_col);
}

template <typename T>
void FilterGradAddupCPUKernel(const int nthreads, const int n,
                              const int height, const int width,
                              const T* dweight_3d, T* filter_grad) {
  for (size_t i = 0; i < nthreads; i++) {
    filter_grad[i] = filter_grad[i] + dweight_3d[i];
  }
}

// Forward: im2col with offsets and mask, then one GEMM per group.
template <typename T>
class DeformableConvCPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* input = ctx.Input<Tensor>("Input");
    auto* offset = ctx.Input<Tensor>("Offset");
    auto* mask = ctx.Input<Tensor>("Mask");
    Tensor filter = *ctx.Input<Tensor>("Filter");
    Tensor* output = ctx.Output<Tensor>("Output");
    output->mutable_data<T>(ctx.GetPlace());

    auto& dev_ctx = ctx.template device_context<CPUDeviceContext>();

    const int groups = ctx.Attr<int>("groups");
    const int deformable_groups = ctx.Attr<int>("deformable_groups");
    const int im2col_step = ctx.Attr<int>("im2col_step");
    const std::vector<int> strides = ctx.Attr<std::vector<int>>("strides");
    const std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
    const std::vector<int> dilations = ctx.Attr<std::vector<int>>("dilations");

    const int batch_size = static_cast<int>(input->dims()[0]);

    std::vector<int64_t> filter_shape_vec(framework::vectorize(filter.dims()));
    std::vector<int64_t> output_shape_vec(
        framework::vectorize(output->dims()));

    // col_shape_vec: {c_i * k_h * k_w, im2col_step, o_h, o_w}
    std::vector<int64_t> col_buffer_shape_vec(filter_shape_vec.size());
    col_buffer_shape_vec[0] =
        input->dims()[1] * filter.dims()[2] * filter.dims()[3];
    col_buffer_shape_vec[1] = im2col_step;
    for (size_t j = 0; j < filter_shape_vec.size() - 2; ++j) {
      col_buffer_shape_vec[j + 2] = output_shape_vec[j + 2];
    }
    framework::DDim col_shape(framework::make_ddim(col_buffer_shape_vec));
    std::vector<int64_t> output_buffer_shape_vec(1);
    output_buffer_shape_vec[0] = batch_size * output_shape_vec[1] *
                                 output_shape_vec[2] * output_shape_vec[3];
    framework::DDim output_shape(
        framework::make_ddim(output_buffer_shape_vec));
    Tensor col_buffer;
    Tensor output_buffer;
    col_buffer = ctx.AllocateTmpTensor<T, CPUDeviceContext>(col_shape, dev_ctx);
    output_buffer =
        ctx.AllocateTmpTensor<T, CPUDeviceContext>(output_shape, dev_ctx);

    int64_t M = output_shape_vec[1] / groups;
    int64_t N = im2col_step * output_shape_vec[2] * output_shape_vec[3];
    int64_t K =
        input->dims()[1] * filter_shape_vec[2] * filter_shape_vec[3] / groups;

    Tensor weight_3d;
    weight_3d.ShareDataWith(filter).Resize(
        framework::make_ddim({groups, M, K}));
    Tensor col_buffer_3d;
    col_buffer_3d.ShareDataWith(col_buffer)
        .Resize(framework::make_ddim({groups, K, N}));
    Tensor output_4d;
    output_4d.ShareDataWith(output_buffer)
        .Resize(framework::make_ddim({batch_size / im2col_step, groups, M, N}));
    output_4d.mutable_data<T>(ctx.GetPlace());
    framework::DDim input_shape =
        framework::slice_ddim(input->dims(), 1, input->dims().size());
    std::vector<int64_t> input_shape_vec = framework::vectorize(input_shape);

    int input_dim = input->numel() / input->dims()[0];
    int input_offset_dim = offset->numel() / offset->dims()[0];
    int input_mask_dim = mask->numel() / mask->dims()[0];

    auto blas = math::GetBlas<CPUDeviceContext, T>(dev_ctx);

    const T* input_ptr = input->data<T>();
    const T* offset_ptr = offset->data<T>();
    const T* mask_ptr = mask->data<T>();
    col_buffer.mutable_data<T>(ctx.GetPlace());
    T* col_buffer_ptr = col_buffer.data<T>();

    for (int i = 0; i < batch_size / im2col_step; ++i) {
      ModulatedDeformableIm2colCPU(
          dev_ctx, input_ptr + i * im2col_step * input_dim,
          offset_ptr + i * im2col_step * input_offset_dim,
          mask_ptr + i * im2col_step * input_mask_dim, input_shape_vec,
          col_buffer_shape_vec, filter_shape_vec, paddings, strides, dilations,
          deformable_groups, col_buffer_ptr);

      Tensor output_3d = output_4d.Slice(i, i + 1).Resize(
          framework::slice_ddim(output_4d.dims(), 1, output_4d.dims().size()));
      // get the product of pixel and weight
      for (int g = 0; g < groups; ++g) {
        Tensor weight_3d_slice = weight_3d.Slice(g, g + 1).Resize(
            framework::slice_ddim(weight_3d.dims(), 1,
                                  weight_3d.dims().size()));
        Tensor col_buffer_3d_slice =
            col_buffer_3d.Slice(g, g + 1).Resize(framework::slice_ddim(
                col_buffer_3d.dims(), 1, col_buffer_3d.dims().size()));
        Tensor output_3d_slice = output_3d.Slice(g, g + 1).Resize(
            framework::slice_ddim(output_3d.dims(), 1,
                                  output_3d.dims().size()));

        blas.MatMul(weight_3d_slice, false, col_buffer_3d_slice, false, T(1.0),
                    &output_3d_slice, T(0.0));
      }
    }
    output->ShareDataWith(output_buffer)
        .Resize(framework::make_ddim(output_shape_vec));
  }
};

// Backward: transposed GEMMs produce the column-buffer and weight gradients;
// the col2im routines above produce the input, offset, and mask gradients.
template <typename T>
class DeformableConvGradCPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    const Tensor* output_grad =
        ctx.Input<Tensor>(framework::GradVarName("Output"));
    Tensor* input_grad = ctx.Output<Tensor>(framework::GradVarName("Input"));
    Tensor* filter_grad = ctx.Output<Tensor>(framework::GradVarName("Filter"));
    Tensor* offset_grad = ctx.Output<Tensor>(framework::GradVarName("Offset"));
    Tensor* mask_grad = ctx.Output<Tensor>(framework::GradVarName("Mask"));

    const Tensor* input = ctx.Input<Tensor>("Input");
    Tensor offset = *ctx.Input<Tensor>("Offset");
    Tensor mask = *ctx.Input<Tensor>("Mask");
    Tensor filter = *ctx.Input<Tensor>("Filter");
    if (!input_grad && !filter_grad && !offset_grad && !mask_grad) return;

    int groups = ctx.Attr<int>("groups");
    int deformable_groups = ctx.Attr<int>("deformable_groups");
    int im2col_step = ctx.Attr<int>("im2col_step");
    std::vector<int> strides = ctx.Attr<std::vector<int>>("strides");
    std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
    std::vector<int> dilations = ctx.Attr<std::vector<int>>("dilations");

    auto& dev_ctx = ctx.template device_context<CPUDeviceContext>();
    const int batch_size = static_cast<int>(input->dims()[0]);

    framework::DDim input_shape =
        framework::slice_ddim(input->dims(), 1, input->dims().size());
    std::vector<int64_t> input_shape_vec = framework::vectorize(input_shape);
    std::vector<int64_t> filter_shape_vec(framework::vectorize(filter.dims()));
    std::vector<int64_t> output_shape_vec(
        framework::vectorize(output_grad->dims()));

    std::vector<int64_t> col_buffer_shape_vec(filter_shape_vec.size());
    col_buffer_shape_vec[0] =
        input->dims()[1] * filter.dims()[2] * filter.dims()[3];
    col_buffer_shape_vec[1] = im2col_step;
    for (size_t j = 0; j < filter_shape_vec.size() - 2; ++j) {
      col_buffer_shape_vec[j + 2] = output_shape_vec[j + 2];
    }
    framework::DDim col_shape(framework::make_ddim(col_buffer_shape_vec));
    std::vector<int64_t> output_buffer_shape_vec(1);
    output_buffer_shape_vec[0] = batch_size * output_shape_vec[1] *
                                 output_shape_vec[2] * output_shape_vec[3];
    framework::DDim output_shape(
        framework::make_ddim(output_buffer_shape_vec));
    Tensor col_buffer;
    Tensor output_buffer;
    col_buffer = ctx.AllocateTmpTensor<T, CPUDeviceContext>(col_shape, dev_ctx);
    output_buffer =
        ctx.AllocateTmpTensor<T, CPUDeviceContext>(output_shape, dev_ctx);

    output_buffer.ShareDataWith(*output_grad);

    int64_t M =
        input_shape_vec[0] / groups * filter_shape_vec[2] * filter_shape_vec[3];
    int64_t N = im2col_step * output_shape_vec[2] * output_shape_vec[3];
    int64_t K = output_shape_vec[1] / groups;

    framework::DDim weight_3d_shape = {groups, K, M};
    framework::DDim out_grad_4d_shape = {batch_size / im2col_step, groups, K,
                                         N};
    framework::DDim col_buffer_3d_shape = {groups, M, N};
    framework::DDim filter_grad_shape = {groups, K, M};

    Tensor weight_3d;
    weight_3d.ShareDataWith(filter).Resize(weight_3d_shape);
    Tensor out_grad_4d;
    out_grad_4d.ShareDataWith(output_buffer).Resize(out_grad_4d_shape);
    Tensor col_buffer_3d;
    col_buffer_3d.ShareDataWith(col_buffer).Resize(col_buffer_3d_shape);

    math::SetConstant<CPUDeviceContext, T> set_zero;
    auto blas = math::GetBlas<CPUDeviceContext, T>(dev_ctx);

    col_buffer.mutable_data<T>(ctx.GetPlace());
    col_buffer_3d.mutable_data<T>(ctx.GetPlace());
    out_grad_4d.mutable_data<T>(ctx.GetPlace());

    int input_dim = input->numel() / input->dims()[0];
    int input_offset_dim = offset.numel() / offset.dims()[0];
    int input_mask_dim = mask.numel() / mask.dims()[0];

    if (filter_grad) {
      filter_grad->mutable_data<T>(ctx.GetPlace());
      filter_grad->Resize(filter_grad_shape);
      set_zero(dev_ctx, filter_grad, static_cast<T>(0));
    }
    if (input_grad) {
      input_grad->mutable_data<T>(ctx.GetPlace());
      set_zero(dev_ctx, input_grad, static_cast<T>(0));
    }
    if (offset_grad && mask_grad) {
      offset_grad->mutable_data<T>(ctx.GetPlace());
      mask_grad->mutable_data<T>(ctx.GetPlace());
      set_zero(dev_ctx, offset_grad, static_cast<T>(0));
      set_zero(dev_ctx, mask_grad, static_cast<T>(0));
    }

    for (int i = 0; i < batch_size / im2col_step; ++i) {
      Tensor out_grad_3d = out_grad_4d.Slice(i, i + 1).Resize(
          framework::slice_ddim(out_grad_4d.dims(), 1,
                                out_grad_4d.dims().size()));
      for (int g = 0; g < groups; ++g) {
        Tensor weight_3d_slice = weight_3d.Slice(g, g + 1).Resize(
            framework::slice_ddim(weight_3d.dims(), 1,
                                  weight_3d.dims().size()));
        Tensor out_grad_3d_slice = out_grad_3d.Slice(g, g + 1).Resize(
            framework::slice_ddim(out_grad_3d.dims(), 1,
                                  out_grad_3d.dims().size()));
        Tensor col_buffer_3d_slice =
            col_buffer_3d.Slice(g, g + 1).Resize(framework::slice_ddim(
                col_buffer_3d.dims(), 1, col_buffer_3d.dims().size()));

        blas.MatMul(weight_3d_slice, true, out_grad_3d_slice, false, T(1.0),
                    &col_buffer_3d_slice, T(0.0));
      }
      col_buffer.Resize(col_shape);

      T* col_buffer_ptr = col_buffer.data<T>();
      const T* input_ptr = input->data<T>();
      const T* offset_ptr = offset.data<T>();
      const T* mask_ptr = mask.data<T>();

      if (mask_grad && offset_grad) {
        T* offset_grad_ptr = offset_grad->data<T>();
        T* mask_grad_ptr = mask_grad->data<T>();
        // get grad of offset and mask
        ModulatedDeformableCol2imCoordCPU(
            ctx.template device_context<CPUDeviceContext>(), col_buffer_ptr,
            input_ptr + i * im2col_step * input_dim,
            offset_ptr + i * im2col_step * input_offset_dim,
            mask_ptr + i * im2col_step * input_mask_dim, input_shape_vec,
            col_buffer_shape_vec, filter_shape_vec, paddings, strides,
            dilations, deformable_groups,
            offset_grad_ptr + i * im2col_step * input_offset_dim,
            mask_grad_ptr + i * im2col_step * input_mask_dim);
      }
      if (input_grad) {
        T* input_grad_ptr = input_grad->data<T>();
        // get grad of input
        ModulatedDeformableCol2imCPU(
            ctx.template device_context<CPUDeviceContext>(), col_buffer_ptr,
            offset_ptr + i * im2col_step * input_offset_dim,
            mask_ptr + i * im2col_step * input_mask_dim, input_shape_vec,
            col_buffer_shape_vec, filter_shape_vec, paddings, strides,
            dilations, deformable_groups,
            input_grad_ptr + i * im2col_step * input_dim);
        input_grad->Resize(input->dims());
      }

      ModulatedDeformableIm2colCPU(
          ctx.template device_context<CPUDeviceContext>(),
          input_ptr + i * im2col_step * input_dim,
          offset_ptr + i * im2col_step * input_offset_dim,
          mask_ptr + i * im2col_step * input_mask_dim, input_shape_vec,
          col_buffer_shape_vec, filter_shape_vec, paddings, strides, dilations,
          deformable_groups, col_buffer_ptr);

      col_buffer_3d.Resize(col_buffer_3d_shape);

      if (filter_grad) {
        Tensor dweight_3d;
        dweight_3d = ctx.AllocateTmpTensor<T, CPUDeviceContext>(
            filter_grad_shape, dev_ctx);
        for (int g = 0; g < groups; ++g) {
          Tensor out_grad_3d_slice = out_grad_3d.Slice(g, g + 1).Resize(
              framework::slice_ddim(out_grad_3d.dims(), 1,
                                    out_grad_3d.dims().size()));
          Tensor col_buffer_3d_slice =
              col_buffer_3d.Slice(g, g + 1).Resize(framework::slice_ddim(
                  col_buffer_3d.dims(), 1, col_buffer_3d.dims().size()));
          Tensor dweight_3d_slice = dweight_3d.Slice(g, g + 1).Resize(
              framework::slice_ddim(dweight_3d.dims(), 1,
                                    dweight_3d.dims().size()));

          blas.MatMul(out_grad_3d_slice, false, col_buffer_3d_slice, true,
                      T(1.0), &dweight_3d_slice, T(0.0));
        }
        // update grad of weights
        FilterGradAddupCPUKernel(dweight_3d.numel(), groups, K, M,
                                 dweight_3d.data<T>(), filter_grad->data<T>());
      }
    }
    if (filter_grad) {
      filter_grad->Resize(filter.dims());
    }
  }
};

}  // namespace operators
}  // namespace paddle
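A note on the shapes above, summarized from the code rather than stated in the patch: the column buffer is laid out as {C_in·k_h·k_w, im2col_step, H_out, W_out}, and for each im2col slice and each group g the forward pass collapses into a single GEMM,

$$
Y_g = W_g \, C_g, \qquad W_g \in \mathbb{R}^{M \times K},\; C_g \in \mathbb{R}^{K \times N},
$$

with $M = C_{out}/groups$, $K = C_{in} k_h k_w / groups$, and $N = im2col\_step \cdot H_{out} W_{out}$ (the M, N, K computed before blas.MatMul). The backward pass runs the corresponding transposed products: roughly $W_g^{\top}\,\partial Y_g$ for the column-buffer gradient and $\partial Y_g\, C_g^{\top}$ for the weight gradient, which FilterGradAddupCPUKernel then accumulates into filter_grad.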
paddle/fluid/operators/deformable_conv_v1_op.cc (new file, 0 → 100644, +272 −0)
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/deformable_conv_v1_op.h"
#include <memory>
#include "paddle/fluid/operators/conv_op.h"
namespace
paddle
{
namespace
operators
{
class
DeformableConvV1OpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
public:
void
Make
()
override
{
AddInput
(
"Input"
,
"(Tensor) The input of deformable conv op. "
"The shape of input is "
"[N, channel_in, H, W]"
);
AddInput
(
"Offset"
,
"(Tensor) The input offset. "
"The shape of the offset is "
"[N, deformable_groups * kernel_w * kernel_h * 2, H, W"
);
AddInput
(
"Filter"
,
"(Tensor) The Input Filter "
"The shape of the wight is "
"[num_filters, channel_in, kernel_h, kernel_w."
);
AddOutput
(
"Output"
,
"(Tensor) The output. "
"The shape of the output tensor is "
"[N, num_filters, out_height, out_width]]."
);
AddAttr
<
std
::
vector
<
int
>>
(
"strides"
,
"(vector<int> default:{1, 1}), the "
"strides(h_stride, w_stride) of "
"convolution operator."
)
.
SetDefault
({
1
,
1
});
AddAttr
<
std
::
vector
<
int
>>
(
"paddings"
,
"(vector<int> default:{0,0}), the "
"paddings(h_pad, w_pad) of "
"convolution operator. "
)
.
SetDefault
({
0
,
0
});
AddAttr
<
std
::
vector
<
int
>>
(
"dilations"
,
"(vector<int> default:{1, 1}), the "
"dilations(h_dilation, w_dilation) of "
"convolution operator."
)
.
SetDefault
({
1
,
1
});
AddAttr
<
int
>
(
"groups"
,
"(int default:1), the groups number of the convolution operator. "
"According to grouped convolution in Alex Krizhevsky's Deep CNN paper: "
"when group=2, the first half of the filters is only connected to the "
"first half of the input channels, while the second half of the "
"filters "
"is only connected to the second half of the input channels."
)
.
SetDefault
(
1
);
AddAttr
<
int
>
(
"deformable_groups"
,
"(int default:1), the number of the deformable groups."
)
.
SetDefault
(
1
);
AddAttr
<
int
>
(
"im2col_step"
,
"im2col maximum number of image per computation"
)
.
SetDefault
(
64
);
AddComment
(
R"DOC(
**Deformable Convolution v1 Operator**
Deformable Convolution is a new method based Convolution which feature has offset
in spatial location.
1. Get offset of each pixel in feature map with convolution layers which number
of channels should be double of weight size.
2. Add offset to pixel to get new location and the new value which are computed
directly through bilinear interpolation with four nearest pixel.
3. Get the product of pixel and weight as result
Compute 2-D deformable convolution on 4-D input.
Given input image x, output feature map y, the deformable convolution operation can be expressed as follow:
$$
y(p) = \\sum_{k=1}^{K}{w_k * x(p + p_k + \\Delta p_k)}
$$
Where $$\\Delta p_k$$ is the learnable offset for the k-th location, respectively.
Refer to 'https://arxiv.org/abs/1703.06211 '<https://arxiv.org/abs/1703.06211>
Example:
Input:
Input shape: $(N, C_{in}, H_{in}, W_{in})$
Filter shape: $(C_{out}, C_{in}, H_f, W_f)$
Offset shape: $(N, 2 * deformable_groups, * H_f * W_f, H_{out}, W_{out})$
Output:
Output shape: $(N, C_{out}, H_{out}, W_{out})$
where $H_{out}, W_{out}$ must be equal to $H_{in}, W_{in}$ respectively.
Where
$$
H_{out}= \frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (H_f - 1) + 1))}{strides[0]}+ 1 \\
W_{out}= \frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (W_f - 1) + 1))}{strides[1]}+ 1
$$
)DOC"
);
}
};
class
DeformableConvV1Op
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE_EQ
(
ctx
->
HasInput
(
"Input"
),
true
,
"Input(Input) of DeformableConvOp "
"should not be null"
);
PADDLE_ENFORCE_EQ
(
ctx
->
HasInput
(
"Offset"
),
true
,
"Input(Offset) of DeformableConvOp "
"should not be null"
);
PADDLE_ENFORCE_EQ
(
ctx
->
HasInput
(
"Filter"
),
true
,
"Input(Filter) of DeformableConvOp "
"should not be null"
);
PADDLE_ENFORCE_EQ
(
ctx
->
HasOutput
(
"Output"
),
true
,
"Output(Output) of DeformableConvOp "
"should not be null."
);
auto
in_dims
=
ctx
->
GetInputDim
(
"Input"
);
auto
filter_dims
=
ctx
->
GetInputDim
(
"Filter"
);
auto
offset_dims
=
ctx
->
GetInputDim
(
"Offset"
);
std
::
vector
<
int
>
strides
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"strides"
);
std
::
vector
<
int
>
paddings
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"paddings"
);
std
::
vector
<
int
>
dilations
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"dilations"
);
int
groups
=
ctx
->
Attrs
().
Get
<
int
>
(
"groups"
);
int
deformable_groups
=
ctx
->
Attrs
().
Get
<
int
>
(
"deformable_groups"
);
int
im2col_step
=
ctx
->
Attrs
().
Get
<
int
>
(
"im2col_step"
);
PADDLE_ENFORCE_EQ
(
in_dims
.
size
(),
4
,
"Conv input should be 4-D tensor, get %u"
,
in_dims
.
size
());
PADDLE_ENFORCE_EQ
(
in_dims
.
size
(),
filter_dims
.
size
(),
"Conv input dimension and filter dimension should be the same."
);
PADDLE_ENFORCE_EQ
(
in_dims
.
size
()
-
strides
.
size
(),
2U
,
"Conv input dimension and strides dimension should be consistent."
);
PADDLE_ENFORCE_EQ
(
paddings
.
size
(),
strides
.
size
(),
"Conv paddings dimension and Conv strides dimension "
"should be the same."
);
PADDLE_ENFORCE_EQ
(
in_dims
[
1
],
filter_dims
[
1
]
*
groups
,
"The number of input channels should be equal to filter "
"channels * groups."
);
PADDLE_ENFORCE_EQ
(
filter_dims
[
0
]
%
groups
,
0
,
"The number of output channels should be divided by groups."
);
PADDLE_ENFORCE_EQ
(
filter_dims
[
0
]
%
deformable_groups
,
0
,
"The number of output channels should be "
"divided by deformable groups."
);
if
(
in_dims
[
0
]
>
im2col_step
)
{
PADDLE_ENFORCE_EQ
(
in_dims
[
0
]
%
im2col_step
,
0U
,
"Input batchsize must be smaller than or divide im2col_step"
);
}
for
(
size_t
i
=
0
;
i
<
strides
.
size
();
++
i
)
{
PADDLE_ENFORCE_GT
(
strides
[
i
],
0U
,
"stride %d size incorrect"
,
i
);
}
for
(
size_t
i
=
0
;
i
<
dilations
.
size
();
++
i
)
{
PADDLE_ENFORCE_GT
(
dilations
[
i
],
0U
,
"dilation %d size incorrect"
,
i
);
}
std
::
vector
<
int64_t
>
output_shape
({
in_dims
[
0
],
filter_dims
[
0
]});
for
(
size_t
i
=
0
;
i
<
strides
.
size
();
++
i
)
{
output_shape
.
push_back
(
ConvOutputSize
(
in_dims
[
i
+
2
],
filter_dims
[
i
+
2
],
dilations
[
i
],
paddings
[
i
],
strides
[
i
]));
}
PADDLE_ENFORCE_EQ
(
output_shape
[
1
]
%
deformable_groups
,
0U
,
"output num_filter must divide deformable group size."
);
PADDLE_ENFORCE_EQ
(
output_shape
[
2
],
offset_dims
[
2
],
"output height must equal to offset map height."
);
PADDLE_ENFORCE_EQ
(
output_shape
[
3
],
offset_dims
[
3
],
"output width must equal to offset map width."
);
PADDLE_ENFORCE_EQ
(
offset_dims
[
1
]
%
(
filter_dims
[
2
]
*
filter_dims
[
3
]),
0U
,
"offset filter must divide deformable group size."
);
PADDLE_ENFORCE_EQ
(
offset_dims
[
1
]
/
(
2
*
filter_dims
[
2
]
*
filter_dims
[
3
]),
deformable_groups
,
"offset filter must divide deformable group size."
);
ctx
->
SetOutputDim
(
"Output"
,
framework
::
make_ddim
(
output_shape
));
}
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
ctx
.
Input
<
Tensor
>
(
"Input"
)
->
type
(),
ctx
.
device_context
());
}
};
class
DeformableConvV1GradOpDescMaker
:
public
framework
::
SingleGradOpDescMaker
{
public:
using
framework
::
SingleGradOpDescMaker
::
SingleGradOpDescMaker
;
protected:
std
::
unique_ptr
<
framework
::
OpDesc
>
Apply
()
const
override
{
std
::
unique_ptr
<
framework
::
OpDesc
>
op
(
new
framework
::
OpDesc
());
op
->
SetType
(
"deformable_conv_v1_grad"
);
op
->
SetInput
(
"Input"
,
Input
(
"Input"
));
op
->
SetInput
(
"Filter"
,
Input
(
"Filter"
));
op
->
SetInput
(
"Offset"
,
Input
(
"Offset"
));
op
->
SetInput
(
framework
::
GradVarName
(
"Output"
),
OutputGrad
(
"Output"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"Input"
),
InputGrad
(
"Input"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"Filter"
),
InputGrad
(
"Filter"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"Offset"
),
InputGrad
(
"Offset"
));
op
->
SetAttrMap
(
Attrs
());
return
op
;
}
};
class
DeformableConvV1GradOp
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
auto
in_dims
=
ctx
->
GetInputDim
(
"Input"
);
auto
filter_dims
=
ctx
->
GetInputDim
(
"Filter"
);
auto
offset_dims
=
ctx
->
GetInputDim
(
"Offset"
);
PADDLE_ENFORCE_EQ
(
ctx
->
HasInput
(
framework
::
GradVarName
(
"Output"
)),
true
,
"the gradient of output(Out) must not be null"
);
if
(
ctx
->
HasOutput
(
framework
::
GradVarName
(
"Input"
)))
{
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"Input"
),
in_dims
);
}
if
(
ctx
->
HasOutput
(
framework
::
GradVarName
(
"Filter"
)))
{
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"Filter"
),
filter_dims
);
}
if
(
ctx
->
HasOutput
(
framework
::
GradVarName
(
"Offset"
)))
{
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"Offset"
),
offset_dims
);
}
}
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
ctx
.
Input
<
Tensor
>
(
"Input"
)
->
type
(),
ctx
.
device_context
());
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
deformable_conv_v1
,
ops
::
DeformableConvV1Op
,
ops
::
DeformableConvV1OpMaker
,
ops
::
DeformableConvV1GradOpDescMaker
);
REGISTER_OPERATOR
(
deformable_conv_v1_grad
,
ops
::
DeformableConvV1GradOp
);
REGISTER_OP_CPU_KERNEL
(
deformable_conv_v1
,
ops
::
DeformableConvV1CPUKernel
<
float
>
);
REGISTER_OP_CPU_KERNEL
(
deformable_conv_v1_grad
,
ops
::
DeformableConvV1GradCPUKernel
<
float
>
);
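As a quick numeric check of the output-size formula in the DOC string above (illustrative numbers, not from the patch): with $H_{in} = 64$, a $3 \times 3$ filter, paddings[0] = 1, dilations[0] = 1, and strides[0] = 1,

$$
H_{out} = \frac{64 + 2 \cdot 1 - (1 \cdot (3 - 1) + 1)}{1} + 1 = 63 + 1 = 64,
$$

so a stride-1, padding-1, 3×3 deformable convolution preserves spatial size, consistent with InferShape requiring the offset map to match the output height and width.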
paddle/fluid/operators/deformable_conv_v1_op.cu (new file, 0 → 100644, +609 −0)
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Part of the following code in this file refs to
// https://github.com/msracver/Deformable-ConvNets/blob/master/faster_rcnn/operator_cxx/deformable_convolution.cu
//
// Copyright (c) 2017 Microsoft
// Licensed under The Apache-2.0 License [see LICENSE for details]
// \file deformable_psroi_pooling.cu
// \brief
// \author Yi Li, Guodong Zhang, Jifeng Dai
#pragma once
#include <algorithm>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/deformable_conv_filter.cu.h"
#include "paddle/fluid/operators/deformable_conv_func.h"
#include "paddle/fluid/operators/deformable_conv_v1_op.h"
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/platform/cuda_primitives.h"
namespace
paddle
{
namespace
operators
{
using
Tensor
=
framework
::
Tensor
;
using
CUDADeviceContext
=
paddle
::
platform
::
CUDADeviceContext
;
static
constexpr
int
kNumCUDAThread
=
512
;
static
constexpr
int
kNumMaximumNumBlock
=
4096
;
static
inline
int
NumBlock
(
const
int
N
)
{
return
std
::
min
((
N
+
kNumCUDAThread
-
1
)
/
kNumCUDAThread
,
kNumMaximumNumBlock
);
}
template <typename T>
__global__ void DeformableCol2imCUDAKernel(
    const int nthreads, const T* data_col, const T* data_offset,
    const int channels, const int height, const int width, const int kernel_h,
    const int kernel_w, const int pad_h, const int pad_w, const int stride_h,
    const int stride_w, const int dilation_h, const int dilation_w,
    const int channel_per_deformable_group, const int batch_size,
    const int deformable_group, const int height_col, const int width_col,
    T* grad_im) {
  int index = blockIdx.x * blockDim.x + threadIdx.x;
  int offset = blockDim.x * gridDim.x;
  for (size_t thread = index; thread < nthreads; thread += offset) {
    const int j = (thread / width_col / height_col / batch_size) % kernel_w;
    const int i =
        (thread / width_col / height_col / batch_size / kernel_w) % kernel_h;
    const int c =
        thread / width_col / height_col / batch_size / kernel_w / kernel_h;
    const int deformable_group_index = c / channel_per_deformable_group;

    int w_out = thread % width_col;
    int h_out = (thread / width_col) % height_col;
    int b = (thread / width_col / height_col) % batch_size;
    int w_in = w_out * stride_w - pad_w;
    int h_in = h_out * stride_h - pad_h;

    const T* data_offset_ptr =
        data_offset + (b * deformable_group + deformable_group_index) * 2 *
                          kernel_h * kernel_w * height_col * width_col;
    const int data_offset_h_ptr =
        ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out;
    const int data_offset_w_ptr =
        ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out;
    const T offset_h = data_offset_ptr[data_offset_h_ptr];
    const T offset_w = data_offset_ptr[data_offset_w_ptr];
    const T cur_inv_h_data = h_in + i * dilation_h + offset_h;
    const T cur_inv_w_data = w_in + j * dilation_w + offset_w;

    const T cur_top_grad = data_col[thread];
    const int cur_h = static_cast<int>(cur_inv_h_data);
    const int cur_w = static_cast<int>(cur_inv_w_data);
    for (int dy = -2; dy <= 2; dy++) {
      for (int dx = -2; dx <= 2; dx++) {
        if (cur_h + dy >= 0 && cur_h + dy < height && cur_w + dx >= 0 &&
            cur_w + dx < width && abs(cur_inv_h_data - (cur_h + dy)) < 1 &&
            abs(cur_inv_w_data - (cur_w + dx)) < 1) {
          int cur_bottom_grad_pos =
              ((b * channels + c) * height + cur_h + dy) * width + cur_w + dx;
          T weight =
              DmcnGetGradientWeight(cur_inv_h_data, cur_inv_w_data, cur_h + dy,
                                    cur_w + dx, height, width);
          atomicAdd(grad_im + cur_bottom_grad_pos, weight * cur_top_grad);
        }
      }
    }
  }
}
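Each column-buffer gradient is scattered back to the (at most) four input pixels that fed its bilinear sample; atomicAdd is required because different output positions can touch the same input pixel. DmcnGetGradientWeight lives in deformable_conv_func.h, which this excerpt does not show; assuming it is the standard bilinear-interpolation weight of an integer grid point at a fractional sample location, a hedged sketch of what it computes:

    def bilinear_grad_weight(h, w, gh, gw, height, width):
        # weight of grid point (gh, gw) in the bilinear sample taken at the
        # fractional coordinate (h, w); mirrors the dy/dx neighborhood test above
        if gh < 0 or gh >= height or gw < 0 or gw >= width:
            return 0.0
        if abs(h - gh) >= 1 or abs(w - gw) >= 1:
            return 0.0
        return (1 - abs(h - gh)) * (1 - abs(w - gw))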
template <typename T>
inline void DeformableCol2im(const platform::CUDADeviceContext& ctx,
                             const T* data_col, const T* data_offset,
                             const std::vector<int64_t> im_shape,
                             const std::vector<int64_t> col_shape,
                             const std::vector<int64_t> kernel_shape,
                             const std::vector<int> pad,
                             const std::vector<int> stride,
                             const std::vector<int> dilation,
                             const int deformable_group, T* grad_im) {
  int channel_per_deformable_group = im_shape[0] / deformable_group;
  int num_kernels = col_shape[0] * col_shape[1] * col_shape[2] * col_shape[3];
  int blocks = NumBlock(num_kernels);
  int threads = kNumCUDAThread;

  DeformableCol2imCUDAKernel<T><<<
      blocks, threads, 0,
      reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream()>>>(
      num_kernels, data_col, data_offset, im_shape[0], im_shape[1],
      im_shape[2], kernel_shape[2], kernel_shape[3], pad[0], pad[1], stride[0],
      stride[1], dilation[0], dilation[1], channel_per_deformable_group,
      col_shape[1], deformable_group, col_shape[2], col_shape[3], grad_im);
}
template <typename T>
__global__ void DeformableCol2imCoordCUDAKernel(
    const int nthreads, const T* data_col, const T* data_im,
    const T* data_offset, const int channels, const int height,
    const int width, const int kernel_h, const int kernel_w, const int pad_h,
    const int pad_w, const int stride_h, const int stride_w,
    const int dilation_h, const int dilation_w,
    const int channel_per_deformable_group, const int batch_size,
    const int offset_channels, const int deformable_group,
    const int height_col, const int width_col, T* grad_offset) {
  int index = blockIdx.x * blockDim.x + threadIdx.x;
  int offset = blockDim.x * gridDim.x;
  for (size_t i = index; i < nthreads; i += offset) {
    T val = 0, mval = 0;
    const int w = i % width_col;
    const int h = (i / width_col) % height_col;
    const int c = (i / width_col / height_col) % offset_channels;
    const int b = (i / width_col / height_col) / offset_channels;

    const int deformable_group_index = c / (2 * kernel_h * kernel_w);
    const int col_step = kernel_h * kernel_w;
    int cnt = 0;
    const T* data_col_ptr = data_col + deformable_group_index *
                                           channel_per_deformable_group *
                                           batch_size * width_col * height_col;
    const T* data_im_ptr =
        data_im + (b * deformable_group + deformable_group_index) *
                      channel_per_deformable_group / kernel_h / kernel_w *
                      height * width;
    const T* data_offset_ptr =
        data_offset + (b * deformable_group + deformable_group_index) * 2 *
                          kernel_h * kernel_w * height_col * width_col;
    const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w;

    for (int col_c = offset_c / 2; col_c < channel_per_deformable_group;
         col_c += col_step) {
      const int col_pos =
          (((col_c * batch_size + b) * height_col) + h) * width_col + w;
      const int bp_dir = offset_c % 2;
      int j = (col_pos / width_col / height_col / batch_size) % kernel_w;
      int i =
          (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h;
      int w_out = col_pos % width_col;
      int h_out = (col_pos / width_col) % height_col;
      int w_in = w_out * stride_w - pad_w;
      int h_in = h_out * stride_h - pad_h;
      const int data_offset_h_ptr =
          (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out);
      const int data_offset_w_ptr =
          (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col +
           w_out);
      const T offset_h = data_offset_ptr[data_offset_h_ptr];
      const T offset_w = data_offset_ptr[data_offset_w_ptr];
      T inv_h = h_in + i * dilation_h + offset_h;
      T inv_w = w_in + j * dilation_w + offset_w;
      if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width) {
        inv_h = inv_w = -2;
      } else {
        mval += data_col_ptr[col_pos] *
                DmcnIm2colBilinear(data_im_ptr + cnt * height * width, width,
                                   height, width, inv_h, inv_w);
      }
      const T weight = DmcnGetCoordinateWeight(
          inv_h, inv_w, height, width, data_im_ptr + cnt * height * width,
          width, bp_dir);
      val += weight * data_col_ptr[col_pos];
      cnt += 1;
    }
    grad_offset[i] = val;
  }
}
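DmcnGetCoordinateWeight (also from deformable_conv_func.h, not excerpted here) supplies the derivative of the bilinear sample with respect to the sample coordinate, which the chain rule turns into the offset gradient above. Assuming the usual analytic derivative of bilinear interpolation, a sketch of what it computes:

    import numpy as np

    def coordinate_weight(im, h, w, bp_dir):
        # derivative of the bilinear sample at (h, w) w.r.t. h (bp_dir == 0)
        # or w (bp_dir == 1); illustrative only
        height, width = im.shape
        h0, w0 = int(np.floor(h)), int(np.floor(w))
        def v(y, x):
            return im[y, x] if 0 <= y < height and 0 <= x < width else 0.0
        lh, lw = h - h0, w - w0
        if bp_dir == 0:  # finite difference of rows, weighted along w
            return (1 - lw) * (v(h0 + 1, w0) - v(h0, w0)) + \
                   lw * (v(h0 + 1, w0 + 1) - v(h0, w0 + 1))
        # finite difference of columns, weighted along h
        return (1 - lh) * (v(h0, w0 + 1) - v(h0, w0)) + \
               lh * (v(h0 + 1, w0 + 1) - v(h0 + 1, w0))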
template <typename T>
inline void DeformableCol2imCoord(
    const platform::CUDADeviceContext& ctx, const T* data_col,
    const T* data_im, const T* data_offset,
    const std::vector<int64_t> im_shape, const std::vector<int64_t> col_shape,
    const std::vector<int64_t> kernel_shape, const std::vector<int> paddings,
    const std::vector<int> strides, const std::vector<int> dilations,
    const int deformable_groups, T* grad_offset) {
  int num_kernels = 2 * kernel_shape[2] * kernel_shape[3] * col_shape[1] *
                    col_shape[2] * col_shape[3] * deformable_groups;
  int channel_per_deformable_group = col_shape[0] / deformable_groups;
  int blocks = NumBlock(num_kernels);
  int threads = kNumCUDAThread;

  DeformableCol2imCoordCUDAKernel<T><<<
      blocks, threads, 0,
      reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream()>>>(
      num_kernels, data_col, data_im, data_offset, im_shape[0], im_shape[1],
      im_shape[2], kernel_shape[2], kernel_shape[3], paddings[0], paddings[1],
      strides[0], strides[1], dilations[0], dilations[1],
      channel_per_deformable_group, col_shape[1],
      2 * kernel_shape[2] * kernel_shape[3] * deformable_groups,
      deformable_groups, col_shape[2], col_shape[3], grad_offset);
}
template <typename T>
__global__ void DeformableIm2colCUDAKernel(
    const int nthreads, const T* data_im, const T* data_offset,
    const int height, const int width, const int kernel_h, const int kernel_w,
    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
    const int dilation_h, const int dilation_w,
    const int channel_per_deformable_group, const int batch_size,
    const int num_channels, const int deformable_group, const int height_col,
    const int width_col, T* data_col) {
  int index = blockIdx.x * blockDim.x + threadIdx.x;
  int offset = blockDim.x * gridDim.x;
  for (size_t i = index; i < nthreads; i += offset) {
    const int w_col = i % width_col;
    const int h_col = (i / width_col) % height_col;
    const int b_col = (i / width_col) / height_col % batch_size;
    const int c_im = (i / width_col / height_col) / batch_size;
    const int c_col = c_im * kernel_h * kernel_w;

    const int deformable_group_index = c_im / channel_per_deformable_group;
    const int h_in = h_col * stride_h - pad_h;
    const int w_in = w_col * stride_w - pad_w;

    T* data_col_ptr =
        data_col +
        ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col;
    const T* data_im_ptr =
        data_im + (b_col * num_channels + c_im) * height * width;
    const T* data_offset_ptr =
        data_offset + (b_col * deformable_group + deformable_group_index) * 2 *
                          kernel_h * kernel_w * height_col * width_col;

    for (int i = 0; i < kernel_h; ++i) {
      for (int j = 0; j < kernel_w; ++j) {
        const int data_offset_h_ptr =
            ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col;
        const int data_offset_w_ptr =
            ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col +
            w_col;
        const T offset_h = data_offset_ptr[data_offset_h_ptr];
        const T offset_w = data_offset_ptr[data_offset_w_ptr];
        T val = static_cast<T>(0);
        const T h_im = h_in + i * dilation_h + offset_h;
        const T w_im = w_in + j * dilation_w + offset_w;
        if (h_im > -1 && w_im > -1 && h_im < height && w_im < width) {
          val =
              DmcnIm2colBilinear(data_im_ptr, width, height, width, h_im, w_im);
        }
        *data_col_ptr = val;
        data_col_ptr += batch_size * height_col * width_col;
      }
    }
  }
}
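The kernel reads the offsets from an interleaved channel layout; a short numpy illustration of the indexing it performs (shapes as documented for the Python layer below):

    import numpy as np

    # offset: [N, deformable_groups * 2 * k_h * k_w, H_out, W_out]; channel
    # 2*(i*k_w + j) holds the h-offset and 2*(i*k_w + j) + 1 the w-offset of
    # kernel tap (i, j), matching data_offset_h_ptr / data_offset_w_ptr above
    k_h, k_w = 3, 3
    offset = np.random.rand(1, 2 * k_h * k_w, 5, 5).astype(np.float32)
    i, j, h_col, w_col = 1, 2, 0, 0
    offset_h = offset[0, 2 * (i * k_w + j), h_col, w_col]
    offset_w = offset[0, 2 * (i * k_w + j) + 1, h_col, w_col]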
template <typename T>
inline void DeformableIm2col(const platform::CUDADeviceContext& ctx,
                             const T* data_im, const T* data_offset,
                             const std::vector<int64_t> im_shape,
                             const std::vector<int64_t> col_shape,
                             const std::vector<int64_t> filter_shape,
                             const std::vector<int> paddings,
                             const std::vector<int> strides,
                             const std::vector<int> dilations,
                             const int deformable_groups, T* data_col) {
  int channel_per_deformable_group = im_shape[0] / deformable_groups;
  int num_kernels = im_shape[0] * col_shape[1] * col_shape[2] * col_shape[3];
  int blocks = NumBlock(num_kernels);
  int threads = kNumCUDAThread;

  // get outputs of im2col with offset by bilinear interpolation
  DeformableIm2colCUDAKernel<T><<<
      blocks, threads, 0,
      reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream()>>>(
      num_kernels, data_im, data_offset, im_shape[1], im_shape[2],
      filter_shape[2], filter_shape[3], paddings[0], paddings[1], strides[0],
      strides[1], dilations[0], dilations[1], channel_per_deformable_group,
      col_shape[1], im_shape[0], deformable_groups, col_shape[2], col_shape[3],
      data_col);
}
template <typename T>
class DeformableConvV1CUDAKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    const Tensor* input = ctx.Input<Tensor>("Input");
    const Tensor offset = *ctx.Input<Tensor>("Offset");
    Tensor filter = *ctx.Input<Tensor>("Filter");
    Tensor* output = ctx.Output<Tensor>("Output");
    output->mutable_data<T>(ctx.GetPlace());

    auto& dev_ctx = ctx.template device_context<CUDADeviceContext>();

    const int groups = ctx.Attr<int>("groups");
    const int deformable_groups = ctx.Attr<int>("deformable_groups");
    const int im2col_step = ctx.Attr<int>("im2col_step");
    const std::vector<int> strides = ctx.Attr<std::vector<int>>("strides");
    const std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
    const std::vector<int> dilations = ctx.Attr<std::vector<int>>("dilations");

    const int batch_size = static_cast<int>(input->dims()[0]);

    std::vector<int64_t> filter_shape_vec(framework::vectorize(filter.dims()));
    std::vector<int64_t> output_shape_vec(
        framework::vectorize(output->dims()));

    // col_shape_vec: {c_i * k_h * k_w, im2col_step, o_h, o_w}
    std::vector<int64_t> col_buffer_shape_vec(filter_shape_vec.size());
    col_buffer_shape_vec[0] =
        input->dims()[1] * filter.dims()[2] * filter.dims()[3];
    col_buffer_shape_vec[1] = im2col_step;
    for (size_t j = 0; j < filter_shape_vec.size() - 2; ++j) {
      col_buffer_shape_vec[j + 2] = output_shape_vec[j + 2];
    }
    framework::DDim col_shape(framework::make_ddim(col_buffer_shape_vec));
    std::vector<int64_t> output_buffer_shape_vec(1);
    output_buffer_shape_vec[0] = batch_size * output_shape_vec[1] *
                                 output_shape_vec[2] * output_shape_vec[3];
    framework::DDim output_shape(
        framework::make_ddim(output_buffer_shape_vec));

    Tensor col_buffer;
    Tensor output_buffer;
    col_buffer =
        ctx.AllocateTmpTensor<T, CUDADeviceContext>(col_shape, dev_ctx);
    output_buffer =
        ctx.AllocateTmpTensor<T, CUDADeviceContext>(output_shape, dev_ctx);

    int64_t M = output_shape_vec[1] / groups;
    int64_t N = im2col_step * output_shape_vec[2] * output_shape_vec[3];
    int64_t K =
        input->dims()[1] * filter_shape_vec[2] * filter_shape_vec[3] / groups;

    Tensor weight_3d;
    weight_3d.ShareDataWith(filter).Resize(
        framework::make_ddim({groups, M, K}));
    Tensor col_buffer_3d;
    col_buffer_3d.ShareDataWith(col_buffer)
        .Resize(framework::make_ddim({groups, K, N}));
    Tensor output_4d;
    output_4d.ShareDataWith(output_buffer)
        .Resize(framework::make_ddim({batch_size / im2col_step, groups, M, N}));
    output_4d.mutable_data<T>(ctx.GetPlace());

    framework::DDim input_shape =
        framework::slice_ddim(input->dims(), 1, input->dims().size());
    std::vector<int64_t> input_shape_vec = framework::vectorize(input_shape);

    int input_dim = input->numel() / input->dims()[0];
    int input_offset_dim = offset.numel() / offset.dims()[0];

    auto blas = math::GetBlas<CUDADeviceContext, T>(dev_ctx);

    const T* input_ptr = input->data<T>();
    const T* offset_ptr = offset.data<T>();
    col_buffer.mutable_data<T>(ctx.GetPlace());
    T* col_buffer_ptr = col_buffer.data<T>();

    for (int i = 0; i < batch_size / im2col_step; ++i) {
      DeformableIm2col(dev_ctx, input_ptr + i * im2col_step * input_dim,
                       offset_ptr + i * im2col_step * input_offset_dim,
                       input_shape_vec, col_buffer_shape_vec, filter_shape_vec,
                       paddings, strides, dilations, deformable_groups,
                       col_buffer_ptr);

      Tensor output_3d = output_4d.Slice(i, i + 1).Resize(
          framework::slice_ddim(output_4d.dims(), 1, output_4d.dims().size()));
      // get the product of pixel and weight
      for (int g = 0; g < groups; ++g) {
        Tensor weight_3d_slice = weight_3d.Slice(g, g + 1).Resize(
            framework::slice_ddim(weight_3d.dims(), 1,
                                  weight_3d.dims().size()));
        Tensor col_buffer_3d_slice =
            col_buffer_3d.Slice(g, g + 1).Resize(framework::slice_ddim(
                col_buffer_3d.dims(), 1, col_buffer_3d.dims().size()));
        Tensor output_3d_slice = output_3d.Slice(g, g + 1).Resize(
            framework::slice_ddim(output_3d.dims(), 1,
                                  output_3d.dims().size()));

        blas.MatMul(weight_3d_slice, false, col_buffer_3d_slice, false, T(1.0),
                    &output_3d_slice, T(0.0));
      }
    }
    output->ShareDataWith(output_buffer)
        .Resize(framework::make_ddim(output_shape_vec));
  }
};
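The forward pass is therefore im2col followed by one GEMM per group; the shape algebra, with M, N, K as in the kernel, in a small numpy sketch (sizes illustrative):

    import numpy as np

    groups, M, K, N = 2, 4, 18, 25
    weight_3d = np.random.rand(groups, M, K)      # [groups, C_out/g, C_in/g * k_h * k_w]
    col_buffer_3d = np.random.rand(groups, K, N)  # [groups, K, im2col_step * o_h * o_w]
    output_3d = np.einsum('gmk,gkn->gmn', weight_3d, col_buffer_3d)
    assert output_3d.shape == (groups, M, N)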
template <typename T>
class DeformableConvV1GradCUDAKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    const Tensor* output_grad =
        ctx.Input<Tensor>(framework::GradVarName("Output"));
    Tensor* input_grad = ctx.Output<Tensor>(framework::GradVarName("Input"));
    Tensor* filter_grad = ctx.Output<Tensor>(framework::GradVarName("Filter"));
    Tensor* offset_grad = ctx.Output<Tensor>(framework::GradVarName("Offset"));

    const Tensor* input = ctx.Input<Tensor>("Input");
    Tensor offset = *ctx.Input<Tensor>("Offset");
    Tensor filter = *ctx.Input<Tensor>("Filter");
    if (!input_grad && !filter_grad && !offset_grad) return;

    int groups = ctx.Attr<int>("groups");
    int deformable_groups = ctx.Attr<int>("deformable_groups");
    int im2col_step = ctx.Attr<int>("im2col_step");
    std::vector<int> strides = ctx.Attr<std::vector<int>>("strides");
    std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
    std::vector<int> dilations = ctx.Attr<std::vector<int>>("dilations");

    auto& dev_ctx = ctx.template device_context<CUDADeviceContext>();
    const int batch_size = static_cast<int>(input->dims()[0]);

    framework::DDim input_shape =
        framework::slice_ddim(input->dims(), 1, input->dims().size());
    std::vector<int64_t> input_shape_vec = framework::vectorize(input_shape);
    std::vector<int64_t> filter_shape_vec(framework::vectorize(filter.dims()));
    std::vector<int64_t> output_shape_vec(
        framework::vectorize(output_grad->dims()));

    std::vector<int64_t> col_buffer_shape_vec(filter_shape_vec.size());
    col_buffer_shape_vec[0] =
        input->dims()[1] * filter.dims()[2] * filter.dims()[3];
    col_buffer_shape_vec[1] = im2col_step;
    for (size_t j = 0; j < filter_shape_vec.size() - 2; ++j) {
      col_buffer_shape_vec[j + 2] = output_shape_vec[j + 2];
    }
    framework::DDim col_shape(framework::make_ddim(col_buffer_shape_vec));
    std::vector<int64_t> output_buffer_shape_vec(1);
    output_buffer_shape_vec[0] = batch_size * output_shape_vec[1] *
                                 output_shape_vec[2] * output_shape_vec[3];
    framework::DDim output_shape(
        framework::make_ddim(output_buffer_shape_vec));

    Tensor col_buffer;
    Tensor output_buffer;
    col_buffer =
        ctx.AllocateTmpTensor<T, CUDADeviceContext>(col_shape, dev_ctx);
    output_buffer =
        ctx.AllocateTmpTensor<T, CUDADeviceContext>(output_shape, dev_ctx);
    output_buffer.ShareDataWith(*output_grad);

    int64_t M = input_shape_vec[0] / groups * filter_shape_vec[2] *
                filter_shape_vec[3];
    int64_t N = im2col_step * output_shape_vec[2] * output_shape_vec[3];
    int64_t K = output_shape_vec[1] / groups;

    framework::DDim weight_3d_shape = {groups, K, M};
    framework::DDim out_grad_4d_shape = {batch_size / im2col_step, groups, K,
                                         N};
    framework::DDim col_buffer_3d_shape = {groups, M, N};
    framework::DDim filter_grad_shape = {groups, K, M};

    Tensor weight_3d;
    weight_3d.ShareDataWith(filter).Resize(weight_3d_shape);
    Tensor out_grad_4d;
    out_grad_4d.ShareDataWith(output_buffer).Resize(out_grad_4d_shape);
    Tensor col_buffer_3d;
    col_buffer_3d.ShareDataWith(col_buffer).Resize(col_buffer_3d_shape);

    math::SetConstant<CUDADeviceContext, T> set_zero;
    auto blas = math::GetBlas<CUDADeviceContext, T>(dev_ctx);

    col_buffer.mutable_data<T>(ctx.GetPlace());
    col_buffer_3d.mutable_data<T>(ctx.GetPlace());
    out_grad_4d.mutable_data<T>(ctx.GetPlace());

    int input_dim = input->numel() / input->dims()[0];
    int input_offset_dim = offset.numel() / offset.dims()[0];

    if (filter_grad) {
      filter_grad->mutable_data<T>(ctx.GetPlace());
      filter_grad->Resize(filter_grad_shape);
      set_zero(dev_ctx, filter_grad, static_cast<T>(0));
    }
    if (input_grad) {
      input_grad->mutable_data<T>(ctx.GetPlace());
      set_zero(dev_ctx, input_grad, static_cast<T>(0));
    }
    if (offset_grad) {
      offset_grad->mutable_data<T>(ctx.GetPlace());
      set_zero(dev_ctx, offset_grad, static_cast<T>(0));
    }

    for (int i = 0; i < batch_size / im2col_step; ++i) {
      Tensor out_grad_3d = out_grad_4d.Slice(i, i + 1).Resize(
          framework::slice_ddim(out_grad_4d.dims(), 1,
                                out_grad_4d.dims().size()));
      for (int g = 0; g < groups; ++g) {
        Tensor weight_3d_slice = weight_3d.Slice(g, g + 1).Resize(
            framework::slice_ddim(weight_3d.dims(), 1,
                                  weight_3d.dims().size()));
        Tensor out_grad_3d_slice = out_grad_3d.Slice(g, g + 1).Resize(
            framework::slice_ddim(out_grad_3d.dims(), 1,
                                  out_grad_3d.dims().size()));
        Tensor col_buffer_3d_slice =
            col_buffer_3d.Slice(g, g + 1).Resize(framework::slice_ddim(
                col_buffer_3d.dims(), 1, col_buffer_3d.dims().size()));

        blas.MatMul(weight_3d_slice, true, out_grad_3d_slice, false, T(1.0),
                    &col_buffer_3d_slice, T(0.0));
      }
      col_buffer.Resize(col_shape);

      T* col_buffer_ptr = col_buffer.data<T>();
      const T* input_ptr = input->data<T>();
      const T* offset_ptr = offset.data<T>();

      if (offset_grad) {
        T* offset_grad_ptr = offset_grad->data<T>();
        // get grad of offset
        DeformableCol2imCoord(
            dev_ctx, col_buffer_ptr, input_ptr + i * im2col_step * input_dim,
            offset_ptr + i * im2col_step * input_offset_dim, input_shape_vec,
            col_buffer_shape_vec, filter_shape_vec, paddings, strides,
            dilations, deformable_groups,
            offset_grad_ptr + i * im2col_step * input_offset_dim);
      }
      if (input_grad) {
        T* input_grad_ptr = input_grad->data<T>();
        // get grad of input
        DeformableCol2im(dev_ctx, col_buffer_ptr,
                         offset_ptr + i * im2col_step * input_offset_dim,
                         input_shape_vec, col_buffer_shape_vec,
                         filter_shape_vec, paddings, strides, dilations,
                         deformable_groups,
                         input_grad_ptr + i * im2col_step * input_dim);
        input_grad->Resize(input->dims());
      }

      DeformableIm2col(dev_ctx, input_ptr + i * im2col_step * input_dim,
                       offset_ptr + i * im2col_step * input_offset_dim,
                       input_shape_vec, col_buffer_shape_vec, filter_shape_vec,
                       paddings, strides, dilations, deformable_groups,
                       col_buffer_ptr);

      col_buffer_3d.Resize(col_buffer_3d_shape);

      if (filter_grad) {
        Tensor dweight_3d;
        dweight_3d = ctx.AllocateTmpTensor<T, CUDADeviceContext>(
            filter_grad_shape, dev_ctx);
        for (int g = 0; g < groups; ++g) {
          Tensor out_grad_3d_slice = out_grad_3d.Slice(g, g + 1).Resize(
              framework::slice_ddim(out_grad_3d.dims(), 1,
                                    out_grad_3d.dims().size()));
          Tensor col_buffer_3d_slice =
              col_buffer_3d.Slice(g, g + 1).Resize(framework::slice_ddim(
                  col_buffer_3d.dims(), 1, col_buffer_3d.dims().size()));
          Tensor dweight_3d_slice = dweight_3d.Slice(g, g + 1).Resize(
              framework::slice_ddim(dweight_3d.dims(), 1,
                                    dweight_3d.dims().size()));

          blas.MatMul(out_grad_3d_slice, false, col_buffer_3d_slice, true,
                      T(1.0), &dweight_3d_slice, T(0.0));
        }
        FilterGradAddupCUDAKernel<T><<<NumBlock(dweight_3d.numel()),
                                       kNumCUDAThread, 0, dev_ctx.stream()>>>(
            dweight_3d.numel(), groups, K, M, dweight_3d.data<T>(),
            filter_grad->data<T>());
      }
    }
    if (filter_grad) {
      filter_grad->Resize(filter.dims());
    }
  }
};
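FilterGradAddupCUDAKernel is declared in deformable_conv_filter.cu.h, which this commit adds but the excerpt does not show; functionally it appears to accumulate each chunk's dweight_3d into filter_grad, i.e. an elementwise add over im2col chunks (a sketch under that assumption):

    import numpy as np

    filter_grad = np.zeros((2, 3, 18))         # [groups, K, M], zeroed above
    for chunk in range(4):                     # one dweight_3d per im2col chunk
        dweight_3d = np.random.rand(2, 3, 18)  # GEMM result for this chunk
        filter_grad += dweight_3d              # what the add-up kernel performs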
}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(deformable_conv_v1,
                        ops::DeformableConvV1CUDAKernel<float>);
REGISTER_OP_CUDA_KERNEL(deformable_conv_v1_grad,
                        ops::DeformableConvV1GradCUDAKernel<float>);
paddle/fluid/operators/deformable_conv_v1_op.h (new file, mode 100644)
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Part of the following code in this file refs to
// https://github.com/msracver/Deformable-ConvNets/blob/master/faster_rcnn/operator_cxx/deformable_convolution.cu
//
// Copyright (c) 2017 Microsoft
// Licensed under The Apache-2.0 License [see LICENSE for details]
// \file deformable_psroi_pooling.cu
// \brief
// \author Yi Li, Guodong Zhang, Jifeng Dai
#pragma once
#include <algorithm>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/deformable_conv_func.h"
#include "paddle/fluid/operators/deformable_conv_op.h"
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/math/math_function.h"
namespace paddle {
namespace operators {

using Tensor = framework::Tensor;
using CPUDeviceContext = platform::CPUDeviceContext;

template <typename T>
void DeformableCol2imCPUKernel(
    const int num_kernels, const T* data_col, const T* data_offset,
    const int channels, const int height, const int width, const int kernel_h,
    const int kernel_w, const int pad_h, const int pad_w, const int stride_h,
    const int stride_w, const int dilation_h, const int dilation_w,
    const int channel_per_deformable_group, const int batch_size,
    const int deformable_group, const int height_col, const int width_col,
    T* grad_im) {
  for (size_t thread = 0; thread < num_kernels; thread++) {
    const int j = (thread / width_col / height_col / batch_size) % kernel_w;
    const int i =
        (thread / width_col / height_col / batch_size / kernel_w) % kernel_h;
    const int c =
        thread / width_col / height_col / batch_size / kernel_w / kernel_h;
    const int deformable_group_index = c / channel_per_deformable_group;

    int w_out = thread % width_col;
    int h_out = (thread / width_col) % height_col;
    int b = (thread / width_col / height_col) % batch_size;
    int w_in = w_out * stride_w - pad_w;
    int h_in = h_out * stride_h - pad_h;

    const T* data_offset_ptr =
        data_offset + (b * deformable_group + deformable_group_index) * 2 *
                          kernel_h * kernel_w * height_col * width_col;
    const int data_offset_h_ptr =
        ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out;
    const int data_offset_w_ptr =
        ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out;
    const T offset_h = data_offset_ptr[data_offset_h_ptr];
    const T offset_w = data_offset_ptr[data_offset_w_ptr];
    const T cur_inv_h_data = h_in + i * dilation_h + offset_h;
    const T cur_inv_w_data = w_in + j * dilation_w + offset_w;

    const T cur_top_grad = data_col[thread];
    const int cur_h = static_cast<int>(cur_inv_h_data);
    const int cur_w = static_cast<int>(cur_inv_w_data);
    for (int dy = -2; dy <= 2; dy++) {
      for (int dx = -2; dx <= 2; dx++) {
        if (cur_h + dy >= 0 && cur_h + dy < height && cur_w + dx >= 0 &&
            cur_w + dx < width && abs(cur_inv_h_data - (cur_h + dy)) < 1 &&
            abs(cur_inv_w_data - (cur_w + dx)) < 1) {
          int cur_bottom_grad_pos =
              ((b * channels + c) * height + cur_h + dy) * width + cur_w + dx;
          T weight =
              DmcnGetGradientWeight(cur_inv_h_data, cur_inv_w_data, cur_h + dy,
                                    cur_w + dx, height, width);
          *(grad_im + cur_bottom_grad_pos) =
              *(grad_im + cur_bottom_grad_pos) + weight * cur_top_grad;
        }
      }
    }
  }
}
template <typename T>
inline void DeformableCol2imCPU(const platform::CPUDeviceContext& ctx,
                                const T* data_col, const T* data_offset,
                                const std::vector<int64_t> im_shape,
                                const std::vector<int64_t> col_shape,
                                const std::vector<int64_t> kernel_shape,
                                const std::vector<int> pad,
                                const std::vector<int> stride,
                                const std::vector<int> dilation,
                                const int deformable_group, T* grad_im) {
  int channel_per_deformable_group = im_shape[0] / deformable_group;
  int num_kernels = col_shape[0] * col_shape[1] * col_shape[2] * col_shape[3];

  DeformableCol2imCPUKernel(
      num_kernels, data_col, data_offset, im_shape[0], im_shape[1],
      im_shape[2], kernel_shape[2], kernel_shape[3], pad[0], pad[1], stride[0],
      stride[1], dilation[0], dilation[1], channel_per_deformable_group,
      col_shape[1], deformable_group, col_shape[2], col_shape[3], grad_im);
}
template <typename T>
void DeformableCol2imCoordCPUKernel(
    const int num_kernels, const T* data_col, const T* data_im,
    const T* data_offset, const int channels, const int height,
    const int width, const int kernel_h, const int kernel_w, const int pad_h,
    const int pad_w, const int stride_h, const int stride_w,
    const int dilation_h, const int dilation_w,
    const int channel_per_deformable_group, const int batch_size,
    const int offset_channels, const int deformable_group,
    const int height_col, const int width_col, T* grad_offset) {
  for (size_t i = 0; i < num_kernels; i++) {
    T val = 0, mval = 0;
    const int w = i % width_col;
    const int h = (i / width_col) % height_col;
    const int c = (i / width_col / height_col) % offset_channels;
    const int b = (i / width_col / height_col) / offset_channels;

    const int deformable_group_index = c / (2 * kernel_h * kernel_w);
    const int col_step = kernel_h * kernel_w;
    int cnt = 0;
    const T* data_col_ptr = data_col + deformable_group_index *
                                           channel_per_deformable_group *
                                           batch_size * width_col * height_col;
    const T* data_im_ptr =
        data_im + (b * deformable_group + deformable_group_index) *
                      channel_per_deformable_group / kernel_h / kernel_w *
                      height * width;
    const T* data_offset_ptr =
        data_offset + (b * deformable_group + deformable_group_index) * 2 *
                          kernel_h * kernel_w * height_col * width_col;
    const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w;

    for (int col_c = offset_c / 2; col_c < channel_per_deformable_group;
         col_c += col_step) {
      const int col_pos =
          (((col_c * batch_size + b) * height_col) + h) * width_col + w;
      const int bp_dir = offset_c % 2;
      int j = (col_pos / width_col / height_col / batch_size) % kernel_w;
      int i =
          (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h;
      int w_out = col_pos % width_col;
      int h_out = (col_pos / width_col) % height_col;
      int w_in = w_out * stride_w - pad_w;
      int h_in = h_out * stride_h - pad_h;
      const int data_offset_h_ptr =
          (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out);
      const int data_offset_w_ptr =
          (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col +
           w_out);
      const T offset_h = data_offset_ptr[data_offset_h_ptr];
      const T offset_w = data_offset_ptr[data_offset_w_ptr];
      T inv_h = h_in + i * dilation_h + offset_h;
      T inv_w = w_in + j * dilation_w + offset_w;
      if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width) {
        inv_h = inv_w = -2;
      } else {
        mval += data_col_ptr[col_pos] *
                DmcnIm2colBilinear(data_im_ptr + cnt * height * width, width,
                                   height, width, inv_h, inv_w);
      }
      const T weight = DmcnGetCoordinateWeight(
          inv_h, inv_w, height, width, data_im_ptr + cnt * height * width,
          width, bp_dir);
      val += weight * data_col_ptr[col_pos];
      cnt += 1;
    }
    grad_offset[i] = val;
  }
}
template <typename T>
inline void DeformableCol2imCoordCPU(
    const platform::CPUDeviceContext& ctx, const T* data_col,
    const T* data_im, const T* data_offset,
    const std::vector<int64_t> im_shape, const std::vector<int64_t> col_shape,
    const std::vector<int64_t> kernel_shape, const std::vector<int> paddings,
    const std::vector<int> strides, const std::vector<int> dilations,
    const int deformable_groups, T* grad_offset) {
  int num_kernels = 2 * kernel_shape[2] * kernel_shape[3] * col_shape[1] *
                    col_shape[2] * col_shape[3] * deformable_groups;
  int channel_per_deformable_group = col_shape[0] / deformable_groups;

  DeformableCol2imCoordCPUKernel(
      num_kernels, data_col, data_im, data_offset, im_shape[0], im_shape[1],
      im_shape[2], kernel_shape[2], kernel_shape[3], paddings[0], paddings[1],
      strides[0], strides[1], dilations[0], dilations[1],
      channel_per_deformable_group, col_shape[1],
      2 * kernel_shape[2] * kernel_shape[3] * deformable_groups,
      deformable_groups, col_shape[2], col_shape[3], grad_offset);
}
template <typename T>
void DeformableIm2colCPUKernel(
    const int num_kernels, const T* data_im, const T* data_offset,
    const int height, const int width, const int kernel_h, const int kernel_w,
    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
    const int dilation_h, const int dilation_w,
    const int channel_per_deformable_group, const int batch_size,
    const int num_channels, const int deformable_group, const int height_col,
    const int width_col, T* data_col) {
  for (size_t i = 0; i < num_kernels; i++) {
    const int w_col = i % width_col;
    const int h_col = (i / width_col) % height_col;
    const int b_col = (i / width_col) / height_col % batch_size;
    const int c_im = (i / width_col / height_col) / batch_size;
    const int c_col = c_im * kernel_h * kernel_w;

    const int deformable_group_index = c_im / channel_per_deformable_group;
    const int h_in = h_col * stride_h - pad_h;
    const int w_in = w_col * stride_w - pad_w;

    T* data_col_ptr =
        data_col +
        ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col;
    const T* data_im_ptr =
        data_im + (b_col * num_channels + c_im) * height * width;
    const T* data_offset_ptr =
        data_offset + (b_col * deformable_group + deformable_group_index) * 2 *
                          kernel_h * kernel_w * height_col * width_col;

    for (int i = 0; i < kernel_h; ++i) {
      for (int j = 0; j < kernel_w; ++j) {
        const int data_offset_h_ptr =
            ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col;
        const int data_offset_w_ptr =
            ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col +
            w_col;
        const T offset_h = data_offset_ptr[data_offset_h_ptr];
        const T offset_w = data_offset_ptr[data_offset_w_ptr];
        T val = static_cast<T>(0);
        const T h_im = h_in + i * dilation_h + offset_h;
        const T w_im = w_in + j * dilation_w + offset_w;
        if (h_im > -1 && w_im > -1 && h_im < height && w_im < width) {
          val =
              DmcnIm2colBilinear(data_im_ptr, width, height, width, h_im, w_im);
        }
        *data_col_ptr = val;
        data_col_ptr += batch_size * height_col * width_col;
      }
    }
  }
}
template <typename T>
inline void DeformableIm2colCPU(const platform::CPUDeviceContext& ctx,
                                const T* data_im, const T* data_offset,
                                const std::vector<int64_t> im_shape,
                                const std::vector<int64_t> col_shape,
                                const std::vector<int64_t> filter_shape,
                                const std::vector<int> paddings,
                                const std::vector<int> strides,
                                const std::vector<int> dilations,
                                const int deformable_groups, T* data_col) {
  int channel_per_deformable_group = im_shape[0] / deformable_groups;
  int num_kernels = im_shape[0] * col_shape[1] * col_shape[2] * col_shape[3];

  // get outputs of im2col with offset by bilinear interpolation
  DeformableIm2colCPUKernel(
      num_kernels, data_im, data_offset, im_shape[1], im_shape[2],
      filter_shape[2], filter_shape[3], paddings[0], paddings[1], strides[0],
      strides[1], dilations[0], dilations[1], channel_per_deformable_group,
      col_shape[1], im_shape[0], deformable_groups, col_shape[2], col_shape[3],
      data_col);
}
template <typename T>
class DeformableConvV1CPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* input = ctx.Input<Tensor>("Input");
    auto* offset = ctx.Input<Tensor>("Offset");
    Tensor filter = *ctx.Input<Tensor>("Filter");
    Tensor* output = ctx.Output<Tensor>("Output");
    output->mutable_data<T>(ctx.GetPlace());

    auto& dev_ctx = ctx.template device_context<CPUDeviceContext>();

    const int groups = ctx.Attr<int>("groups");
    const int deformable_groups = ctx.Attr<int>("deformable_groups");
    const int im2col_step = ctx.Attr<int>("im2col_step");
    const std::vector<int> strides = ctx.Attr<std::vector<int>>("strides");
    const std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
    const std::vector<int> dilations = ctx.Attr<std::vector<int>>("dilations");

    const int batch_size = static_cast<int>(input->dims()[0]);

    std::vector<int64_t> filter_shape_vec(framework::vectorize(filter.dims()));
    std::vector<int64_t> output_shape_vec(
        framework::vectorize(output->dims()));

    // col_shape_vec: {c_i * k_h * k_w, im2col_step, o_h, o_w}
    std::vector<int64_t> col_buffer_shape_vec(filter_shape_vec.size());
    col_buffer_shape_vec[0] =
        input->dims()[1] * filter.dims()[2] * filter.dims()[3];
    col_buffer_shape_vec[1] = im2col_step;
    for (size_t j = 0; j < filter_shape_vec.size() - 2; ++j) {
      col_buffer_shape_vec[j + 2] = output_shape_vec[j + 2];
    }
    framework::DDim col_shape(framework::make_ddim(col_buffer_shape_vec));
    std::vector<int64_t> output_buffer_shape_vec(1);
    output_buffer_shape_vec[0] = batch_size * output_shape_vec[1] *
                                 output_shape_vec[2] * output_shape_vec[3];
    framework::DDim output_shape(
        framework::make_ddim(output_buffer_shape_vec));

    Tensor col_buffer;
    Tensor output_buffer;
    col_buffer =
        ctx.AllocateTmpTensor<T, CPUDeviceContext>(col_shape, dev_ctx);
    output_buffer =
        ctx.AllocateTmpTensor<T, CPUDeviceContext>(output_shape, dev_ctx);

    int64_t M = output_shape_vec[1] / groups;
    int64_t N = im2col_step * output_shape_vec[2] * output_shape_vec[3];
    int64_t K =
        input->dims()[1] * filter_shape_vec[2] * filter_shape_vec[3] / groups;

    Tensor weight_3d;
    weight_3d.ShareDataWith(filter).Resize(
        framework::make_ddim({groups, M, K}));
    Tensor col_buffer_3d;
    col_buffer_3d.ShareDataWith(col_buffer)
        .Resize(framework::make_ddim({groups, K, N}));
    Tensor output_4d;
    output_4d.ShareDataWith(output_buffer)
        .Resize(framework::make_ddim({batch_size / im2col_step, groups, M, N}));
    output_4d.mutable_data<T>(ctx.GetPlace());

    framework::DDim input_shape =
        framework::slice_ddim(input->dims(), 1, input->dims().size());
    std::vector<int64_t> input_shape_vec = framework::vectorize(input_shape);

    int input_dim = input->numel() / input->dims()[0];
    int input_offset_dim = offset->numel() / offset->dims()[0];

    auto blas = math::GetBlas<CPUDeviceContext, T>(dev_ctx);

    const T* input_ptr = input->data<T>();
    const T* offset_ptr = offset->data<T>();
    col_buffer.mutable_data<T>(ctx.GetPlace());
    T* col_buffer_ptr = col_buffer.data<T>();

    for (int i = 0; i < batch_size / im2col_step; ++i) {
      DeformableIm2colCPU(dev_ctx, input_ptr + i * im2col_step * input_dim,
                          offset_ptr + i * im2col_step * input_offset_dim,
                          input_shape_vec, col_buffer_shape_vec,
                          filter_shape_vec, paddings, strides, dilations,
                          deformable_groups, col_buffer_ptr);

      Tensor output_3d = output_4d.Slice(i, i + 1).Resize(
          framework::slice_ddim(output_4d.dims(), 1, output_4d.dims().size()));
      // get the product of pixel and weight
      for (int g = 0; g < groups; ++g) {
        Tensor weight_3d_slice = weight_3d.Slice(g, g + 1).Resize(
            framework::slice_ddim(weight_3d.dims(), 1,
                                  weight_3d.dims().size()));
        Tensor col_buffer_3d_slice =
            col_buffer_3d.Slice(g, g + 1).Resize(framework::slice_ddim(
                col_buffer_3d.dims(), 1, col_buffer_3d.dims().size()));
        Tensor output_3d_slice = output_3d.Slice(g, g + 1).Resize(
            framework::slice_ddim(output_3d.dims(), 1,
                                  output_3d.dims().size()));

        blas.MatMul(weight_3d_slice, false, col_buffer_3d_slice, false, T(1.0),
                    &output_3d_slice, T(0.0));
      }
    }
    output->ShareDataWith(output_buffer)
        .Resize(framework::make_ddim(output_shape_vec));
  }
};
template <typename T>
class DeformableConvV1GradCPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    const Tensor* output_grad =
        ctx.Input<Tensor>(framework::GradVarName("Output"));
    Tensor* input_grad = ctx.Output<Tensor>(framework::GradVarName("Input"));
    Tensor* filter_grad = ctx.Output<Tensor>(framework::GradVarName("Filter"));
    Tensor* offset_grad = ctx.Output<Tensor>(framework::GradVarName("Offset"));

    const Tensor* input = ctx.Input<Tensor>("Input");
    Tensor offset = *ctx.Input<Tensor>("Offset");
    Tensor filter = *ctx.Input<Tensor>("Filter");
    if (!input_grad && !filter_grad && !offset_grad) return;

    int groups = ctx.Attr<int>("groups");
    int deformable_groups = ctx.Attr<int>("deformable_groups");
    int im2col_step = ctx.Attr<int>("im2col_step");
    std::vector<int> strides = ctx.Attr<std::vector<int>>("strides");
    std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
    std::vector<int> dilations = ctx.Attr<std::vector<int>>("dilations");

    auto& dev_ctx = ctx.template device_context<CPUDeviceContext>();
    const int batch_size = static_cast<int>(input->dims()[0]);

    framework::DDim input_shape =
        framework::slice_ddim(input->dims(), 1, input->dims().size());
    std::vector<int64_t> input_shape_vec = framework::vectorize(input_shape);
    std::vector<int64_t> filter_shape_vec(framework::vectorize(filter.dims()));
    std::vector<int64_t> output_shape_vec(
        framework::vectorize(output_grad->dims()));

    std::vector<int64_t> col_buffer_shape_vec(filter_shape_vec.size());
    col_buffer_shape_vec[0] =
        input->dims()[1] * filter.dims()[2] * filter.dims()[3];
    col_buffer_shape_vec[1] = im2col_step;
    for (size_t j = 0; j < filter_shape_vec.size() - 2; ++j) {
      col_buffer_shape_vec[j + 2] = output_shape_vec[j + 2];
    }
    framework::DDim col_shape(framework::make_ddim(col_buffer_shape_vec));
    std::vector<int64_t> output_buffer_shape_vec(1);
    output_buffer_shape_vec[0] = batch_size * output_shape_vec[1] *
                                 output_shape_vec[2] * output_shape_vec[3];
    framework::DDim output_shape(
        framework::make_ddim(output_buffer_shape_vec));

    Tensor col_buffer;
    Tensor output_buffer;
    col_buffer =
        ctx.AllocateTmpTensor<T, CPUDeviceContext>(col_shape, dev_ctx);
    output_buffer =
        ctx.AllocateTmpTensor<T, CPUDeviceContext>(output_shape, dev_ctx);
    output_buffer.ShareDataWith(*output_grad);

    int64_t M = input_shape_vec[0] / groups * filter_shape_vec[2] *
                filter_shape_vec[3];
    int64_t N = im2col_step * output_shape_vec[2] * output_shape_vec[3];
    int64_t K = output_shape_vec[1] / groups;

    framework::DDim weight_3d_shape = {groups, K, M};
    framework::DDim out_grad_4d_shape = {batch_size / im2col_step, groups, K,
                                         N};
    framework::DDim col_buffer_3d_shape = {groups, M, N};
    framework::DDim filter_grad_shape = {groups, K, M};

    Tensor weight_3d;
    weight_3d.ShareDataWith(filter).Resize(weight_3d_shape);
    Tensor out_grad_4d;
    out_grad_4d.ShareDataWith(output_buffer).Resize(out_grad_4d_shape);
    Tensor col_buffer_3d;
    col_buffer_3d.ShareDataWith(col_buffer).Resize(col_buffer_3d_shape);

    math::SetConstant<CPUDeviceContext, T> set_zero;
    auto blas = math::GetBlas<CPUDeviceContext, T>(dev_ctx);

    col_buffer.mutable_data<T>(ctx.GetPlace());
    col_buffer_3d.mutable_data<T>(ctx.GetPlace());
    out_grad_4d.mutable_data<T>(ctx.GetPlace());

    int input_dim = input->numel() / input->dims()[0];
    int input_offset_dim = offset.numel() / offset.dims()[0];

    if (filter_grad) {
      filter_grad->mutable_data<T>(ctx.GetPlace());
      filter_grad->Resize(filter_grad_shape);
      set_zero(dev_ctx, filter_grad, static_cast<T>(0));
    }
    if (input_grad) {
      input_grad->mutable_data<T>(ctx.GetPlace());
      set_zero(dev_ctx, input_grad, static_cast<T>(0));
    }
    if (offset_grad) {
      offset_grad->mutable_data<T>(ctx.GetPlace());
      set_zero(dev_ctx, offset_grad, static_cast<T>(0));
    }

    for (int i = 0; i < batch_size / im2col_step; ++i) {
      Tensor out_grad_3d = out_grad_4d.Slice(i, i + 1).Resize(
          framework::slice_ddim(out_grad_4d.dims(), 1,
                                out_grad_4d.dims().size()));
      for (int g = 0; g < groups; ++g) {
        Tensor weight_3d_slice = weight_3d.Slice(g, g + 1).Resize(
            framework::slice_ddim(weight_3d.dims(), 1,
                                  weight_3d.dims().size()));
        Tensor out_grad_3d_slice = out_grad_3d.Slice(g, g + 1).Resize(
            framework::slice_ddim(out_grad_3d.dims(), 1,
                                  out_grad_3d.dims().size()));
        Tensor col_buffer_3d_slice =
            col_buffer_3d.Slice(g, g + 1).Resize(framework::slice_ddim(
                col_buffer_3d.dims(), 1, col_buffer_3d.dims().size()));

        blas.MatMul(weight_3d_slice, true, out_grad_3d_slice, false, T(1.0),
                    &col_buffer_3d_slice, T(0.0));
      }
      col_buffer.Resize(col_shape);

      T* col_buffer_ptr = col_buffer.data<T>();
      const T* input_ptr = input->data<T>();
      const T* offset_ptr = offset.data<T>();

      if (offset_grad) {
        T* offset_grad_ptr = offset_grad->data<T>();
        // get grad of offset
        DeformableCol2imCoordCPU(
            dev_ctx, col_buffer_ptr, input_ptr + i * im2col_step * input_dim,
            offset_ptr + i * im2col_step * input_offset_dim, input_shape_vec,
            col_buffer_shape_vec, filter_shape_vec, paddings, strides,
            dilations, deformable_groups,
            offset_grad_ptr + i * im2col_step * input_offset_dim);
      }
      if (input_grad) {
        T* input_grad_ptr = input_grad->data<T>();
        // get grad of input
        DeformableCol2imCPU(dev_ctx, col_buffer_ptr,
                            offset_ptr + i * im2col_step * input_offset_dim,
                            input_shape_vec, col_buffer_shape_vec,
                            filter_shape_vec, paddings, strides, dilations,
                            deformable_groups,
                            input_grad_ptr + i * im2col_step * input_dim);
        input_grad->Resize(input->dims());
      }

      DeformableIm2colCPU(dev_ctx, input_ptr + i * im2col_step * input_dim,
                          offset_ptr + i * im2col_step * input_offset_dim,
                          input_shape_vec, col_buffer_shape_vec,
                          filter_shape_vec, paddings, strides, dilations,
                          deformable_groups, col_buffer_ptr);

      col_buffer_3d.Resize(col_buffer_3d_shape);

      if (filter_grad) {
        Tensor dweight_3d;
        dweight_3d = ctx.AllocateTmpTensor<T, CPUDeviceContext>(
            filter_grad_shape, dev_ctx);
        for (int g = 0; g < groups; ++g) {
          Tensor out_grad_3d_slice = out_grad_3d.Slice(g, g + 1).Resize(
              framework::slice_ddim(out_grad_3d.dims(), 1,
                                    out_grad_3d.dims().size()));
          Tensor col_buffer_3d_slice =
              col_buffer_3d.Slice(g, g + 1).Resize(framework::slice_ddim(
                  col_buffer_3d.dims(), 1, col_buffer_3d.dims().size()));
          Tensor dweight_3d_slice = dweight_3d.Slice(g, g + 1).Resize(
              framework::slice_ddim(dweight_3d.dims(), 1,
                                    dweight_3d.dims().size()));

          blas.MatMul(out_grad_3d_slice, false, col_buffer_3d_slice, true,
                      T(1.0), &dweight_3d_slice, T(0.0));
        }
        // update grad of weights
        FilterGradAddupCPUKernel(dweight_3d.numel(), groups, K, M,
                                 dweight_3d.data<T>(),
                                 filter_grad->data<T>());
      }
    }
    if (filter_grad) {
      filter_grad->Resize(filter.dims());
    }
  }
};
}  // namespace operators
}  // namespace paddle
python/paddle/fluid/layers/nn.py
@@ -13196,20 +13196,30 @@ def deformable_conv(input,
                     im2col_step=None,
                     param_attr=None,
                     bias_attr=None,
+                    modulated=True,
                     name=None):
     """
     **Deformable Convolution Layer**

     Compute 2-D deformable convolution on 4-D input.
     Given input image x, output feature map y, the deformable convolution operation can be expressed as follow:
+
+    Deformable Convolution v2:
+
     .. math::

         y(p) = \sum_{k=1}^{K}{w_k * x(p + p_k + \Delta p_k) * \Delta m_k}

-    Where :math:`\Delta p_k` and :math:`\Delta m_k` are the learnable offset and modulation scalar for the k-th location, respectively.
-    Refer to `Deformable ConvNets v2: More Deformable, Better Results
-    <https://arxiv.org/abs/1811.11168v2>`_ .
+    Deformable Convolution v1:
+
+    .. math::
+
+        y(p) = \sum_{k=1}^{K}{w_k * x(p + p_k + \Delta p_k)}
+
+    Where :math:`\Delta p_k` and :math:`\Delta m_k` are the learnable offset and modulation scalar for the k-th location, and :math:`\Delta m_k` is 1 in deformable convolution v1. Please refer to `Deformable ConvNets v2: More Deformable, Better Results
+    <https://arxiv.org/abs/1811.11168v2>`_ and `Deformable Convolutional Networks <https://arxiv.org/abs/1703.06211>`_.

     Example:
         - Input:

@@ -13235,7 +13245,7 @@ def deformable_conv(input,
     Args:
         input (Variable): The input image with [N, C, H, W] format.
-        offset (Variable): The input coord offset of deformable convolution layer.
+        offset (Variable): The input coordinate offset of deformable convolution layer.
         Mask (Variable): The input mask of deformable convolution layer.
         num_filters(int): The number of filter. It is as same as the output
             image channel.

@@ -13274,6 +13284,8 @@ def deformable_conv(input,
             to the output units. If it is set to None or one attribute of ParamAttr, conv2d
             will create ParamAttr as bias_attr. If the Initializer of the bias_attr
             is not set, the bias is initialized zero. Default: None.
+        modulated (bool): Selects whether deformable conv v1 or v2 is used:
+            v2 is used when True. Default: True.
         name (str|None): A name for this layer(optional). If set None, the layer
             will be named automatically. Default: None
     Returns:

@@ -13285,12 +13297,22 @@ def deformable_conv(input,
     Examples:
         .. code-block:: python

+            #deformable conv v2:
+
             import paddle.fluid as fluid
             data = fluid.layers.data(name='data', shape=[3, 32, 32], dtype='float32')
             offset = fluid.layers.data(name='offset', shape=[18, 32, 32], dtype='float32')
             mask = fluid.layers.data(name='mask', shape=[9, 32, 32], dtype='float32')
             out = fluid.layers.deformable_conv(input=data, offset=offset, mask=mask,
-                                               num_filters=2, filter_size=3, padding=1)
+                                               num_filters=2, filter_size=3, padding=1, modulated=True)
+
+            #deformable conv v1:
+
+            import paddle.fluid as fluid
+            data = fluid.layers.data(name='data', shape=[3, 32, 32], dtype='float32')
+            offset = fluid.layers.data(name='offset', shape=[18, 32, 32], dtype='float32')
+            out = fluid.layers.deformable_conv(input=data, offset=offset, mask=None,
+                                               num_filters=2, filter_size=3, padding=1, modulated=False)
     """

     num_channels = input.shape[1]

@@ -13303,8 +13325,6 @@ def deformable_conv(input,
         raise TypeError("Input of deformable_conv must be Variable")
     if not isinstance(offset, Variable):
         raise TypeError("Input Offset of deformable_conv must be Variable")
-    if not isinstance(mask, Variable):
-        raise TypeError("Input Mask of deformable_conv must be Variable")

     if groups is None:
         num_filter_channels = num_channels

@@ -13334,23 +13354,42 @@ def deformable_conv(input,
     pre_bias = helper.create_variable_for_type_inference(dtype)

-    helper.append_op(
-        type='deformable_conv',
-        inputs={
-            'Input': input,
-            'Filter': filter_param,
-            'Offset': offset,
-            'Mask': mask,
-        },
-        outputs={"Output": pre_bias},
-        attrs={
-            'strides': stride,
-            'paddings': padding,
-            'dilations': dilation,
-            'groups': groups,
-            'deformable_groups': deformable_groups,
-            'im2col_step': im2col_step,
-        })
+    if modulated:
+        helper.append_op(
+            type='deformable_conv',
+            inputs={
+                'Input': input,
+                'Filter': filter_param,
+                'Offset': offset,
+                'Mask': mask,
+            },
+            outputs={"Output": pre_bias},
+            attrs={
+                'strides': stride,
+                'paddings': padding,
+                'dilations': dilation,
+                'groups': groups,
+                'deformable_groups': deformable_groups,
+                'im2col_step': im2col_step,
+            })
+    else:
+        helper.append_op(
+            type='deformable_conv_v1',
+            inputs={
+                'Input': input,
+                'Filter': filter_param,
+                'Offset': offset,
+            },
+            outputs={"Output": pre_bias},
+            attrs={
+                'strides': stride,
+                'paddings': padding,
+                'dilations': dilation,
+                'groups': groups,
+                'deformable_groups': deformable_groups,
+                'im2col_step': im2col_step,
+            })

     output = helper.append_bias_op(pre_bias, dim_start=1, dim_end=2)
     return output
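As the docstring examples imply, the offset input carries 2 * deformable_groups * f_h * f_w channels, and the mask (v2 only) carries deformable_groups * f_h * f_w; a quick sanity check of the shapes used above:

    def offset_mask_channels(filter_size, deformable_groups=1):
        # yields 18 and 9 for a 3x3 filter, matching shape=[18, 32, 32]
        # and shape=[9, 32, 32] in the docstring examples
        f_h = f_w = filter_size
        return (2 * deformable_groups * f_h * f_w,
                deformable_groups * f_h * f_w)

    assert offset_mask_channels(3) == (18, 9)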
python/paddle/fluid/tests/unittests/test_deformable_conv_op.py
@@ -145,48 +145,35 @@ class TestModulatedDeformableConvOp(OpTest):
         }
         self.outputs = {'Output': output}

-    def has_cuda(self):
-        return core.is_compiled_with_cuda()
-
     def test_check_output(self):
-        if self.has_cuda():
-            place = core.CUDAPlace(0)
-            self.check_output_with_place(place, atol=1e-5)
+        self.check_output(atol=1e-5)

     def test_check_grad(self):
-        if self.has_cuda():
-            place = core.CUDAPlace(0)
-            self.check_grad_with_place(
-                place, {'Input', 'Offset', 'Mask', 'Filter'},
-                'Output',
-                max_relative_error=0.05)
+        self.check_grad(
+            {'Input', 'Offset', 'Mask', 'Filter'},
+            'Output',
+            max_relative_error=0.05)

     def test_check_grad_no_filter(self):
-        if self.has_cuda():
-            place = core.CUDAPlace(0)
-            self.check_grad_with_place(
-                place, ['Input', 'Offset', 'Mask'],
-                'Output',
-                max_relative_error=0.1,
-                no_grad_set=set(['Filter']))
+        self.check_grad(
+            ['Input', 'Offset', 'Mask'],
+            'Output',
+            max_relative_error=0.1,
+            no_grad_set=set(['Filter']))

     def test_check_grad_no_input(self):
-        if self.has_cuda():
-            place = core.CUDAPlace(0)
-            self.check_grad_with_place(
-                place, ['Filter', 'Offset', 'Mask'],
-                'Output',
-                max_relative_error=0.1,
-                no_grad_set=set(['Input']))
+        self.check_grad(
+            ['Filter', 'Offset', 'Mask'],
+            'Output',
+            max_relative_error=0.1,
+            no_grad_set=set(['Input']))

     def test_check_grad_no_offset_no_mask(self):
-        if self.has_cuda():
-            place = core.CUDAPlace(0)
-            self.check_grad_with_place(
-                place, ['Input', 'Filter'],
-                'Output',
-                max_relative_error=0.1,
-                no_grad_set=set(['Offset', 'Mask']))
+        self.check_grad(
+            ['Input', 'Filter'],
+            'Output',
+            max_relative_error=0.1,
+            no_grad_set=set(['Offset', 'Mask']))

     def init_test_case(self):
         self.pad = [1, 1]
python/paddle/fluid/tests/unittests/test_deformable_conv_v1_op.py (new file, mode 100644)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function

import unittest
import numpy as np
import paddle.fluid.core as core
from op_test import OpTest
def dmc_bilinear(data_im, height, width, h, w):
    # Integer corners of the cell containing the sampling point (h, w).
    h_low = int(np.floor(h))
    w_low = int(np.floor(w))
    h_high = h_low + 1
    w_high = w_low + 1

    # Fractional distances to the low corner and their complements.
    lh = h - h_low
    lw = w - w_low
    hh = 1 - lh
    hw = 1 - lw

    # Corner values; neighbours that fall outside the image contribute zero.
    v1 = 0
    if h_low >= 0 and w_low >= 0:
        v1 = data_im[h_low, w_low]
    v2 = 0
    if h_low >= 0 and w_high <= width - 1:
        v2 = data_im[h_low, w_high]
    v3 = 0
    if h_high <= height - 1 and w_low >= 0:
        v3 = data_im[h_high, w_low]
    v4 = 0
    if h_high <= height - 1 and w_high <= width - 1:
        v4 = data_im[h_high, w_high]

    # Area-weighted blend of the four neighbours.
    w1, w2, w3, w4 = hh * hw, hh * lw, lh * hw, lh * lw
    val = w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4
    return val
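A quick sanity check of the helper above (the image and sampling point are chosen for illustration): sampling at the centre of a 2x2 cell averages the four neighbours with equal weights.

img = np.array([[0., 1.],
                [2., 3.]])
# (0.5, 0.5) is equidistant from all four pixels, so each weight is 0.25:
# 0.25 * (0 + 1 + 2 + 3) = 1.5
assert dmc_bilinear(img, 2, 2, 0.5, 0.5) == 1.5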
def dconv_im2col_gemm(input, offset, filter, group, conv_param):
    in_n, in_c, in_h, in_w = input.shape
    out_c, f_c, f_h, f_w = filter.shape
    # One (dy, dx) offset pair per kernel sampling point.
    assert offset.shape == (in_n, 2 * f_h * f_w, in_h, in_w)
    assert f_c * group == in_c
    assert np.mod(out_c, group) == 0

    stride, pad, dilation = conv_param['stride'], conv_param['pad'], \
        conv_param['dilation']
    out_h = 1 + (in_h + 2 * pad[0] - (dilation[0] * (f_h - 1) + 1)) // stride[0]
    out_w = 1 + (in_w + 2 * pad[1] - (dilation[1] * (f_w - 1) + 1)) // stride[1]
    assert out_h == in_h
    assert out_w == in_w

    # im2col with learned offsets: every kernel tap samples the input at a
    # fractional location via bilinear interpolation.
    col_buffer = np.zeros((in_n, in_c * f_h * f_w, in_h * in_w))
    for n in range(in_n):
        for c in range(in_c):
            for h in range(out_h):
                for w in range(out_w):
                    for kh in range(f_h):
                        for kw in range(f_w):
                            # Even channels hold h-offsets, odd channels
                            # hold w-offsets.
                            offset_h_table = \
                                offset[n, ::2, h, w].reshape(f_h, f_w)
                            offset_w_table = \
                                offset[n, 1::2, h, w].reshape(f_h, f_w)
                            offset_h = offset_h_table[kh, kw]
                            offset_w = offset_w_table[kh, kw]
                            val = 0
                            im_h = h * stride[0] + kh * dilation[0] \
                                + offset_h - pad[0]
                            im_w = w * stride[1] + kw * dilation[1] \
                                + offset_w - pad[1]
                            if im_h > -1 and im_w > -1 and \
                                    im_h < in_h and im_w < in_w:
                                val = dmc_bilinear(input[n, c], in_h, in_w,
                                                   im_h, im_w)
                            col_buffer[n, c * f_h * f_w + kh * f_w + kw,
                                       h * in_w + w] = val

    # GEMM per sample and per group:
    # (out_c/g, f_c*f_h*f_w) x (f_c*f_h*f_w, out_h*out_w).
    out = np.zeros((in_n, group, int(out_c // group), out_h * out_w))
    weight = filter.reshape(group, int(out_c // group), f_c * f_h * f_w)
    col_buffer = col_buffer.reshape(
        (in_n, group, int(in_c // group * f_h * f_w), in_h * in_w))
    for n in range(in_n):
        for g in range(group):
            out[n, g] = np.matmul(weight[g], col_buffer[n, g])
    out = out.reshape(in_n, out_c, out_h, out_w)
    return out
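A minimal smoke check of the reference implementation (the shapes follow the default test case below; the all-zero offset is an illustrative choice under which deformable conv v1 degenerates to an ordinary convolution):

np.random.seed(0)
inp = np.random.random((2, 4, 4, 4)).astype(np.float32)    # NCHW input
zero_offset = np.zeros((2, 2 * 3 * 3, 4, 4), np.float32)   # no deformation
filt = np.random.random((4, 4, 3, 3)).astype(np.float32)   # out_c, f_c, f_h, f_w
conv_param = {'stride': [1, 1], 'pad': [1, 1], 'dilation': [1, 1]}
out = dconv_im2col_gemm(inp, zero_offset, filt, 1, conv_param)
assert out.shape == (2, 4, 4, 4)  # out_h == in_h, out_w == in_w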
class TestModulatedDeformableConvOp(OpTest):
    def setUp(self):
        self.op_type = "deformable_conv_v1"
        self.dtype = np.float32
        self.init_group()
        self.init_dilation()
        self.init_test_case()

        conv_param = {
            'stride': self.stride,
            'pad': self.pad,
            'dilation': self.dilations
        }

        input = np.random.random(self.input_size).astype(self.dtype)
        offset = 10 * np.random.random(self.offset_size).astype(self.dtype)
        filter = np.random.random(self.filter_size).astype(self.dtype)
        output = dconv_im2col_gemm(input, offset, filter, self.groups,
                                   conv_param)
        output = output.astype(self.dtype)

        self.inputs = {
            'Input': OpTest.np_dtype_to_fluid_dtype(input),
            'Offset': OpTest.np_dtype_to_fluid_dtype(offset),
            'Filter': OpTest.np_dtype_to_fluid_dtype(filter)
        }
        self.attrs = {
            'strides': self.stride,
            'paddings': self.pad,
            'groups': self.groups,
            'deformable_groups': self.deformable_groups,
            'im2col_step': self.im2col_step,
            'dilations': self.dilations,
        }
        self.outputs = {'Output': output}

    def test_check_output(self):
        self.check_output()

    def test_check_grad(self):
        self.check_grad(
            ['Input', 'Offset', 'Filter'], 'Output', max_relative_error=0.05)

    def test_check_grad_no_filter(self):
        self.check_grad(
            ['Input', 'Offset'],
            'Output',
            max_relative_error=0.1,
            no_grad_set=set(['Filter']))

    def init_test_case(self):
        self.pad = [1, 1]
        self.stride = [1, 1]
        self.dilations = [1, 1]
        self.input_size = [2, 4, 4, 4]  # NCHW
        assert np.mod(self.input_size[1], self.groups) == 0
        f_c = self.input_size[1] // self.groups
        self.filter_size = [4, f_c, 3, 3]
        self.im2col_step = 1
        self.deformable_groups = 1
        offset_c = 2 * self.deformable_groups * self.filter_size[
            2] * self.filter_size[3]
        self.offset_size = [
            self.input_size[0], offset_c, self.input_size[2],
            self.input_size[3]
        ]

    def init_dilation(self):
        self.dilations = [1, 1]

    def init_group(self):
        self.groups = 1
class TestWithStride(TestModulatedDeformableConvOp):
    def init_test_case(self):
        self.pad = [3, 3]
        self.stride = [2, 2]
        self.input_size = [2, 3, 5, 5]  # NCHW
        assert np.mod(self.input_size[1], self.groups) == 0
        f_c = self.input_size[1] // self.groups
        self.filter_size = [6, f_c, 3, 3]
        self.im2col_step = 1
        self.deformable_groups = 1
        offset_c = 2 * self.deformable_groups * self.filter_size[
            2] * self.filter_size[3]
        self.offset_size = [
            self.input_size[0], offset_c, self.input_size[2],
            self.input_size[3]
        ]


class TestWithDilation(TestModulatedDeformableConvOp):
    def init_test_case(self):
        self.pad = [2, 2]
        self.stride = [1, 1]
        self.input_size = [2, 3, 4, 4]  # NCHW
        assert np.mod(self.input_size[1], self.groups) == 0
        f_c = self.input_size[1] // self.groups
        self.filter_size = [6, f_c, 3, 3]
        self.im2col_step = 1
        self.deformable_groups = 1
        offset_c = 2 * self.deformable_groups * self.filter_size[
            2] * self.filter_size[3]
        self.offset_size = [
            self.input_size[0], offset_c, self.input_size[2],
            self.input_size[3]
        ]

    def init_dilation(self):
        self.dilations = [2, 2]


class TestWith1x1(TestModulatedDeformableConvOp):
    def init_test_case(self):
        self.pad = [0, 0]
        self.stride = [1, 1]
        self.input_size = [2, 3, 5, 5]  # NCHW
        assert np.mod(self.input_size[1], self.groups) == 0
        f_c = self.input_size[1] // self.groups
        self.filter_size = [6, f_c, 1, 1]
        self.im2col_step = 1
        self.deformable_groups = 1
        offset_c = 2 * self.deformable_groups * self.filter_size[
            2] * self.filter_size[3]
        self.offset_size = [
            self.input_size[0], offset_c, self.input_size[2],
            self.input_size[3]
        ]


class TestWithGroup(TestModulatedDeformableConvOp):
    def init_group(self):
        self.groups = 2


if __name__ == '__main__':
    unittest.main()
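Further configurations can be covered by subclassing and overriding only the init_* hooks, exactly as the cases above do. For example, a hypothetical 5x5-kernel case (not part of this patch) would look like:

class TestWith5x5(TestModulatedDeformableConvOp):  # hypothetical extra case
    def init_test_case(self):
        self.pad = [2, 2]  # keeps out_h == in_h for a 5x5 kernel at stride 1
        self.stride = [1, 1]
        self.input_size = [2, 3, 8, 8]  # NCHW
        assert np.mod(self.input_size[1], self.groups) == 0
        f_c = self.input_size[1] // self.groups
        self.filter_size = [6, f_c, 5, 5]
        self.im2col_step = 1
        self.deformable_groups = 1
        offset_c = 2 * self.deformable_groups * self.filter_size[
            2] * self.filter_size[3]
        self.offset_size = [
            self.input_size[0], offset_c, self.input_size[2],
            self.input_size[3]
        ]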
python/paddle/fluid/tests/unittests/test_layers.py
View file @ 00efd1d8
...
@@ -2276,32 +2276,31 @@ class TestBook(LayerTest):
         print(str(program))
 
     def test_deformable_conv(self):
-        if core.is_compiled_with_cuda():
-            with program_guard(fluid.default_main_program(),
-                               fluid.default_startup_program()):
-                input = layers.data(
-                    name='input',
-                    append_batch_size=False,
-                    shape=[2, 3, 32, 32],
-                    dtype="float32")
-                offset = layers.data(
-                    name='offset',
-                    append_batch_size=False,
-                    shape=[2, 18, 32, 32],
-                    dtype="float32")
-                mask = layers.data(
-                    name='mask',
-                    append_batch_size=False,
-                    shape=[2, 9, 32, 32],
-                    dtype="float32")
-                out = layers.deformable_conv(
-                    input=input,
-                    offset=offset,
-                    mask=mask,
-                    num_filters=2,
-                    filter_size=3,
-                    padding=1)
-            return (out)
+        with program_guard(fluid.default_main_program(),
+                           fluid.default_startup_program()):
+            input = layers.data(
+                name='input',
+                append_batch_size=False,
+                shape=[2, 3, 32, 32],
+                dtype="float32")
+            offset = layers.data(
+                name='offset',
+                append_batch_size=False,
+                shape=[2, 18, 32, 32],
+                dtype="float32")
+            mask = layers.data(
+                name='mask',
+                append_batch_size=False,
+                shape=[2, 9, 32, 32],
+                dtype="float32")
+            out = layers.deformable_conv(
+                input=input,
+                offset=offset,
+                mask=mask,
+                num_filters=2,
+                filter_size=3,
+                padding=1)
+        return (out)
 
     def test_unfold(self):
         with self.static_graph():
...
@@ -2338,6 +2337,29 @@ class TestBook(LayerTest):
                 trans_std=0.1)
             return (out)
 
+    def test_deformable_conv_v1(self):
+        with program_guard(fluid.default_main_program(),
+                           fluid.default_startup_program()):
+            input = layers.data(
+                name='input',
+                append_batch_size=False,
+                shape=[2, 3, 32, 32],
+                dtype="float32")
+            offset = layers.data(
+                name='offset',
+                append_batch_size=False,
+                shape=[2, 18, 32, 32],
+                dtype="float32")
+            out = layers.deformable_conv(
+                input=input,
+                offset=offset,
+                mask=None,
+                num_filters=2,
+                filter_size=3,
+                padding=1,
+                modulated=False)
+            return (out)
+
     def test_retinanet_target_assign(self):
         with program_guard(fluid.default_main_program(),
                            fluid.default_startup_program()):
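Note the offset shape in the new test: for a 3x3 kernel the op expects 2 * deformable_groups * filter_h * filter_w offset channels (one (dy, dx) pair per kernel sampling point), matching the offset_c computation in the op tests:

deformable_groups, filter_h, filter_w = 1, 3, 3
offset_c = 2 * deformable_groups * filter_h * filter_w
assert offset_c == 18  # hence shape=[2, 18, 32, 32] above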