Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
b776434c
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
b776434c
编写于
12月 07, 2020
作者:
B
Bai Yifan
提交者:
GitHub
12月 07, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add deform_conv2d,DeformConv2D (#29364) (#29425)
* add deform_conv2d,DeformConv2D
上级
d094cd02
变更
2
显示空白变更内容
内联
并排
Showing
2 changed file
with
946 addition
and
1 deletion
+946
-1
python/paddle/fluid/tests/unittests/test_deform_conv2d.py
python/paddle/fluid/tests/unittests/test_deform_conv2d.py
+558
-0
python/paddle/vision/ops.py
python/paddle/vision/ops.py
+388
-1
未找到文件。
python/paddle/fluid/tests/unittests/test_deform_conv2d.py
0 → 100644
浏览文件 @
b776434c
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
paddle
import
paddle.nn.functional
as
F
import
paddle.nn.initializer
as
I
import
numpy
as
np
import
unittest
from
unittest
import
TestCase
class TestDeformConv2D(TestCase):
    """Compare the ``paddle.vision.ops.DeformConv2D`` layer with the legacy op.

    Each check generates random input/offset/mask/weight arrays, evaluates
    deformable convolution v1 (no mask) and v2 (with mask) through the
    static-graph ``paddle.fluid.layers.deformable_conv`` API and through the
    dygraph ``DeformConv2D`` layer, and asserts that both paths agree.
    Subclasses override ``setUp`` to vary the convolution configuration.
    """

    batch_size = 4
    spatial_shape = (16, 16)
    dtype = "float32"

    def setUp(self):
        """Set the default configuration: 3x3 kernel, no padding, no bias."""
        self.in_channels = 3
        self.out_channels = 5
        self.kernel_size = [3, 3]
        self.padding = [0, 0]
        self.stride = [1, 1]
        self.dilation = [1, 1]
        self.groups = 1
        self.no_bias = True

    def prepare(self):
        """Create random weight, bias, input, offset and mask arrays.

        ``kernel_size`` may be a single int or a sequence of two ints; it is
        normalized into ``self.filter_shape`` and every later computation
        uses the normalized tuple.
        """
        if isinstance(self.kernel_size, int):
            filter_shape = (self.kernel_size, ) * 2
        else:
            filter_shape = tuple(self.kernel_size)
        self.filter_shape = filter_shape

        self.weight = np.random.uniform(
            -1, 1, (self.out_channels, self.in_channels // self.groups) +
            filter_shape).astype(self.dtype)
        if not self.no_bias:
            self.bias = np.random.uniform(
                -1, 1, (self.out_channels, )).astype(self.dtype)

        def out_size(in_size, pad_size, dilation_size, kernel_size,
                     stride_size):
            # Integer arithmetic keeps the result an int without a float
            # round trip.
            return (in_size + 2 * pad_size -
                    (dilation_size * (kernel_size - 1) + 1)) // stride_size + 1

        # Index the normalized filter_shape (not self.kernel_size) so that an
        # int kernel_size does not raise TypeError here.
        out_h = out_size(self.spatial_shape[0], self.padding[0],
                         self.dilation[0], filter_shape[0], self.stride[0])
        out_w = out_size(self.spatial_shape[1], self.padding[1],
                         self.dilation[1], filter_shape[1], self.stride[1])
        out_shape = (out_h, out_w)

        kernel_area = filter_shape[0] * filter_shape[1]
        self.input_shape = (self.batch_size,
                            self.in_channels) + self.spatial_shape
        self.offset_shape = (self.batch_size, 2 * kernel_area) + out_shape
        self.mask_shape = (self.batch_size, kernel_area) + out_shape

        self.input = np.random.uniform(-1, 1,
                                       self.input_shape).astype(self.dtype)
        self.offset = np.random.uniform(-1, 1,
                                        self.offset_shape).astype(self.dtype)
        self.mask = np.random.uniform(-1, 1,
                                      self.mask_shape).astype(self.dtype)

    def static_graph_case_dcn(self):
        """Run v1 and v2 through ``paddle.fluid.layers.deformable_conv``.

        Returns:
            tuple: ``(out_v1, out_v2)`` as fetched from the executor.
        """
        main = paddle.static.Program()
        start = paddle.static.Program()
        paddle.enable_static()
        with paddle.static.program_guard(main, start):
            x = paddle.static.data(
                "input", (-1, self.in_channels, -1, -1), dtype=self.dtype)
            offset = paddle.static.data(
                "offset",
                (-1, 2 * self.filter_shape[0] * self.filter_shape[1], -1, -1),
                dtype=self.dtype)
            mask = paddle.static.data(
                "mask",
                (-1, self.filter_shape[0] * self.filter_shape[1], -1, -1),
                dtype=self.dtype)

            # v1: no mask, modulated=False.
            y_v1 = paddle.fluid.layers.deformable_conv(
                input=x,
                offset=offset,
                mask=None,
                num_filters=self.out_channels,
                filter_size=self.filter_shape,
                stride=self.stride,
                padding=self.padding,
                dilation=self.dilation,
                groups=self.groups,
                deformable_groups=1,
                im2col_step=1,
                param_attr=I.Assign(self.weight),
                bias_attr=False if self.no_bias else I.Assign(self.bias),
                modulated=False)

            # v2: mask supplied.
            y_v2 = paddle.fluid.layers.deformable_conv(
                input=x,
                offset=offset,
                mask=mask,
                num_filters=self.out_channels,
                filter_size=self.filter_shape,
                stride=self.stride,
                padding=self.padding,
                dilation=self.dilation,
                groups=self.groups,
                deformable_groups=1,
                im2col_step=1,
                param_attr=I.Assign(self.weight),
                bias_attr=False if self.no_bias else I.Assign(self.bias))

        exe = paddle.static.Executor(self.place)
        exe.run(start)
        out_v1, out_v2 = exe.run(main,
                                 feed={
                                     "input": self.input,
                                     "offset": self.offset,
                                     "mask": self.mask
                                 },
                                 fetch_list=[y_v1, y_v2])
        return out_v1, out_v2

    def dygraph_case_dcn(self):
        """Run v1 and v2 through the dygraph ``DeformConv2D`` layer.

        Returns:
            tuple: ``(out_v1, out_v2)`` as numpy arrays.
        """
        paddle.disable_static()
        x = paddle.to_tensor(self.input)
        offset = paddle.to_tensor(self.offset)
        mask = paddle.to_tensor(self.mask)

        # The layer owns its bias parameter (initialized through bias_attr),
        # so no separate bias tensor is needed here.
        deform_conv2d = paddle.vision.ops.DeformConv2D(
            in_channels=self.in_channels,
            out_channels=self.out_channels,
            kernel_size=self.kernel_size,
            stride=self.stride,
            padding=self.padding,
            dilation=self.dilation,
            groups=self.groups,
            weight_attr=I.Assign(self.weight),
            bias_attr=False if self.no_bias else I.Assign(self.bias))

        y_v1 = deform_conv2d(x, offset)
        y_v2 = deform_conv2d(x, offset, mask)

        out_v1 = y_v1.numpy()
        out_v2 = y_v2.numpy()
        return out_v1, out_v2

    def _test_identity(self):
        """Assert static-graph and dygraph results agree on ``self.place``."""
        self.prepare()
        static_dcn_v1, static_dcn_v2 = self.static_graph_case_dcn()
        dy_dcn_v1, dy_dcn_v2 = self.dygraph_case_dcn()
        np.testing.assert_array_almost_equal(static_dcn_v1, dy_dcn_v1)
        np.testing.assert_array_almost_equal(static_dcn_v2, dy_dcn_v2)

    def test_identity(self):
        """Run the comparison on CPU, and on GPU 0 when CUDA is compiled in."""
        self.place = paddle.CPUPlace()
        self._test_identity()

        if paddle.is_compiled_with_cuda():
            self.place = paddle.CUDAPlace(0)
            self._test_identity()
class TestDeformConv2DFunctional(TestCase):
    """Compare ``paddle.vision.ops.deform_conv2d`` with the legacy op.

    Each check generates random input/offset/mask/weight arrays and evaluates
    deformable convolution v1 (no mask) and v2 (with mask) three ways: the
    static-graph ``paddle.fluid.layers.deformable_conv`` API, the functional
    ``deform_conv2d`` in dygraph mode, and the functional ``deform_conv2d``
    in static-graph mode. All three must agree. Subclasses override
    ``setUp`` to vary the convolution configuration.
    """

    batch_size = 4
    spatial_shape = (16, 16)
    dtype = "float32"

    def setUp(self):
        """Set the default configuration: 3x3 kernel, no padding, no bias."""
        self.in_channels = 3
        self.out_channels = 5
        self.kernel_size = [3, 3]
        self.padding = [0, 0]
        self.stride = [1, 1]
        self.dilation = [1, 1]
        self.groups = 1
        self.no_bias = True

    def prepare(self):
        """Create random weight, bias, input, offset and mask arrays.

        ``kernel_size`` may be a single int or a sequence of two ints; it is
        normalized into ``self.filter_shape`` and every later computation
        uses the normalized tuple.
        """
        if isinstance(self.kernel_size, int):
            filter_shape = (self.kernel_size, ) * 2
        else:
            filter_shape = tuple(self.kernel_size)
        self.filter_shape = filter_shape

        self.weight = np.random.uniform(
            -1, 1, (self.out_channels, self.in_channels // self.groups) +
            filter_shape).astype(self.dtype)
        if not self.no_bias:
            self.bias = np.random.uniform(
                -1, 1, (self.out_channels, )).astype(self.dtype)

        def out_size(in_size, pad_size, dilation_size, kernel_size,
                     stride_size):
            # Integer arithmetic keeps the result an int without a float
            # round trip.
            return (in_size + 2 * pad_size -
                    (dilation_size * (kernel_size - 1) + 1)) // stride_size + 1

        # Index the normalized filter_shape (not self.kernel_size) so that an
        # int kernel_size does not raise TypeError here.
        out_h = out_size(self.spatial_shape[0], self.padding[0],
                         self.dilation[0], filter_shape[0], self.stride[0])
        out_w = out_size(self.spatial_shape[1], self.padding[1],
                         self.dilation[1], filter_shape[1], self.stride[1])
        out_shape = (out_h, out_w)

        kernel_area = filter_shape[0] * filter_shape[1]
        self.input_shape = (self.batch_size,
                            self.in_channels) + self.spatial_shape
        self.offset_shape = (self.batch_size, 2 * kernel_area) + out_shape
        self.mask_shape = (self.batch_size, kernel_area) + out_shape

        self.input = np.random.uniform(-1, 1,
                                       self.input_shape).astype(self.dtype)
        self.offset = np.random.uniform(-1, 1,
                                        self.offset_shape).astype(self.dtype)
        self.mask = np.random.uniform(-1, 1,
                                      self.mask_shape).astype(self.dtype)

    def static_graph_case_dcn(self):
        """Run v1 and v2 through ``paddle.fluid.layers.deformable_conv``.

        Returns:
            tuple: ``(out_v1, out_v2)`` as fetched from the executor.
        """
        main = paddle.static.Program()
        start = paddle.static.Program()
        paddle.enable_static()
        with paddle.static.program_guard(main, start):
            x = paddle.static.data(
                "input", (-1, self.in_channels, -1, -1), dtype=self.dtype)
            offset = paddle.static.data(
                "offset",
                (-1, 2 * self.filter_shape[0] * self.filter_shape[1], -1, -1),
                dtype=self.dtype)
            mask = paddle.static.data(
                "mask",
                (-1, self.filter_shape[0] * self.filter_shape[1], -1, -1),
                dtype=self.dtype)

            # v1: no mask, modulated=False.
            y_v1 = paddle.fluid.layers.deformable_conv(
                input=x,
                offset=offset,
                mask=None,
                num_filters=self.out_channels,
                filter_size=self.filter_shape,
                stride=self.stride,
                padding=self.padding,
                dilation=self.dilation,
                groups=self.groups,
                deformable_groups=1,
                im2col_step=1,
                param_attr=I.Assign(self.weight),
                bias_attr=False if self.no_bias else I.Assign(self.bias),
                modulated=False)

            # v2: mask supplied.
            y_v2 = paddle.fluid.layers.deformable_conv(
                input=x,
                offset=offset,
                mask=mask,
                num_filters=self.out_channels,
                filter_size=self.filter_shape,
                stride=self.stride,
                padding=self.padding,
                dilation=self.dilation,
                groups=self.groups,
                deformable_groups=1,
                im2col_step=1,
                param_attr=I.Assign(self.weight),
                bias_attr=False if self.no_bias else I.Assign(self.bias))

        exe = paddle.static.Executor(self.place)
        exe.run(start)
        out_v1, out_v2 = exe.run(main,
                                 feed={
                                     "input": self.input,
                                     "offset": self.offset,
                                     "mask": self.mask
                                 },
                                 fetch_list=[y_v1, y_v2])
        return out_v1, out_v2

    def dygraph_case_dcn(self):
        """Run v1 and v2 through ``deform_conv2d`` in dygraph mode.

        Returns:
            tuple: ``(out_v1, out_v2)`` as numpy arrays.
        """
        paddle.disable_static()
        x = paddle.to_tensor(self.input)
        offset = paddle.to_tensor(self.offset)
        mask = paddle.to_tensor(self.mask)
        weight = paddle.to_tensor(self.weight)
        bias = None if self.no_bias else paddle.to_tensor(self.bias)

        y_v1 = paddle.vision.ops.deform_conv2d(
            x=x,
            offset=offset,
            weight=weight,
            bias=bias,
            stride=self.stride,
            padding=self.padding,
            dilation=self.dilation,
            groups=self.groups)

        y_v2 = paddle.vision.ops.deform_conv2d(
            x=x,
            offset=offset,
            mask=mask,
            weight=weight,
            bias=bias,
            stride=self.stride,
            padding=self.padding,
            dilation=self.dilation,
            groups=self.groups)

        out_v1 = y_v1.numpy()
        out_v2 = y_v2.numpy()
        return out_v1, out_v2

    def new_api_static_graph_case_dcn(self):
        """Run v1 and v2 through ``deform_conv2d`` in static-graph mode.

        Weight (and bias, when enabled) are fed as data rather than created
        as parameters.

        Returns:
            tuple: ``(out_v1, out_v2)`` as fetched from the executor.
        """
        main = paddle.static.Program()
        start = paddle.static.Program()
        paddle.enable_static()
        with paddle.static.program_guard(main, start):
            x = paddle.static.data(
                "input", (-1, self.in_channels, -1, -1), dtype=self.dtype)
            offset = paddle.static.data(
                "offset",
                (-1, 2 * self.filter_shape[0] * self.filter_shape[1], -1, -1),
                dtype=self.dtype)
            mask = paddle.static.data(
                "mask",
                (-1, self.filter_shape[0] * self.filter_shape[1], -1, -1),
                dtype=self.dtype)
            weight = paddle.static.data(
                "weight", list(self.weight.shape), dtype=self.dtype)

            # Always bind ``bias`` so the calls below need no conditional.
            bias = None
            if not self.no_bias:
                bias = paddle.static.data("bias", [-1], dtype=self.dtype)

            y_v1 = paddle.vision.ops.deform_conv2d(
                x=x,
                offset=offset,
                weight=weight,
                bias=bias,
                stride=self.stride,
                padding=self.padding,
                dilation=self.dilation,
                groups=self.groups)

            y_v2 = paddle.vision.ops.deform_conv2d(
                x=x,
                offset=offset,
                mask=mask,
                weight=weight,
                bias=bias,
                stride=self.stride,
                padding=self.padding,
                dilation=self.dilation,
                groups=self.groups)

        exe = paddle.static.Executor(self.place)
        exe.run(start)
        feed_dict = {
            "input": self.input,
            "offset": self.offset,
            "mask": self.mask,
            "weight": self.weight
        }
        if not self.no_bias:
            feed_dict["bias"] = self.bias

        out_v1, out_v2 = exe.run(main,
                                 feed=feed_dict,
                                 fetch_list=[y_v1, y_v2])
        return out_v1, out_v2

    def _test_identity(self):
        """Assert all three execution paths agree on ``self.place``."""
        self.prepare()
        static_dcn_v1, static_dcn_v2 = self.static_graph_case_dcn()
        dy_dcn_v1, dy_dcn_v2 = self.dygraph_case_dcn()
        new_static_dcn_v1, new_static_dcn_v2 = (
            self.new_api_static_graph_case_dcn())
        np.testing.assert_array_almost_equal(static_dcn_v1, dy_dcn_v1)
        np.testing.assert_array_almost_equal(static_dcn_v2, dy_dcn_v2)
        np.testing.assert_array_almost_equal(static_dcn_v1, new_static_dcn_v1)
        np.testing.assert_array_almost_equal(static_dcn_v2, new_static_dcn_v2)

    def test_identity(self):
        """Run the comparison on CPU, and on GPU 0 when CUDA is compiled in."""
        self.place = paddle.CPUPlace()
        self._test_identity()

        if paddle.is_compiled_with_cuda():
            self.place = paddle.CUDAPlace(0)
            self._test_identity()
# testcases for DeformConv2D
class TestDeformConv2DWithPadding(TestDeformConv2D):
    """Layer test with a padding of 2 in both spatial dimensions, no bias."""

    def setUp(self):
        # Start from the base configuration and override what differs.
        super(TestDeformConv2DWithPadding, self).setUp()
        self.padding = [2, 2]
class TestDeformConv2DWithBias(TestDeformConv2D):
    """Layer test with a padding of 2 and a bias term enabled."""

    def setUp(self):
        # Start from the base configuration and override what differs.
        super(TestDeformConv2DWithBias, self).setUp()
        self.padding = [2, 2]
        self.no_bias = False
class TestDeformConv2DWithAsynPadding(TestDeformConv2D):
    """Layer test with different height/width padding (1, 2) and a bias."""

    def setUp(self):
        # Start from the base configuration and override what differs.
        super(TestDeformConv2DWithAsynPadding, self).setUp()
        self.padding = [1, 2]
        self.no_bias = False
class TestDeformConv2DWithDilation(TestDeformConv2D):
    """Layer test with a dilation of 3, a padding of 1 and a bias."""

    def setUp(self):
        # Start from the base configuration and override what differs.
        super(TestDeformConv2DWithDilation, self).setUp()
        self.padding = [1, 1]
        self.dilation = [3, 3]
        self.no_bias = False
class TestDeformConv2DWithStride(TestDeformConv2D):
    """Layer test with a stride of 2, a padding of 1 and a bias."""

    def setUp(self):
        # Start from the base configuration and override what differs.
        super(TestDeformConv2DWithStride, self).setUp()
        self.padding = [1, 1]
        self.stride = [2, 2]
        self.no_bias = False
class TestDeformConv2DWithGroups(TestDeformConv2D):
    """Layer test with 5 input channels split into 5 groups, with a bias."""

    def setUp(self):
        # Start from the base configuration and override what differs.
        super(TestDeformConv2DWithGroups, self).setUp()
        self.in_channels = 5
        self.padding = [1, 1]
        self.groups = 5
        self.no_bias = False
# testcases for deform_conv2d
class TestDeformConv2DFunctionalWithPadding(TestDeformConv2DFunctional):
    """Functional test with a padding of 2 in both dimensions, no bias."""

    def setUp(self):
        # Start from the base configuration and override what differs.
        super(TestDeformConv2DFunctionalWithPadding, self).setUp()
        self.padding = [2, 2]
class TestDeformConv2DFunctionalWithBias(TestDeformConv2DFunctional):
    """Functional test with a padding of 2 and a bias term enabled."""

    def setUp(self):
        # Start from the base configuration and override what differs.
        super(TestDeformConv2DFunctionalWithBias, self).setUp()
        self.padding = [2, 2]
        self.no_bias = False
class TestDeformConv2DFunctionalWithAsynPadding(TestDeformConv2DFunctional):
    """Functional test with different height/width padding (1, 2) and a bias."""

    def setUp(self):
        # Start from the base configuration and override what differs.
        super(TestDeformConv2DFunctionalWithAsynPadding, self).setUp()
        self.padding = [1, 2]
        self.no_bias = False
class TestDeformConv2DFunctionalWithDilation(TestDeformConv2DFunctional):
    """Functional test with a dilation of 3, a padding of 1 and a bias."""

    def setUp(self):
        # Start from the base configuration and override what differs.
        super(TestDeformConv2DFunctionalWithDilation, self).setUp()
        self.padding = [1, 1]
        self.dilation = [3, 3]
        self.no_bias = False
class TestDeformConv2DFunctionalWithStride(TestDeformConv2DFunctional):
    """Functional test with a stride of 2, a padding of 1 and a bias."""

    def setUp(self):
        # Start from the base configuration and override what differs.
        super(TestDeformConv2DFunctionalWithStride, self).setUp()
        self.padding = [1, 1]
        self.stride = [2, 2]
        self.no_bias = False
class TestDeformConv2DFunctionalWithGroups(TestDeformConv2DFunctional):
    """Functional test with 5 input channels in 5 groups, with a bias."""

    def setUp(self):
        # Start from the base configuration and override what differs.
        super(TestDeformConv2DFunctionalWithGroups, self).setUp()
        self.in_channels = 5
        self.padding = [1, 1]
        self.groups = 5
        self.no_bias = False
# Run every test case in this module when executed as a script.
if __name__ == "__main__":
    unittest.main()
python/paddle/vision/ops.py
浏览文件 @
b776434c
...
...
@@ -16,10 +16,13 @@ import numpy as np
from
..fluid.layer_helper
import
LayerHelper
from
..fluid.data_feeder
import
check_variable_and_dtype
,
check_type
,
check_dtype
from
..fluid
import
core
,
layers
from
..fluid.layers
import
nn
,
utils
from
..nn
import
Layer
from
..fluid.initializer
import
Normal
from
paddle.common_ops_import
import
*
__all__
=
[
'yolo_loss'
,
'yolo_box'
]
__all__
=
[
'yolo_loss'
,
'yolo_box'
,
'deform_conv2d'
,
'DeformConv2D'
]
def
yolo_loss
(
x
,
...
...
@@ -386,3 +389,387 @@ def yolo_box(x,
},
attrs
=
attrs
)
return
boxes
,
scores
def deform_conv2d(x,
                  offset,
                  weight,
                  bias=None,
                  stride=1,
                  padding=0,
                  dilation=1,
                  groups=1,
                  mask=None,
                  name=None):
    r"""
    Compute 2-D deformable convolution on 4-D input.
    Given input image x, output feature map y, the deformable convolution operation can be expressed as follows:


    Deformable Convolution v2:

    .. math::

        y(p) = \sum_{k=1}^{K}{w_k * x(p + p_k + \Delta p_k) * \Delta m_k}

    Deformable Convolution v1:

    .. math::

        y(p) = \sum_{k=1}^{K}{w_k * x(p + p_k + \Delta p_k)}

    Where :math:`\Delta p_k` and :math:`\Delta m_k` are the learnable offset and modulation scalar for the k-th location,
    Which :math:`\Delta m_k` is one in deformable convolution v1. Please refer to `Deformable ConvNets v2: More Deformable, Better Results
    <https://arxiv.org/abs/1811.11168v2>`_ and `Deformable Convolutional Networks <https://arxiv.org/abs/1703.06211>`_.

    Example:
        - Input:

          x shape: :math:`(N, C_{in}, H_{in}, W_{in})`

          weight shape: :math:`(C_{out}, C_{in}, H_f, W_f)`

          offset shape: :math:`(N, 2 * H_f * W_f, H_{out}, W_{out})`

          mask shape: :math:`(N, H_f * W_f, H_{out}, W_{out})`

        - Output:

          Output shape: :math:`(N, C_{out}, H_{out}, W_{out})`

        Where

        .. math::

            H_{out}&= \frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (H_f - 1) + 1))}{strides[0]} + 1 \\
            W_{out}&= \frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (W_f - 1) + 1))}{strides[1]} + 1

    Args:
        x (Tensor): The input image with [N, C, H, W] format. A Tensor with type
            float32, float64.
        offset (Tensor): The input coordinate offset of deformable convolution layer.
            A Tensor with type float32, float64.
        weight (Tensor): The convolution kernel with shape [M, C/g, kH, kW], where M is
            the number of output channels, g is the number of groups, kH is the filter's
            height, kW is the filter's width.
        bias (Tensor, optional): The bias with shape [M,].
        stride (int|list|tuple, optional): The stride size. If stride is a tuple, it must
            contain two integers, (stride_H, stride_W). Otherwise, the
            stride_H = stride_W = stride. Default: stride = 1.
        padding (int|list|tuple, optional): The padding size. If padding is a tuple, it must
            contain two integers, (padding_H, padding_W). Otherwise, the
            padding_H = padding_W = padding. Default: padding = 0.
        dilation (int|list|tuple, optional): The dilation size. If dilation is a tuple, it must
            contain two integers, (dilation_H, dilation_W). Otherwise, the
            dilation_H = dilation_W = dilation. Default: dilation = 1.
        groups (int, optional): The groups number of the deformable conv layer. According to
            grouped convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
            the first half of the filters is only connected to the first half
            of the input channels, while the second half of the filters is only
            connected to the second half of the input channels. Default: groups=1.
        mask (Tensor, optional): The input mask of deformable convolution layer.
            A Tensor with type float32, float64. It should be None when you use
            deformable convolution v1.
        name(str, optional): For details, please refer to :ref:`api_guide_Name`.
                        Generally, no setting is required. Default: None.
    Returns:
        Tensor: The tensor variable storing the deformable convolution
                  result. A Tensor with type float32, float64.
    Raises:
        ValueError: If the shapes of input, filter_size, stride, padding and
                    groups mismatch.
    Examples:
        .. code-block:: python

          #deformable conv v2:

          import paddle
          input = paddle.rand((8, 1, 28, 28))
          kh, kw = 3, 3
          weight = paddle.rand((16, 1, kh, kw))
          # offset shape should be [bs, 2 * kh * kw, out_h, out_w]
          # mask shape should be [bs, kh * kw, out_h, out_w]
          # In this case, for an input of 28, stride of 1
          # and kernel size of 3, without padding, the output size is 26
          offset = paddle.rand((8, 2 * kh * kw, 26, 26))
          mask = paddle.rand((8, kh * kw, 26, 26))
          out = paddle.vision.ops.deform_conv2d(input, offset, weight, mask=mask)
          print(out.shape)
          # returns
          [8, 16, 26, 26]

          #deformable conv v1:

          import paddle
          input = paddle.rand((8, 1, 28, 28))
          kh, kw = 3, 3
          weight = paddle.rand((16, 1, kh, kw))
          # offset shape should be [bs, 2 * kh * kw, out_h, out_w]
          # In this case, for an input of 28, stride of 1
          # and kernel size of 3, without padding, the output size is 26
          offset = paddle.rand((8, 2 * kh * kw, 26, 26))
          out = paddle.vision.ops.deform_conv2d(input, offset, weight)
          print(out.shape)
          # returns
          [8, 16, 26, 26]
    """
    # Normalize scalar arguments to two-element lists once; both execution
    # branches below use the normalized values.
    stride = utils.convert_to_list(stride, 2, 'stride')
    padding = utils.convert_to_list(padding, 2, 'padding')
    dilation = utils.convert_to_list(dilation, 2, 'dilation')

    # Without a mask the v1 operator is used; with a mask, the v2 operator.
    use_deform_conv2d_v1 = mask is None

    if in_dygraph_mode():
        attrs = ('strides', stride, 'paddings', padding, 'dilations', dilation,
                 'groups', groups, 'im2col_step', 1)
        if use_deform_conv2d_v1:
            op_type = 'deformable_conv_v1'
            pre_bias = getattr(core.ops, op_type)(x, offset, weight, *attrs)
        else:
            op_type = 'deformable_conv'
            pre_bias = getattr(core.ops, op_type)(x, offset, mask, weight,
                                                  *attrs)
        if bias is not None:
            # Add the bias along the channel axis.
            out = nn.elementwise_add(pre_bias, bias, axis=1)
        else:
            out = pre_bias
    else:
        check_variable_and_dtype(x, "x", ['float32', 'float64'],
                                 'deform_conv2d')
        check_variable_and_dtype(offset, "offset", ['float32', 'float64'],
                                 'deform_conv2d')

        # Kept as a local because it is forwarded to LayerHelper via locals().
        num_channels = x.shape[1]

        helper = LayerHelper('deformable_conv', **locals())
        dtype = helper.input_dtype()

        pre_bias = helper.create_variable_for_type_inference(dtype)

        if use_deform_conv2d_v1:
            op_type = 'deformable_conv_v1'
            inputs = {
                'Input': x,
                'Filter': weight,
                'Offset': offset,
            }
        else:
            op_type = 'deformable_conv'
            inputs = {
                'Input': x,
                'Filter': weight,
                'Offset': offset,
                'Mask': mask,
            }

        outputs = {"Output": pre_bias}
        attrs = {
            'strides': stride,
            'paddings': padding,
            'dilations': dilation,
            'groups': groups,
            'deformable_groups': 1,
            'im2col_step': 1,
        }
        helper.append_op(
            type=op_type, inputs=inputs, outputs=outputs, attrs=attrs)

        if bias is not None:
            # Add the bias along the channel axis with a separate op.
            out = helper.create_variable_for_type_inference(dtype)
            helper.append_op(
                type='elementwise_add',
                inputs={'X': [pre_bias],
                        'Y': [bias]},
                outputs={'Out': [out]},
                attrs={'axis': 1})
        else:
            out = pre_bias
    return out
class DeformConv2D(Layer):
    r"""
    Compute 2-D deformable convolution on 4-D input.
    Given input image x, output feature map y, the deformable convolution operation can be expressed as follows:


    Deformable Convolution v2:

    .. math::

        y(p) = \sum_{k=1}^{K}{w_k * x(p + p_k + \Delta p_k) * \Delta m_k}

    Deformable Convolution v1:

    .. math::

        y(p) = \sum_{k=1}^{K}{w_k * x(p + p_k + \Delta p_k)}

    Where :math:`\Delta p_k` and :math:`\Delta m_k` are the learnable offset and modulation scalar for the k-th location,
    Which :math:`\Delta m_k` is one in deformable convolution v1. Please refer to `Deformable ConvNets v2: More Deformable, Better Results
    <https://arxiv.org/abs/1811.11168v2>`_ and `Deformable Convolutional Networks <https://arxiv.org/abs/1703.06211>`_.

    Example:
        - Input:

          x shape: :math:`(N, C_{in}, H_{in}, W_{in})`

          weight shape: :math:`(C_{out}, C_{in}, H_f, W_f)`

          offset shape: :math:`(N, 2 * H_f * W_f, H_{out}, W_{out})`

          mask shape: :math:`(N, H_f * W_f, H_{out}, W_{out})`

        - Output:

          Output shape: :math:`(N, C_{out}, H_{out}, W_{out})`

        Where

        .. math::

            H_{out}&= \frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (H_f - 1) + 1))}{strides[0]} + 1 \\
            W_{out}&= \frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (W_f - 1) + 1))}{strides[1]} + 1

    Parameters:
        in_channels(int): The number of input channels in the input image.
        out_channels(int): The number of output channels produced by the convolution.
        kernel_size(int|list|tuple): The size of the convolving kernel.
        stride(int|list|tuple, optional): The stride size. If stride is a tuple, it must
            contain two integers, (stride_H, stride_W). Otherwise, the
            stride_H = stride_W = stride. The default value is 1.
        padding (int|list|tuple, optional): The padding size. If padding is a tuple, it must
            contain two integers, (padding_H, padding_W). Otherwise, the
            padding_H = padding_W = padding. Default: padding = 0.
        dilation(int|list|tuple, optional): The dilation size. If dilation is a tuple, it must
            contain two integers, (dilation_H, dilation_W). Otherwise, the
            dilation_H = dilation_W = dilation. The default value is 1.
        groups(int, optional): The groups number of the deformable conv layer. According to grouped
            convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
            the first half of the filters is only connected to the first half
            of the input channels, while the second half of the filters is only
            connected to the second half of the input channels. The default value is 1.
        weight_attr(ParamAttr, optional): The parameter attribute for learnable parameters/weights
            of conv2d. If it is set to None or one attribute of ParamAttr, conv2d
            will create ParamAttr as param_attr. If it is set to None, the parameter
            is initialized with :math:`Normal(0.0, std)`, and the :math:`std` is
            :math:`(\frac{2.0 }{filter\_elem\_num})^{0.5}`. The default value is None.
        bias_attr(ParamAttr|bool, optional): The parameter attribute for the bias of conv2d.
            If it is set to False, no bias will be added to the output units.
            If it is set to None or one attribute of ParamAttr, conv2d
            will create ParamAttr as bias_attr. If the Initializer of the bias_attr
            is not set, the bias is initialized zero. The default value is None.
    Attribute:
        **weight** (Parameter): the learnable weights of filter of this layer.
        **bias** (Parameter or None): the learnable bias of this layer.
    Shape:
        - x: :math:`(N, C_{in}, H_{in}, W_{in})`
        - offset: :math:`(N, 2 * H_f * W_f, H_{out}, W_{out})`
        - mask: :math:`(N, H_f * W_f, H_{out}, W_{out})`
        - output: :math:`(N, C_{out}, H_{out}, W_{out})`

        Where

        ..  math::

            H_{out}&= \frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (kernel\_size[0] - 1) + 1))}{strides[0]} + 1 \\
            W_{out}&= \frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (kernel\_size[1] - 1) + 1))}{strides[1]} + 1

    Examples:
        .. code-block:: python

          #deformable conv v2:

          import paddle
          input = paddle.rand((8, 1, 28, 28))
          kh, kw = 3, 3
          # offset shape should be [bs, 2 * kh * kw, out_h, out_w]
          # mask shape should be [bs, kh * kw, out_h, out_w]
          # In this case, for an input of 28, stride of 1
          # and kernel size of 3, without padding, the output size is 26
          offset = paddle.rand((8, 2 * kh * kw, 26, 26))
          mask = paddle.rand((8, kh * kw, 26, 26))
          deform_conv = paddle.vision.ops.DeformConv2D(
              in_channels=1,
              out_channels=16,
              kernel_size=[kh, kw])
          out = deform_conv(input, offset, mask)
          print(out.shape)
          # returns
          [8, 16, 26, 26]

          #deformable conv v1:

          import paddle
          input = paddle.rand((8, 1, 28, 28))
          kh, kw = 3, 3
          # offset shape should be [bs, 2 * kh * kw, out_h, out_w]
          # In this case, for an input of 28, stride of 1
          # and kernel size of 3, without padding, the output size is 26
          offset = paddle.rand((8, 2 * kh * kw, 26, 26))
          deform_conv = paddle.vision.ops.DeformConv2D(
              in_channels=1,
              out_channels=16,
              kernel_size=[kh, kw])
          out = deform_conv(input, offset)
          print(out.shape)
          # returns
          [8, 16, 26, 26]
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 weight_attr=None,
                 bias_attr=None):
        super(DeformConv2D, self).__init__()
        assert weight_attr is not False, "weight_attr should not be False in Conv."
        self._weight_attr = weight_attr
        self._bias_attr = bias_attr
        self._groups = groups
        self._in_channels = in_channels
        self._out_channels = out_channels
        self._channel_dim = 1

        # Normalize scalar arguments to two-element lists.
        self._stride = utils.convert_to_list(stride, 2, 'stride')
        self._dilation = utils.convert_to_list(dilation, 2, 'dilation')
        self._kernel_size = utils.convert_to_list(kernel_size, 2, 'kernel_size')

        if in_channels % groups != 0:
            raise ValueError("in_channels must be divisible by groups.")

        self._padding = utils.convert_to_list(padding, 2, 'padding')

        # Weight layout: [out_channels, in_channels / groups, kH, kW].
        filter_shape = [out_channels, in_channels // groups] + self._kernel_size

        def _get_default_param_initializer():
            # Normal(0, sqrt(2 / (kH * kW * in_channels))), used when
            # weight_attr does not specify an initializer.
            filter_elem_num = np.prod(self._kernel_size) * self._in_channels
            std = (2.0 / filter_elem_num)**0.5
            return Normal(0.0, std, 0)

        self.weight = self.create_parameter(
            shape=filter_shape,
            attr=self._weight_attr,
            default_initializer=_get_default_param_initializer())
        self.bias = self.create_parameter(
            attr=self._bias_attr, shape=[self._out_channels], is_bias=True)

    def forward(self, x, offset, mask=None):
        """Apply deformable convolution; v1 when ``mask`` is None, else v2."""
        out = deform_conv2d(
            x=x,
            offset=offset,
            weight=self.weight,
            bias=self.bias,
            stride=self._stride,
            padding=self._padding,
            dilation=self._dilation,
            groups=self._groups,
            mask=mask)
        return out
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录