Unverified commit 87bb7262, authored by Bai Yifan, committed by GitHub

Add deform_conv2d,DeformConv2D (#29364)

* add deform_conv2d,DeformConv2D
Parent 671555ed
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn.functional as F
import paddle.nn.initializer as I
import numpy as np
import unittest
from unittest import TestCase
class TestDeformConv2D(TestCase):
batch_size = 4
spatial_shape = (16, 16)
dtype = "float32"
def setUp(self):
self.in_channels = 3
self.out_channels = 5
self.kernel_size = [3, 3]
self.padding = [0, 0]
self.stride = [1, 1]
self.dilation = [1, 1]
self.groups = 1
self.no_bias = True
def prepare(self):
if isinstance(self.kernel_size, int):
filter_shape = (self.kernel_size, ) * 2
else:
filter_shape = tuple(self.kernel_size)
self.filter_shape = filter_shape
self.weight = np.random.uniform(
-1, 1, (self.out_channels, self.in_channels // self.groups
) + filter_shape).astype(self.dtype)
if not self.no_bias:
self.bias = np.random.uniform(-1, 1, (
self.out_channels, )).astype(self.dtype)
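# helper: standard convolution output size along one spatial dimension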
def out_size(in_size, pad_size, dilation_size, kernel_size,
stride_size):
return (in_size + 2 * pad_size -
(dilation_size * (kernel_size - 1) + 1)) / stride_size + 1
out_h = int(
out_size(self.spatial_shape[0], self.padding[0], self.dilation[0],
self.kernel_size[0], self.stride[0]))
out_w = int(
out_size(self.spatial_shape[1], self.padding[1], self.dilation[1],
self.kernel_size[1], self.stride[1]))
out_shape = (out_h, out_w)
self.input_shape = (self.batch_size, self.in_channels
) + self.spatial_shape
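# offset holds an (x, y) displacement per kernel point (2 * kh * kw channels);
# mask holds one modulation scalar per kernel point (kh * kw channels)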
self.offset_shape = (self.batch_size, 2 * filter_shape[0] *
filter_shape[1]) + out_shape
self.mask_shape = (self.batch_size, filter_shape[0] * filter_shape[1]
) + out_shape
self.input = np.random.uniform(-1, 1,
self.input_shape).astype(self.dtype)
self.offset = np.random.uniform(-1, 1,
self.offset_shape).astype(self.dtype)
self.mask = np.random.uniform(-1, 1, self.mask_shape).astype(self.dtype)
def static_graph_case_dcn(self):
main = paddle.static.Program()
start = paddle.static.Program()
paddle.enable_static()
with paddle.static.program_guard(main, start):
x = paddle.static.data(
"input", (-1, self.in_channels, -1, -1), dtype=self.dtype)
offset = paddle.static.data(
"offset",
(-1, 2 * self.filter_shape[0] * self.filter_shape[1], -1, -1),
dtype=self.dtype)
mask = paddle.static.data(
"mask",
(-1, self.filter_shape[0] * self.filter_shape[1], -1, -1),
dtype=self.dtype)
y_v1 = paddle.fluid.layers.deformable_conv(
input=x,
offset=offset,
mask=None,
num_filters=self.out_channels,
filter_size=self.filter_shape,
stride=self.stride,
padding=self.padding,
dilation=self.dilation,
groups=self.groups,
deformable_groups=1,
im2col_step=1,
param_attr=I.Assign(self.weight),
bias_attr=False if self.no_bias else I.Assign(self.bias),
modulated=False)
y_v2 = paddle.fluid.layers.deformable_conv(
input=x,
offset=offset,
mask=mask,
num_filters=self.out_channels,
filter_size=self.filter_shape,
stride=self.stride,
padding=self.padding,
dilation=self.dilation,
groups=self.groups,
deformable_groups=1,
im2col_step=1,
param_attr=I.Assign(self.weight),
bias_attr=False if self.no_bias else I.Assign(self.bias))
exe = paddle.static.Executor(self.place)
exe.run(start)
out_v1, out_v2 = exe.run(main,
feed={
"input": self.input,
"offset": self.offset,
"mask": self.mask
},
fetch_list=[y_v1, y_v2])
return out_v1, out_v2
def dygraph_case_dcn(self):
paddle.disable_static()
x = paddle.to_tensor(self.input)
offset = paddle.to_tensor(self.offset)
mask = paddle.to_tensor(self.mask)
bias = None if self.no_bias else paddle.to_tensor(self.bias)
deform_conv2d = paddle.vision.ops.DeformConv2D(
in_channels=self.in_channels,
out_channels=self.out_channels,
kernel_size=self.kernel_size,
stride=self.stride,
padding=self.padding,
dilation=self.dilation,
groups=self.groups,
weight_attr=I.Assign(self.weight),
bias_attr=False if self.no_bias else I.Assign(self.bias))
y_v1 = deform_conv2d(x, offset)
y_v2 = deform_conv2d(x, offset, mask)
out_v1 = y_v1.numpy()
out_v2 = y_v2.numpy()
return out_v1, out_v2
def _test_identity(self):
self.prepare()
static_dcn_v1, static_dcn_v2 = self.static_graph_case_dcn()
dy_dcn_v1, dy_dcn_v2 = self.dygraph_case_dcn()
np.testing.assert_array_almost_equal(static_dcn_v1, dy_dcn_v1)
np.testing.assert_array_almost_equal(static_dcn_v2, dy_dcn_v2)
def test_identity(self):
self.place = paddle.CPUPlace()
self._test_identity()
if paddle.is_compiled_with_cuda():
self.place = paddle.CUDAPlace(0)
self._test_identity()
class TestDeformConv2DFunctional(TestCase):
batch_size = 4
spatial_shape = (16, 16)
dtype = "float32"
def setUp(self):
self.in_channels = 3
self.out_channels = 5
self.kernel_size = [3, 3]
self.padding = [0, 0]
self.stride = [1, 1]
self.dilation = [1, 1]
self.groups = 1
self.no_bias = True
def prepare(self):
if isinstance(self.kernel_size, int):
filter_shape = (self.kernel_size, ) * 2
else:
filter_shape = tuple(self.kernel_size)
self.filter_shape = filter_shape
self.weight = np.random.uniform(
-1, 1, (self.out_channels, self.in_channels // self.groups
) + filter_shape).astype(self.dtype)
if not self.no_bias:
self.bias = np.random.uniform(-1, 1, (
self.out_channels, )).astype(self.dtype)
def out_size(in_size, pad_size, dilation_size, kernel_size,
stride_size):
return (in_size + 2 * pad_size -
(dilation_size * (kernel_size - 1) + 1)) / stride_size + 1
out_h = int(
out_size(self.spatial_shape[0], self.padding[0], self.dilation[0],
self.kernel_size[0], self.stride[0]))
out_w = int(
out_size(self.spatial_shape[1], self.padding[1], self.dilation[1],
self.kernel_size[1], self.stride[1]))
out_shape = (out_h, out_w)
self.input_shape = (self.batch_size, self.in_channels
) + self.spatial_shape
self.offset_shape = (self.batch_size, 2 * filter_shape[0] *
filter_shape[1]) + out_shape
self.mask_shape = (self.batch_size, filter_shape[0] * filter_shape[1]
) + out_shape
self.input = np.random.uniform(-1, 1,
self.input_shape).astype(self.dtype)
self.offset = np.random.uniform(-1, 1,
self.offset_shape).astype(self.dtype)
self.mask = np.random.uniform(-1, 1, self.mask_shape).astype(self.dtype)
def static_graph_case_dcn(self):
main = paddle.static.Program()
start = paddle.static.Program()
paddle.enable_static()
with paddle.static.program_guard(main, start):
x = paddle.static.data(
"input", (-1, self.in_channels, -1, -1), dtype=self.dtype)
offset = paddle.static.data(
"offset",
(-1, 2 * self.filter_shape[0] * self.filter_shape[1], -1, -1),
dtype=self.dtype)
mask = paddle.static.data(
"mask",
(-1, self.filter_shape[0] * self.filter_shape[1], -1, -1),
dtype=self.dtype)
y_v1 = paddle.fluid.layers.deformable_conv(
input=x,
offset=offset,
mask=None,
num_filters=self.out_channels,
filter_size=self.filter_shape,
stride=self.stride,
padding=self.padding,
dilation=self.dilation,
groups=self.groups,
deformable_groups=1,
im2col_step=1,
param_attr=I.Assign(self.weight),
bias_attr=False if self.no_bias else I.Assign(self.bias),
modulated=False)
y_v2 = paddle.fluid.layers.deformable_conv(
input=x,
offset=offset,
mask=mask,
num_filters=self.out_channels,
filter_size=self.filter_shape,
stride=self.stride,
padding=self.padding,
dilation=self.dilation,
groups=self.groups,
deformable_groups=1,
im2col_step=1,
param_attr=I.Assign(self.weight),
bias_attr=False if self.no_bias else I.Assign(self.bias))
exe = paddle.static.Executor(self.place)
exe.run(start)
out_v1, out_v2 = exe.run(main,
feed={
"input": self.input,
"offset": self.offset,
"mask": self.mask
},
fetch_list=[y_v1, y_v2])
return out_v1, out_v2
def dygraph_case_dcn(self):
paddle.disable_static()
x = paddle.to_tensor(self.input)
offset = paddle.to_tensor(self.offset)
mask = paddle.to_tensor(self.mask)
weight = paddle.to_tensor(self.weight)
bias = None if self.no_bias else paddle.to_tensor(self.bias)
y_v1 = paddle.vision.ops.deform_conv2d(
x=x,
offset=offset,
weight=weight,
bias=bias,
stride=self.stride,
padding=self.padding,
dilation=self.dilation,
groups=self.groups, )
y_v2 = paddle.vision.ops.deform_conv2d(
x=x,
offset=offset,
mask=mask,
weight=weight,
bias=bias,
stride=self.stride,
padding=self.padding,
dilation=self.dilation,
groups=self.groups, )
out_v1 = y_v1.numpy()
out_v2 = y_v2.numpy()
return out_v1, out_v2
def new_api_static_graph_case_dcn(self):
main = paddle.static.Program()
start = paddle.static.Program()
paddle.enable_static()
with paddle.static.program_guard(main, start):
x = paddle.static.data(
"input", (-1, self.in_channels, -1, -1), dtype=self.dtype)
offset = paddle.static.data(
"offset",
(-1, 2 * self.filter_shape[0] * self.filter_shape[1], -1, -1),
dtype=self.dtype)
mask = paddle.static.data(
"mask",
(-1, self.filter_shape[0] * self.filter_shape[1], -1, -1),
dtype=self.dtype)
weight = paddle.static.data(
"weight", list(self.weight.shape), dtype=self.dtype)
if not self.no_bias:
bias = paddle.static.data("bias", [-1], dtype=self.dtype)
y_v1 = paddle.vision.ops.deform_conv2d(
x=x,
offset=offset,
weight=weight,
bias=None if self.no_bias else bias,
stride=self.stride,
padding=self.padding,
dilation=self.dilation,
groups=self.groups, )
y_v2 = paddle.vision.ops.deform_conv2d(
x=x,
offset=offset,
mask=mask,
weight=weight,
bias=None if self.no_bias else bias,
stride=self.stride,
padding=self.padding,
dilation=self.dilation,
groups=self.groups, )
exe = paddle.static.Executor(self.place)
exe.run(start)
feed_dict = {
"input": self.input,
"offset": self.offset,
"mask": self.mask,
"weight": self.weight
}
if not self.no_bias:
feed_dict["bias"] = self.bias
out_v1, out_v2 = exe.run(main, feed=feed_dict, fetch_list=[y_v1, y_v2])
return out_v1, out_v2
def _test_identity(self):
self.prepare()
static_dcn_v1, static_dcn_v2 = self.static_graph_case_dcn()
dy_dcn_v1, dy_dcn_v2 = self.dygraph_case_dcn()
new_static_dcn_v1, new_static_dcn_v2 = self.new_api_static_graph_case_dcn(
)
np.testing.assert_array_almost_equal(static_dcn_v1, dy_dcn_v1)
np.testing.assert_array_almost_equal(static_dcn_v2, dy_dcn_v2)
np.testing.assert_array_almost_equal(static_dcn_v1, new_static_dcn_v1)
np.testing.assert_array_almost_equal(static_dcn_v2, new_static_dcn_v2)
def test_identity(self):
self.place = paddle.CPUPlace()
self._test_identity()
if paddle.is_compiled_with_cuda():
self.place = paddle.CUDAPlace(0)
self._test_identity()
# testcases for DeformConv2D
class TestDeformConv2DWithPadding(TestDeformConv2D):
def setUp(self):
self.in_channels = 3
self.out_channels = 5
self.kernel_size = [3, 3]
self.padding = [2, 2]
self.stride = [1, 1]
self.dilation = [1, 1]
self.groups = 1
self.no_bias = True
class TestDeformConv2DWithBias(TestDeformConv2D):
def setUp(self):
self.in_channels = 3
self.out_channels = 5
self.kernel_size = [3, 3]
self.padding = [2, 2]
self.stride = [1, 1]
self.dilation = [1, 1]
self.groups = 1
self.no_bias = False
class TestDeformConv2DWithAsynPadding(TestDeformConv2D):
def setUp(self):
self.in_channels = 3
self.out_channels = 5
self.kernel_size = [3, 3]
self.padding = [1, 2]
self.stride = [1, 1]
self.dilation = [1, 1]
self.groups = 1
self.no_bias = False
class TestDeformConv2DWithDilation(TestDeformConv2D):
def setUp(self):
self.in_channels = 3
self.out_channels = 5
self.kernel_size = [3, 3]
self.padding = [1, 1]
self.stride = [1, 1]
self.dilation = [3, 3]
self.groups = 1
self.no_bias = False
class TestDeformConv2DWithStride(TestDeformConv2D):
def setUp(self):
self.in_channels = 3
self.out_channels = 5
self.kernel_size = [3, 3]
self.padding = [1, 1]
self.stride = [2, 2]
self.dilation = [1, 1]
self.groups = 1
self.no_bias = False
class TestDeformConv2DWithGroups(TestDeformConv2D):
def setUp(self):
self.in_channels = 5
self.out_channels = 5
self.kernel_size = [3, 3]
self.padding = [1, 1]
self.stride = [1, 1]
self.dilation = [1, 1]
self.groups = 5
self.no_bias = False
# testcases for deform_conv2d
class TestDeformConv2DFunctionalWithPadding(TestDeformConv2DFunctional):
def setUp(self):
self.in_channels = 3
self.out_channels = 5
self.kernel_size = [3, 3]
self.padding = [2, 2]
self.stride = [1, 1]
self.dilation = [1, 1]
self.groups = 1
self.no_bias = True
class TestDeformConv2DFunctionalWithBias(TestDeformConv2DFunctional):
def setUp(self):
self.in_channels = 3
self.out_channels = 5
self.kernel_size = [3, 3]
self.padding = [2, 2]
self.stride = [1, 1]
self.dilation = [1, 1]
self.groups = 1
self.no_bias = False
class TestDeformConv2DFunctionalWithAsynPadding(TestDeformConv2DFunctional):
def setUp(self):
self.in_channels = 3
self.out_channels = 5
self.kernel_size = [3, 3]
self.padding = [1, 2]
self.stride = [1, 1]
self.dilation = [1, 1]
self.groups = 1
self.no_bias = False
class TestDeformConv2DFunctionalWithDilation(TestDeformConv2DFunctional):
def setUp(self):
self.in_channels = 3
self.out_channels = 5
self.kernel_size = [3, 3]
self.padding = [1, 1]
self.stride = [1, 1]
self.dilation = [3, 3]
self.groups = 1
self.no_bias = False
class TestDeformConv2DFunctionalWithStride(TestDeformConv2DFunctional):
def setUp(self):
self.in_channels = 3
self.out_channels = 5
self.kernel_size = [3, 3]
self.padding = [1, 1]
self.stride = [2, 2]
self.dilation = [1, 1]
self.groups = 1
self.no_bias = False
class TestDeformConv2DFunctionalWithGroups(TestDeformConv2DFunctional):
def setUp(self):
self.in_channels = 5
self.out_channels = 5
self.kernel_size = [3, 3]
self.padding = [1, 1]
self.stride = [1, 1]
self.dilation = [1, 1]
self.groups = 5
self.no_bias = False
if __name__ == "__main__":
unittest.main()
@@ -16,10 +16,13 @@ import numpy as np
from ..fluid.layer_helper import LayerHelper
from ..fluid.data_feeder import check_variable_and_dtype, check_type, check_dtype
from ..fluid import core, layers
from ..fluid.layers import nn, utils
from ..nn import Layer
from ..fluid.initializer import Normal
from paddle.common_ops_import import *
__all__ = ['yolo_loss', 'yolo_box', 'deform_conv2d', 'DeformConv2D']
def yolo_loss(x,
@@ -386,3 +389,387 @@ def yolo_box(x,
},
attrs=attrs)
return boxes, scores
def deform_conv2d(x,
offset,
weight,
bias=None,
stride=1,
padding=0,
dilation=1,
groups=1,
mask=None,
name=None):
r"""
Compute 2-D deformable convolution on 4-D input.
Given an input image x and an output feature map y, the deformable convolution operation can be expressed as follows:
Deformable Convolution v2:
.. math::
y(p) = \sum_{k=1}^{K}{w_k * x(p + p_k + \Delta p_k) * \Delta m_k}
Deformable Convolution v1:
.. math::
y(p) = \sum_{k=1}^{K}{w_k * x(p + p_k + \Delta p_k)}
Where :math:`\Delta p_k` and :math:`\Delta m_k` are the learnable offset and modulation scalar for the k-th location, respectively.
In deformable convolution v1, :math:`\Delta m_k` is fixed to 1. Please refer to `Deformable ConvNets v2: More Deformable, Better Results
<https://arxiv.org/abs/1811.11168v2>`_ and `Deformable Convolutional Networks <https://arxiv.org/abs/1703.06211>`_.
Example:
- Input:
x shape: :math:`(N, C_{in}, H_{in}, W_{in})`
weight shape: :math:`(C_{out}, C_{in}, H_f, W_f)`
offset shape: :math:`(N, 2 * H_f * W_f, H_{out}, W_{out})`
mask shape: :math:`(N, H_f * W_f, H_{out}, W_{out})`
- Output:
Output shape: :math:`(N, C_{out}, H_{out}, W_{out})`
Where
.. math::
H_{out}&= \frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (H_f - 1) + 1))}{strides[0]} + 1 \\
W_{out}&= \frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (W_f - 1) + 1))}{strides[1]} + 1
Args:
x (Tensor): The input image with [N, C, H, W] format. A Tensor with type
float32, float64.
offset (Tensor): The input coordinate offset of deformable convolution layer.
A Tensor with type float32, float64.
weight (Tensor): The convolution kernel with shape [M, C/g, kH, kW], where M is
the number of output channels, g is the number of groups, kH is the filter's
height, kW is the filter's width.
bias (Tensor, optional): The bias with shape [M,].
stride (int|list|tuple, optional): The stride size. If stride is a tuple, it must
contain two integers, (stride_H, stride_W). Otherwise, the
stride_H = stride_W = stride. Default: stride = 1.
padding (int|list|tuple, optional): The padding size. If padding is a tuple, it must
contain two integers, (padding_H, padding_W). Otherwise, the
padding_H = padding_W = padding. Default: padding = 0.
dilation (int|list|tuple, optional): The dilation size. If dilation is a tuple, it must
contain two integers, (dilation_H, dilation_W). Otherwise, the
dilation_H = dilation_W = dilation. Default: dilation = 1.
groups (int, optional): The groups number of the deformable conv layer. According to
grouped convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
the first half of the filters is only connected to the first half
of the input channels, while the second half of the filters is only
connected to the second half of the input channels. Default: groups=1.
mask (Tensor, optional): The input mask of deformable convolution layer.
A Tensor with type float32, float64. It should be None when you use
deformable convolution v1.
name(str, optional): For details, please refer to :ref:`api_guide_Name`.
Generally, no setting is required. Default: None.
Returns:
Tensor: The tensor variable storing the deformable convolution \
result. A Tensor with type float32, float64.
Raises:
ValueError: If the shapes of input, filter_size, stride, padding and
groups mismatch.
Examples:
.. code-block:: python
#deformable conv v2:
import paddle
input = paddle.rand((8, 1, 28, 28))
kh, kw = 3, 3
weight = paddle.rand((16, 1, kh, kw))
# offset shape should be [bs, 2 * kh * kw, out_h, out_w]
# mask shape should be [bs, kh * kw, out_h, out_w]
# In this case, for an input of 28, stride of 1
# and kernel size of 3, without padding, the output size is 26
offset = paddle.rand((8, 2 * kh * kw, 26, 26))
mask = paddle.rand((8, kh * kw, 26, 26))
out = paddle.vision.ops.deform_conv2d(input, offset, weight, mask=mask)
print(out.shape)
# returns
[8, 16, 26, 26]
#deformable conv v1:
import paddle
input = paddle.rand((8, 1, 28, 28))
kh, kw = 3, 3
weight = paddle.rand((16, 1, kh, kw))
# offset shape should be [bs, 2 * kh * kw, out_h, out_w]
# In this case, for an input of 28, stride of 1
# and kernel size of 3, without padding, the output size is 26
offset = paddle.rand((8, 2 * kh * kw, 26, 26))
out = paddle.vision.ops.deform_conv2d(input, offset, weight)
print(out.shape)
# returns
[8, 16, 26, 26]
"""
stride = utils.convert_to_list(stride, 2, 'stride')
padding = utils.convert_to_list(padding, 2, 'padding')
dilation = utils.convert_to_list(dilation, 2, 'dilation')
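# a missing mask selects deformable conv v1 (no modulation); with a mask, v2 is used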
use_deform_conv2d_v1 = True if mask is None else False
if in_dygraph_mode():
attrs = ('strides', stride, 'paddings', padding, 'dilations', dilation,
'groups', groups, 'im2col_step', 1)
if use_deform_conv2d_v1:
op_type = 'deformable_conv_v1'
pre_bias = getattr(core.ops, op_type)(x, offset, weight, *attrs)
else:
op_type = 'deformable_conv'
pre_bias = getattr(core.ops, op_type)(x, offset, mask, weight,
*attrs)
if bias is not None:
out = nn.elementwise_add(pre_bias, bias, axis=1)
else:
out = pre_bias
else:
check_variable_and_dtype(x, "x", ['float32', 'float64'],
'deform_conv2d')
check_variable_and_dtype(offset, "offset", ['float32', 'float64'],
'deform_conv2d')
num_channels = x.shape[1]
helper = LayerHelper('deformable_conv', **locals())
dtype = helper.input_dtype()
stride = utils.convert_to_list(stride, 2, 'stride')
padding = utils.convert_to_list(padding, 2, 'padding')
dilation = utils.convert_to_list(dilation, 2, 'dilation')
pre_bias = helper.create_variable_for_type_inference(dtype)
if use_deform_conv2d_v1:
op_type = 'deformable_conv_v1'
inputs = {
'Input': x,
'Filter': weight,
'Offset': offset,
}
else:
op_type = 'deformable_conv'
inputs = {
'Input': x,
'Filter': weight,
'Offset': offset,
'Mask': mask,
}
outputs = {"Output": pre_bias}
attrs = {
'strides': stride,
'paddings': padding,
'dilations': dilation,
'groups': groups,
'deformable_groups': 1,
'im2col_step': 1,
}
helper.append_op(
type=op_type, inputs=inputs, outputs=outputs, attrs=attrs)
if bias is not None:
out = helper.create_variable_for_type_inference(dtype)
helper.append_op(
type='elementwise_add',
inputs={'X': [pre_bias],
'Y': [bias]},
outputs={'Out': [out]},
attrs={'axis': 1})
else:
out = pre_bias
return out
class DeformConv2D(Layer):
r"""
Compute 2-D deformable convolution on 4-D input.
Given an input image x and an output feature map y, the deformable convolution operation can be expressed as follows:
Deformable Convolution v2:
.. math::
y(p) = \sum_{k=1}^{K}{w_k * x(p + p_k + \Delta p_k) * \Delta m_k}
Deformable Convolution v1:
.. math::
y(p) = \sum_{k=1}^{K}{w_k * x(p + p_k + \Delta p_k)}
Where :math:`\Delta p_k` and :math:`\Delta m_k` are the learnable offset and modulation scalar for the k-th location, respectively.
In deformable convolution v1, :math:`\Delta m_k` is fixed to 1. Please refer to `Deformable ConvNets v2: More Deformable, Better Results
<https://arxiv.org/abs/1811.11168v2>`_ and `Deformable Convolutional Networks <https://arxiv.org/abs/1703.06211>`_.
Example:
- Input:
x shape: :math:`(N, C_{in}, H_{in}, W_{in})`
weight shape: :math:`(C_{out}, C_{in}, H_f, W_f)`
offset shape: :math:`(N, 2 * H_f * W_f, H_{out}, W_{out})`
mask shape: :math:`(N, H_f * W_f, H_{out}, W_{out})`
- Output:
Output shape: :math:`(N, C_{out}, H_{out}, W_{out})`
Where
.. math::
H_{out}&= \frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (H_f - 1) + 1))}{strides[0]} + 1 \\
W_{out}&= \frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (W_f - 1) + 1))}{strides[1]} + 1
Parameters:
in_channels(int): The number of input channels in the input image.
out_channels(int): The number of output channels produced by the convolution.
kernel_size(int|list|tuple): The size of the convolving kernel.
stride(int|list|tuple, optional): The stride size. If stride is a tuple, it must
contain two integers, (stride_H, stride_W). Otherwise, the
stride_H = stride_W = stride. The default value is 1.
padding (int|list|tuple, optional): The padding size. If padding is a tuple, it must
contain two integers, (padding_H, padding_W). Otherwise, the
padding_H = padding_W = padding. Default: padding = 0.
dilation(int|list|tuple, optional): The dilation size. If dilation is a tuple, it must
contain two integers, (dilation_H, dilation_W). Otherwise, the
dilation_H = dilation_W = dilation. The default value is 1.
groups(int, optional): The groups number of the deformable conv layer. According to grouped
convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
the first half of the filters is only connected to the first half
of the input channels, while the second half of the filters is only
connected to the second half of the input channels. The default value is 1.
weight_attr(ParamAttr, optional): The parameter attribute for the learnable weights
of this layer. If it is set to None or one attribute of ParamAttr, the layer
will create ParamAttr as weight_attr. If it is set to None, the parameter
is initialized with :math:`Normal(0.0, std)`, where :math:`std` is
:math:`(\frac{2.0}{filter\_elem\_num})^{0.5}`. The default value is None.
bias_attr(ParamAttr|bool, optional): The parameter attribute for the bias of conv2d.
If it is set to False, no bias will be added to the output units.
If it is set to None or one attribute of ParamAttr, conv2d
will create ParamAttr as bias_attr. If the Initializer of the bias_attr
is not set, the bias is initialized to zero. The default value is None.
Attribute:
**weight** (Parameter): the learnable weights of filter of this layer.
**bias** (Parameter or None): the learnable bias of this layer.
Shape:
- x: :math:`(N, C_{in}, H_{in}, W_{in})`
- offset: :math:`(N, 2 * H_f * W_f, H_{out}, W_{out})`
- mask: :math:`(N, H_f * W_f, H_{out}, W_{out})`
- output: :math:`(N, C_{out}, H_{out}, W_{out})`
Where
.. math::
H_{out}&= \frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (kernel\_size[0] - 1) + 1))}{strides[0]} + 1 \\
W_{out}&= \frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (kernel\_size[1] - 1) + 1))}{strides[1]} + 1
Examples:
.. code-block:: python
#deformable conv v2:
import paddle
input = paddle.rand((8, 1, 28, 28))
kh, kw = 3, 3
# offset shape should be [bs, 2 * kh * kw, out_h, out_w]
# mask shape should be [bs, kh * kw, out_h, out_w]
# In this case, for an input of 28, stride of 1
# and kernel size of 3, without padding, the output size is 26
offset = paddle.rand((8, 2 * kh * kw, 26, 26))
mask = paddle.rand((8, kh * kw, 26, 26))
deform_conv = paddle.vision.ops.DeformConv2D(
in_channels=1,
out_channels=16,
kernel_size=[kh, kw])
out = deform_conv(input, offset, mask)
print(out.shape)
# returns
[8, 16, 26, 26]
#deformable conv v1:
import paddle
input = paddle.rand((8, 1, 28, 28))
kh, kw = 3, 3
# offset shape should be [bs, 2 * kh * kw, out_h, out_w]
# In this case, for an input of 28, stride of 1
# and kernel size of 3, without padding, the output size is 26
offset = paddle.rand((8, 2 * kh * kw, 26, 26))
deform_conv = paddle.vision.ops.DeformConv2D(
in_channels=1,
out_channels=16,
kernel_size=[kh, kw])
out = deform_conv(input, offset)
print(out.shape)
# returns
[8, 16, 26, 26]
"""
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
dilation=1,
groups=1,
weight_attr=None,
bias_attr=None):
super(DeformConv2D, self).__init__()
assert weight_attr is not False, "weight_attr should not be False in Conv."
self._weight_attr = weight_attr
self._bias_attr = bias_attr
self._groups = groups
self._in_channels = in_channels
self._out_channels = out_channels
self._channel_dim = 1
self._stride = utils.convert_to_list(stride, 2, 'stride')
self._dilation = utils.convert_to_list(dilation, 2, 'dilation')
self._kernel_size = utils.convert_to_list(kernel_size, 2, 'kernel_size')
if in_channels % groups != 0:
raise ValueError("in_channels must be divisible by groups.")
self._padding = utils.convert_to_list(padding, 2, 'padding')
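# weight layout: [out_channels, in_channels // groups, kernel_h, kernel_w]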
filter_shape = [out_channels, in_channels // groups] + self._kernel_size
def _get_default_param_initializer():
filter_elem_num = np.prod(self._kernel_size) * self._in_channels
std = (2.0 / filter_elem_num)**0.5
return Normal(0.0, std, 0)
self.weight = self.create_parameter(
shape=filter_shape,
attr=self._weight_attr,
default_initializer=_get_default_param_initializer())
self.bias = self.create_parameter(
attr=self._bias_attr, shape=[self._out_channels], is_bias=True)
def forward(self, x, offset, mask=None):
out = deform_conv2d(
x=x,
offset=offset,
weight=self.weight,
bias=self.bias,
stride=self._stride,
padding=self._padding,
dilation=self._dilation,
groups=self._groups,
mask=mask)
return out
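# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of this commit, shown commented out so it
# does not execute at import). In practice the offset and mask tensors are
# usually predicted from the input by ordinary Conv2D layers whose output
# channels are 2 * kh * kw and kh * kw respectively, as in the Deformable
# ConvNets papers. The block names and sizes below (DeformBlock, offset_conv,
# mask_conv) are assumptions for demonstration only.
#
# import paddle
# import paddle.nn as nn
#
# class DeformBlock(nn.Layer):
#     def __init__(self, in_channels=1, out_channels=16, kh=3, kw=3):
#         super(DeformBlock, self).__init__()
#         # predicts 2 * kh * kw (x, y) offsets per output location
#         self.offset_conv = nn.Conv2D(in_channels, 2 * kh * kw, [kh, kw])
#         # predicts kh * kw modulation scalars per output location
#         self.mask_conv = nn.Conv2D(in_channels, kh * kw, [kh, kw])
#         self.deform_conv = paddle.vision.ops.DeformConv2D(
#             in_channels, out_channels, kernel_size=[kh, kw])
#
#     def forward(self, x):
#         offset = self.offset_conv(x)
#         mask = paddle.nn.functional.sigmoid(self.mask_conv(x))
#         return self.deform_conv(x, offset, mask)
#
# x = paddle.rand((8, 1, 28, 28))
# y = DeformBlock()(x)  # expected shape: [8, 16, 26, 26]
# ---------------------------------------------------------------------------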