未验证 提交 ef56ff86 编写于 作者: X xinyingxinying 提交者: GitHub

# add Deform Conv

上级 f8bc4673
...@@ -14,8 +14,6 @@ ...@@ -14,8 +14,6 @@
import numpy as np import numpy as np
from numbers import Integral from numbers import Integral
import math
import six
from paddle import fluid from paddle import fluid
from paddle.fluid.param_attr import ParamAttr from paddle.fluid.param_attr import ParamAttr
...@@ -26,12 +24,140 @@ from ppdet.utils.bbox_utils import bbox_overlaps, box_to_delta ...@@ -26,12 +24,140 @@ from ppdet.utils.bbox_utils import bbox_overlaps, box_to_delta
__all__ = [ __all__ = [
'AnchorGenerator', 'DropBlock', 'RPNTargetAssign', 'GenerateProposals', 'AnchorGenerator', 'DropBlock', 'RPNTargetAssign', 'GenerateProposals',
'MultiClassNMS', 'BBoxAssigner', 'MaskAssigner', 'RoIAlign', 'RoIPool', 'MultiClassNMS', 'BBoxAssigner', 'MaskAssigner', 'RoIAlign', 'RoIPool',
'MultiBoxHead', 'SSDLiteMultiBoxHead', 'SSDOutputDecoder', 'MultiBoxHead', 'SSDOutputDecoder', 'RetinaTargetAssign',
'RetinaTargetAssign', 'RetinaOutputDecoder', 'ConvNorm', 'RetinaOutputDecoder', 'ConvNorm', 'DeformConvNorm', 'MultiClassSoftNMS',
'MultiClassSoftNMS', 'LibraBBoxAssigner' 'LibraBBoxAssigner'
] ]
def _conv_offset(input, filter_size, stride, padding, act=None, name=None):
    """Predict the offset/mask map consumed by a modulated deformable conv.

    Emits 3 * filter_size^2 channels: two per sampling point for the
    (x, y) offsets plus one per point for the modulation mask. Weights
    and biases are zero-initialized so the deformable conv initially
    behaves like a plain convolution.
    """
    num_points = filter_size * filter_size
    weight_attr = ParamAttr(
        initializer=fluid.initializer.Constant(value=0), name=name + ".w_0")
    bias_attr = ParamAttr(
        initializer=fluid.initializer.Constant(value=0), name=name + ".b_0")
    return fluid.layers.conv2d(
        input,
        num_filters=num_points * 3,
        filter_size=filter_size,
        stride=stride,
        padding=padding,
        param_attr=weight_attr,
        bias_attr=bias_attr,
        act=act,
        name=name)
def DeformConvNorm(input,
                   num_filters,
                   filter_size,
                   stride=1,
                   groups=1,
                   norm_decay=0.,
                   norm_type='affine_channel',
                   norm_groups=32,
                   dilation=1,
                   lr_scale=1,
                   freeze_norm=False,
                   act=None,
                   norm_name=None,
                   initializer=None,
                   bias_attr=False,
                   name=None):
    """Modulated deformable convolution (DCN v2) followed by a norm layer.

    The offset/mask branch is zero-initialized (see _conv_offset), so at
    initialization this layer behaves like a regular conv + norm.

    Args:
        input: input feature map variable (NCHW).
        num_filters (int): number of output channels.
        filter_size (int): square kernel size.
        stride (int): convolution stride.
        groups (int): convolution groups.
        norm_decay (float): L2 regularization coefficient for the norm
            scale/offset parameters.
        norm_type (str): one of 'bn', 'sync_bn', 'gn', 'affine_channel'.
        norm_groups (int): group count when norm_type == 'gn'.
        dilation (int): convolution dilation.
        lr_scale (float): learning-rate multiplier for conv/norm params.
        freeze_norm (bool): if True, stop gradients on the norm params.
        act: activation applied after the norm layer.
        norm_name (str): parameter-name prefix for the norm layer.
        initializer: initializer for the deformable conv weights.
        bias_attr (bool): add a zero-initialized conv bias when True.
        name (str): parameter-name prefix for the conv layer.

    Returns:
        The normalized (and optionally activated) output variable.

    Raises:
        ValueError: if norm_type is not one of the supported values.
    """
    if bias_attr:
        bias_para = ParamAttr(
            name=name + "_bias",
            initializer=fluid.initializer.Constant(value=0),
            # Bias learns at twice the weight learning rate (common conv
            # convention in this codebase).
            learning_rate=lr_scale * 2)
    else:
        bias_para = False
    # Offsets and modulation masks are predicted by a plain conv branch.
    offset_mask = _conv_offset(
        input=input,
        filter_size=filter_size,
        stride=stride,
        padding=(filter_size - 1) // 2,
        act=None,
        name=name + "_conv_offset")
    # Two offset channels (x, y) plus one mask channel per sampling point.
    offset_channel = filter_size**2 * 2
    mask_channel = filter_size**2
    offset, mask = fluid.layers.split(
        input=offset_mask,
        num_or_sections=[offset_channel, mask_channel],
        dim=1)
    # Squash masks into [0, 1] so they modulate each sampling point.
    mask = fluid.layers.sigmoid(mask)
    conv = fluid.layers.deformable_conv(
        input=input,
        offset=offset,
        mask=mask,
        num_filters=num_filters,
        filter_size=filter_size,
        stride=stride,
        padding=(filter_size - 1) // 2 * dilation,
        dilation=dilation,
        groups=groups,
        deformable_groups=1,
        im2col_step=1,
        param_attr=ParamAttr(
            name=name + "_weights",
            initializer=initializer,
            learning_rate=lr_scale),
        bias_attr=bias_para,
        name=name + ".conv2d.output.1")

    # Frozen norm layers keep their parameters fixed (lr = 0).
    norm_lr = 0. if freeze_norm else 1.
    pattr = ParamAttr(
        name=norm_name + '_scale',
        learning_rate=norm_lr * lr_scale,
        regularizer=L2Decay(norm_decay))
    battr = ParamAttr(
        name=norm_name + '_offset',
        learning_rate=norm_lr * lr_scale,
        regularizer=L2Decay(norm_decay))
    if norm_type in ['bn', 'sync_bn']:
        # Frozen BN evaluates with the accumulated global statistics.
        global_stats = True if freeze_norm else False
        out = fluid.layers.batch_norm(
            input=conv,
            act=act,
            name=norm_name + '.output.1',
            param_attr=pattr,
            bias_attr=battr,
            moving_mean_name=norm_name + '_mean',
            moving_variance_name=norm_name + '_variance',
            use_global_stats=global_stats)
        scale = fluid.framework._get_var(pattr.name)
        bias = fluid.framework._get_var(battr.name)
    elif norm_type == 'gn':
        out = fluid.layers.group_norm(
            input=conv,
            act=act,
            name=norm_name + '.output.1',
            groups=norm_groups,
            param_attr=pattr,
            bias_attr=battr)
        scale = fluid.framework._get_var(pattr.name)
        bias = fluid.framework._get_var(battr.name)
    elif norm_type == 'affine_channel':
        scale = fluid.layers.create_parameter(
            shape=[conv.shape[1]],
            dtype=conv.dtype,
            attr=pattr,
            default_initializer=fluid.initializer.Constant(1.))
        bias = fluid.layers.create_parameter(
            shape=[conv.shape[1]],
            dtype=conv.dtype,
            attr=battr,
            default_initializer=fluid.initializer.Constant(0.))
        out = fluid.layers.affine_channel(
            x=conv, scale=scale, bias=bias, act=act)
    else:
        # BUG FIX: an unknown norm_type previously fell through all
        # branches and raised a confusing NameError on `out`; fail fast
        # with a clear message instead.
        raise ValueError("unsupported norm_type: {}".format(norm_type))
    if freeze_norm:
        scale.stop_gradient = True
        bias.stop_gradient = True
    return out
def ConvNorm(input, def ConvNorm(input,
num_filters, num_filters,
filter_size, filter_size,
...@@ -178,6 +304,16 @@ def DropBlock(input, block_size, keep_prob, is_test): ...@@ -178,6 +304,16 @@ def DropBlock(input, block_size, keep_prob, is_test):
return output return output
def CreateTensorFromNumpy(numpy_array):
    """Wrap a numpy array as a constant (non-trainable) fluid parameter."""
    tensor = fluid.layers.create_parameter(
        shape=numpy_array.shape,
        dtype=numpy_array.dtype,
        attr=ParamAttr(),
        default_initializer=NumpyArrayInitializer(numpy_array))
    # The values come straight from the numpy array; never train them.
    tensor.stop_gradient = True
    return tensor
@register @register
@serializable @serializable
class AnchorGenerator(object): class AnchorGenerator(object):
...@@ -560,8 +696,6 @@ class BBoxAssigner(object): ...@@ -560,8 +696,6 @@ class BBoxAssigner(object):
@register @register
class LibraBBoxAssigner(object): class LibraBBoxAssigner(object):
__shared__ = ['num_classes']
def __init__(self, def __init__(self,
batch_size_per_im=512, batch_size_per_im=512,
fg_fraction=.25, fg_fraction=.25,
...@@ -807,7 +941,6 @@ class LibraBBoxAssigner(object): ...@@ -807,7 +941,6 @@ class LibraBBoxAssigner(object):
hs = boxes[:, 3] - boxes[:, 1] + 1 hs = boxes[:, 3] - boxes[:, 1] + 1
keep = np.where((ws > 0) & (hs > 0))[0] keep = np.where((ws > 0) & (hs > 0))[0]
boxes = boxes[keep] boxes = boxes[keep]
max_overlaps = max_overlaps[keep]
fg_inds = np.where(max_overlaps >= fg_thresh)[0] fg_inds = np.where(max_overlaps >= fg_thresh)[0]
bg_inds = np.where((max_overlaps < bg_thresh_hi) & ( bg_inds = np.where((max_overlaps < bg_thresh_hi) & (
max_overlaps >= bg_thresh_lo))[0] max_overlaps >= bg_thresh_lo))[0]
...@@ -1078,155 +1211,6 @@ class MultiBoxHead(object): ...@@ -1078,155 +1211,6 @@ class MultiBoxHead(object):
self.pad = pad self.pad = pad
@register
@serializable
class SSDLiteMultiBoxHead(object):
    """SSDLite multi-box prediction head.

    For each input feature map, generates prior (anchor) boxes and
    predicts per-prior box regression targets and class confidences
    using depthwise-separable (MobileNet-style) convolutions instead of
    the plain convs of the standard SSD head.

    Args:
        min_ratio (int): minimum prior-box scale, as percent of base_size.
        max_ratio (int): maximum prior-box scale, as percent of base_size.
        base_size (int): base input resolution used to derive box sizes.
        min_sizes (list|None): explicit per-level min box sizes; derived
            from min_ratio/max_ratio on first call when None.
        max_sizes (list|None): explicit per-level max box sizes.
        aspect_ratios (list): per-level aspect ratios for prior boxes.
        steps (list|None): per-level prior-box strides; when None, 0.0 is
            passed so prior_box infers the stride from the feature map.
        offset (float): prior-box center offset within each cell.
        flip (bool): also generate reciprocal aspect ratios.
        clip (bool): clip prior boxes to the image boundary.
        pad (int): stored for config compatibility (unused here).
        conv_decay (float): L2 decay applied to conv weights.
    """

    def __init__(self,
                 min_ratio=20,
                 max_ratio=90,
                 base_size=300,
                 min_sizes=None,
                 max_sizes=None,
                 aspect_ratios=[[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.],
                                [2., 3.]],
                 steps=None,
                 offset=0.5,
                 flip=True,
                 clip=False,
                 pad=0,
                 conv_decay=0.0):
        super(SSDLiteMultiBoxHead, self).__init__()
        self.min_ratio = min_ratio
        self.max_ratio = max_ratio
        self.base_size = base_size
        self.min_sizes = min_sizes
        self.max_sizes = max_sizes
        self.aspect_ratios = aspect_ratios
        self.steps = steps
        self.offset = offset
        self.flip = flip
        self.pad = pad
        self.clip = clip
        self.conv_decay = conv_decay

    def _separable_conv(self, input, num_filters, name):
        """Depthwise 3x3 conv + BN + ReLU6, followed by a pointwise 1x1 conv."""
        dwconv_param_attr = ParamAttr(
            name=name + 'dw_weights', regularizer=L2Decay(self.conv_decay))
        num_filter1 = input.shape[1]
        # Depthwise: one filter per input channel (groups == channels).
        depthwise_conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_filter1,
            filter_size=3,
            stride=1,
            padding="SAME",
            groups=int(num_filter1),
            act=None,
            use_cudnn=False,
            param_attr=dwconv_param_attr,
            bias_attr=False)
        bn_name = name + '_bn'
        bn_param_attr = ParamAttr(
            name=bn_name + "_scale", regularizer=L2Decay(0.0))
        bn_bias_attr = ParamAttr(
            name=bn_name + "_offset", regularizer=L2Decay(0.0))
        bn = fluid.layers.batch_norm(
            input=depthwise_conv,
            param_attr=bn_param_attr,
            bias_attr=bn_bias_attr,
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')
        bn = fluid.layers.relu6(bn)
        pwconv_param_attr = ParamAttr(
            name=name + 'pw_weights', regularizer=L2Decay(self.conv_decay))
        pointwise_conv = fluid.layers.conv2d(
            input=bn,
            num_filters=num_filters,
            filter_size=1,
            stride=1,
            act=None,
            use_cudnn=True,
            param_attr=pwconv_param_attr,
            bias_attr=False)
        return pointwise_conv

    def __call__(self, inputs, image, num_classes):
        """Build loc/conf predictions and prior boxes over all levels.

        Args:
            inputs (list): feature map variables, one per prediction level.
            image: the input image variable (for prior_box geometry).
            num_classes (int): number of classification outputs per prior.

        Returns:
            (loc, conf, prior_boxes, box_vars) concatenated across levels;
            prior boxes and variances have gradients stopped.
        """

        def _permute_and_reshape(input, last_dim):
            # NCHW -> NHWC, then flatten to (N, num_priors, last_dim).
            trans = fluid.layers.transpose(input, perm=[0, 2, 3, 1])
            compile_shape = [0, -1, last_dim]
            return fluid.layers.reshape(trans, shape=compile_shape)

        def _is_list_or_tuple_(data):
            return (isinstance(data, list) or isinstance(data, tuple))

        if self.min_sizes is None and self.max_sizes is None:
            # Derive per-level box sizes from the SSD ratio schedule.
            num_layer = len(inputs)
            self.min_sizes = []
            self.max_sizes = []
            step = int(
                math.floor(((self.max_ratio - self.min_ratio)) / (num_layer - 2
                                                                  )))
            for ratio in six.moves.range(self.min_ratio, self.max_ratio + 1,
                                         step):
                self.min_sizes.append(self.base_size * ratio / 100.)
                self.max_sizes.append(self.base_size * (ratio + step) / 100.)
            # First (highest-resolution) level gets extra-small boxes.
            self.min_sizes = [self.base_size * .10] + self.min_sizes
            self.max_sizes = [self.base_size * .20] + self.max_sizes

        locs, confs = [], []
        boxes, mvars = [], []
        for i, input in enumerate(inputs):
            min_size = self.min_sizes[i]
            max_size = self.max_sizes[i]
            if not _is_list_or_tuple_(min_size):
                min_size = [min_size]
            if not _is_list_or_tuple_(max_size):
                max_size = [max_size]
            step = [
                self.steps[i] if self.steps else 0.0, self.steps[i]
                if self.steps else 0.0
            ]
            box, var = fluid.layers.prior_box(
                input,
                image,
                min_sizes=min_size,
                max_sizes=max_size,
                steps=step,
                aspect_ratios=self.aspect_ratios[i],
                variance=[0.1, 0.1, 0.2, 0.2],
                clip=self.clip,
                flip=self.flip,
                # BUG FIX: honor the configured center offset; previously
                # 0.5 was hard-coded here, silently ignoring the `offset`
                # constructor argument (identical for the default config).
                offset=self.offset)

            num_boxes = box.shape[2]
            box = fluid.layers.reshape(box, shape=[-1, 4])
            var = fluid.layers.reshape(var, shape=[-1, 4])
            num_loc_output = num_boxes * 4
            num_conf_output = num_boxes * num_classes
            # get loc
            mbox_loc = self._separable_conv(input, num_loc_output,
                                            "loc_{}".format(i + 1))
            loc = _permute_and_reshape(mbox_loc, 4)
            # get conf
            mbox_conf = self._separable_conv(input, num_conf_output,
                                             "conf_{}".format(i + 1))
            conf = _permute_and_reshape(mbox_conf, num_classes)

            locs.append(loc)
            confs.append(conf)
            boxes.append(box)
            mvars.append(var)

        ssd_mbox_loc = fluid.layers.concat(locs, axis=1)
        ssd_mbox_conf = fluid.layers.concat(confs, axis=1)
        prior_boxes = fluid.layers.concat(boxes)
        box_vars = fluid.layers.concat(mvars)
        prior_boxes.stop_gradient = True
        box_vars.stop_gradient = True
        return ssd_mbox_loc, ssd_mbox_conf, prior_boxes, box_vars
@register @register
@serializable @serializable
class SSDOutputDecoder(object): class SSDOutputDecoder(object):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册