diff --git a/ppdet/modeling/backbones/__init__.py b/ppdet/modeling/backbones/__init__.py index 7cd17ec09a17f4de054782eefd0c6ea87365fd7a..2c31e792dc88c505c0e4a3e615be2ec390357c58 100644 --- a/ppdet/modeling/backbones/__init__.py +++ b/ppdet/modeling/backbones/__init__.py @@ -18,6 +18,7 @@ from . import resnet from . import resnext from . import darknet from . import mobilenet +from . import mobilenet_v3 from . import senet from . import fpn from . import vgg @@ -33,6 +34,7 @@ from .resnet import * from .resnext import * from .darknet import * from .mobilenet import * +from .mobilenet_v3 import * from .senet import * from .fpn import * from .vgg import * diff --git a/ppdet/modeling/backbones/mobilenet_v3.py b/ppdet/modeling/backbones/mobilenet_v3.py new file mode 100644 index 0000000000000000000000000000000000000000..dc8a059c5ba11e494e8eb84ff8eea24e760f07a1 --- /dev/null +++ b/ppdet/modeling/backbones/mobilenet_v3.py @@ -0,0 +1,277 @@ +import paddle.fluid as fluid +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.regularizer import L2Decay + +from ppdet.core.workspace import register +import math + +__all__ = ['MobileNetV3'] + + +@register +class MobileNetV3(): + def __init__(self, + scale=1.0, + model_name='small', + with_extra_blocks=False, + conv_decay=0.0, + bn_decay=0.0, + extra_block_filters=[[256, 512], [128, 256], [128, 256], + [64, 128]]): + self.scale = scale + self.model_name = model_name + self.with_extra_blocks = with_extra_blocks + self.extra_block_filters = extra_block_filters + self.conv_decay = conv_decay + self.bn_decay = bn_decay + self.inplanes = 16 + self.end_points = [] + self.block_stride = 1 + if model_name == "large": + self.cfg = [ + # kernel_size, expand, channel, se_block, act_mode, stride + [3, 16, 16, False, 'relu', 1], + [3, 64, 24, False, 'relu', 2], + [3, 72, 24, False, 'relu', 1], + [5, 72, 40, True, 'relu', 2], + [5, 120, 40, True, 'relu', 1], + [5, 120, 40, True, 'relu', 1], + [3, 240, 80, False, 'hard_swish', 2], + [3, 200, 80, False, 'hard_swish', 1], + [3, 184, 80, False, 'hard_swish', 1], + [3, 184, 80, False, 'hard_swish', 1], + [3, 480, 112, True, 'hard_swish', 1], + [3, 672, 112, True, 'hard_swish', 1], + [5, 672, 160, True, 'hard_swish', 2], + [5, 960, 160, True, 'hard_swish', 1], + [5, 960, 160, True, 'hard_swish', 1], + ] + elif model_name == "small": + self.cfg = [ + # kernel_size, expand, channel, se_block, act_mode, stride + [3, 16, 16, True, 'relu', 2], + [3, 72, 24, False, 'relu', 2], + [3, 88, 24, False, 'relu', 1], + [5, 96, 40, True, 'hard_swish', 2], + [5, 240, 40, True, 'hard_swish', 1], + [5, 240, 40, True, 'hard_swish', 1], + [5, 120, 48, True, 'hard_swish', 1], + [5, 144, 48, True, 'hard_swish', 1], + [5, 288, 96, True, 'hard_swish', 2], + [5, 576, 96, True, 'hard_swish', 1], + [5, 576, 96, True, 'hard_swish', 1], + ] + else: + raise NotImplementedError + + def _conv_bn_layer(self, + input, + filter_size, + num_filters, + stride, + padding, + num_groups=1, + if_act=True, + act=None, + name=None, + use_cudnn=True): + conv_param_attr = ParamAttr( + name=name + '_weights', regularizer=L2Decay(self.conv_decay)) + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=padding, + groups=num_groups, + act=None, + use_cudnn=use_cudnn, + param_attr=conv_param_attr, + bias_attr=False) + bn_name = name + '_bn' + bn_param_attr = ParamAttr( + name=bn_name + "_scale", regularizer=L2Decay(self.bn_decay)) + bn_bias_attr = ParamAttr( + name=bn_name + "_offset", 
regularizer=L2Decay(self.bn_decay)) + bn = fluid.layers.batch_norm( + input=conv, + param_attr=bn_param_attr, + bias_attr=bn_bias_attr, + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + if if_act: + if act == 'relu': + bn = fluid.layers.relu(bn) + elif act == 'hard_swish': + bn = self._hard_swish(bn) + elif act == 'relu6': + bn = fluid.layers.relu6(bn) + return bn + + def _hard_swish(self, x): + return x * fluid.layers.relu6(x + 3) / 6. + + def _se_block(self, input, num_out_filter, ratio=4, name=None): + num_mid_filter = int(num_out_filter // ratio) + pool = fluid.layers.pool2d( + input=input, pool_type='avg', global_pooling=True, use_cudnn=False) + conv1 = fluid.layers.conv2d( + input=pool, + filter_size=1, + num_filters=num_mid_filter, + act='relu', + param_attr=ParamAttr(name=name + '_1_weights'), + bias_attr=ParamAttr(name=name + '_1_offset')) + conv2 = fluid.layers.conv2d( + input=conv1, + filter_size=1, + num_filters=num_out_filter, + act='hard_sigmoid', + param_attr=ParamAttr(name=name + '_2_weights'), + bias_attr=ParamAttr(name=name + '_2_offset')) + + scale = fluid.layers.elementwise_mul(x=input, y=conv2, axis=0) + return scale + + def _residual_unit(self, + input, + num_in_filter, + num_mid_filter, + num_out_filter, + stride, + filter_size, + act=None, + use_se=False, + name=None): + input_data = input + conv0 = self._conv_bn_layer( + input=input, + filter_size=1, + num_filters=num_mid_filter, + stride=1, + padding=0, + if_act=True, + act=act, + name=name + '_expand') + if self.block_stride == 16 and stride == 2: + self.end_points.append(conv0) + conv1 = self._conv_bn_layer( + input=conv0, + filter_size=filter_size, + num_filters=num_mid_filter, + stride=stride, + padding=int((filter_size - 1) // 2), + if_act=True, + act=act, + num_groups=num_mid_filter, + use_cudnn=False, + name=name + '_depthwise') + + if use_se: + conv1 = self._se_block( + input=conv1, num_out_filter=num_mid_filter, name=name + '_se') + + conv2 = self._conv_bn_layer( + input=conv1, + filter_size=1, + num_filters=num_out_filter, + stride=1, + padding=0, + if_act=False, + name=name + '_linear') + if num_in_filter != num_out_filter or stride != 1: + return conv2 + else: + return fluid.layers.elementwise_add(x=input_data, y=conv2, act=None) + + def _extra_block_dw(self, + input, + num_filters1, + num_filters2, + stride, + name=None): + pointwise_conv = self._conv_bn_layer( + input=input, + filter_size=1, + num_filters=int(num_filters1), + stride=1, + padding="SAME", + act='relu6', + name=name + "_extra1") + depthwise_conv = self._conv_bn_layer( + input=pointwise_conv, + filter_size=3, + num_filters=int(num_filters2), + stride=stride, + padding="SAME", + num_groups=int(num_filters1), + act='relu6', + use_cudnn=False, + name=name + "_extra2_dw") + normal_conv = self._conv_bn_layer( + input=depthwise_conv, + filter_size=1, + num_filters=int(num_filters2), + stride=1, + padding="SAME", + act='relu6', + name=name + "_extra2_sep") + return normal_conv + + def __call__(self, input): + scale = self.scale + inplanes = self.inplanes + cfg = self.cfg + blocks = [] + + #conv1 + conv = self._conv_bn_layer( + input, + filter_size=3, + num_filters=inplanes if scale <= 1.0 else int(inplanes * scale), + stride=2, + padding=1, + num_groups=1, + if_act=True, + act='hard_swish', + name='conv1') + i = 0 + for layer_cfg in cfg: + self.block_stride *= layer_cfg[5] + conv = self._residual_unit( + input=conv, + num_in_filter=inplanes, + num_mid_filter=int(scale * layer_cfg[1]), + 
num_out_filter=int(scale * layer_cfg[2]), + act=layer_cfg[4], + stride=layer_cfg[5], + filter_size=layer_cfg[0], + use_se=layer_cfg[3], + name='conv' + str(i + 2)) + inplanes = int(scale * layer_cfg[2]) + i += 1 + + if not self.with_extra_blocks: + return conv + + # extra block + conv_extra = self._conv_bn_layer( + conv, + filter_size=1, + num_filters=int(scale * cfg[-1][1]), + stride=1, + padding="SAME", + num_groups=1, + if_act=True, + act='hard_swish', + name='conv' + str(i + 2)) + self.end_points.append(conv_extra) + i += 1 + for block_filter in self.extra_block_filters: + conv_extra = self._extra_block_dw(conv_extra, block_filter[0], + block_filter[1], 2, + 'conv' + str(i + 2)) + self.end_points.append(conv_extra) + i += 1 + + return self.end_points diff --git a/ppdet/modeling/ops.py b/ppdet/modeling/ops.py index a94e1f4519194c60bbc19272c094f426910ad649..be17edcd95905df57ff78ee52901a33f057cfaa2 100644 --- a/ppdet/modeling/ops.py +++ b/ppdet/modeling/ops.py @@ -14,6 +14,8 @@ import numpy as np from numbers import Integral +import math +import six from paddle import fluid from paddle.fluid.param_attr import ParamAttr @@ -24,8 +26,9 @@ from ppdet.utils.bbox_utils import bbox_overlaps, box_to_delta __all__ = [ 'AnchorGenerator', 'DropBlock', 'RPNTargetAssign', 'GenerateProposals', 'MultiClassNMS', 'BBoxAssigner', 'MaskAssigner', 'RoIAlign', 'RoIPool', - 'MultiBoxHead', 'SSDOutputDecoder', 'RetinaTargetAssign', - 'RetinaOutputDecoder', 'ConvNorm', 'MultiClassSoftNMS', 'LibraBBoxAssigner' + 'MultiBoxHead', 'SSDLiteMultiBoxHead', 'SSDOutputDecoder', + 'RetinaTargetAssign', 'RetinaOutputDecoder', 'ConvNorm', + 'MultiClassSoftNMS', 'LibraBBoxAssigner' ] @@ -1064,6 +1067,155 @@ class MultiBoxHead(object): self.pad = pad +@register +@serializable +class SSDLiteMultiBoxHead(object): + def __init__(self, + min_ratio=20, + max_ratio=90, + base_size=300, + min_sizes=None, + max_sizes=None, + aspect_ratios=[[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.], + [2., 3.]], + steps=None, + offset=0.5, + flip=True, + clip=False, + pad=0, + conv_decay=0.0): + super(SSDLiteMultiBoxHead, self).__init__() + self.min_ratio = min_ratio + self.max_ratio = max_ratio + self.base_size = base_size + self.min_sizes = min_sizes + self.max_sizes = max_sizes + self.aspect_ratios = aspect_ratios + self.steps = steps + self.offset = offset + self.flip = flip + self.pad = pad + self.clip = clip + self.conv_decay = conv_decay + + def _separable_conv(self, input, num_filters, name): + dwconv_param_attr = ParamAttr( + name=name + 'dw_weights', regularizer=L2Decay(self.conv_decay)) + num_filter1 = input.shape[1] + depthwise_conv = fluid.layers.conv2d( + input=input, + num_filters=num_filter1, + filter_size=3, + stride=1, + padding="SAME", + groups=int(num_filter1), + act=None, + use_cudnn=False, + param_attr=dwconv_param_attr, + bias_attr=False) + bn_name = name + '_bn' + bn_param_attr = ParamAttr( + name=bn_name + "_scale", regularizer=L2Decay(0.0)) + bn_bias_attr = ParamAttr( + name=bn_name + "_offset", regularizer=L2Decay(0.0)) + bn = fluid.layers.batch_norm( + input=depthwise_conv, + param_attr=bn_param_attr, + bias_attr=bn_bias_attr, + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + bn = fluid.layers.relu6(bn) + pwconv_param_attr = ParamAttr( + name=name + 'pw_weights', regularizer=L2Decay(self.conv_decay)) + pointwise_conv = fluid.layers.conv2d( + input=bn, + num_filters=num_filters, + filter_size=1, + stride=1, + act=None, + use_cudnn=True, + param_attr=pwconv_param_attr, + 
bias_attr=False) + return pointwise_conv + + def __call__(self, inputs, image, num_classes): + def _permute_and_reshape(input, last_dim): + trans = fluid.layers.transpose(input, perm=[0, 2, 3, 1]) + compile_shape = [0, -1, last_dim] + return fluid.layers.reshape(trans, shape=compile_shape) + + def _is_list_or_tuple_(data): + return (isinstance(data, list) or isinstance(data, tuple)) + + if self.min_sizes is None and self.max_sizes is None: + num_layer = len(inputs) + self.min_sizes = [] + self.max_sizes = [] + step = int( + math.floor(((self.max_ratio - self.min_ratio)) / (num_layer - 2 + ))) + for ratio in six.moves.range(self.min_ratio, self.max_ratio + 1, + step): + self.min_sizes.append(self.base_size * ratio / 100.) + self.max_sizes.append(self.base_size * (ratio + step) / 100.) + self.min_sizes = [self.base_size * .10] + self.min_sizes + self.max_sizes = [self.base_size * .20] + self.max_sizes + + locs, confs = [], [] + boxes, mvars = [], [] + + for i, input in enumerate(inputs): + min_size = self.min_sizes[i] + max_size = self.max_sizes[i] + if not _is_list_or_tuple_(min_size): + min_size = [min_size] + if not _is_list_or_tuple_(max_size): + max_size = [max_size] + step = [ + self.steps[i] if self.steps else 0.0, self.steps[i] + if self.steps else 0.0 + ] + box, var = fluid.layers.prior_box( + input, + image, + min_sizes=min_size, + max_sizes=max_size, + steps=step, + aspect_ratios=self.aspect_ratios[i], + variance=[0.1, 0.1, 0.2, 0.2], + clip=self.clip, + flip=self.flip, + offset=0.5) + + num_boxes = box.shape[2] + box = fluid.layers.reshape(box, shape=[-1, 4]) + var = fluid.layers.reshape(var, shape=[-1, 4]) + num_loc_output = num_boxes * 4 + num_conf_output = num_boxes * num_classes + # get loc + mbox_loc = self._separable_conv(input, num_loc_output, + "loc_{}".format(i + 1)) + loc = _permute_and_reshape(mbox_loc, 4) + # get conf + mbox_conf = self._separable_conv(input, num_conf_output, + "conf_{}".format(i + 1)) + conf = _permute_and_reshape(mbox_conf, num_classes) + + locs.append(loc) + confs.append(conf) + boxes.append(box) + mvars.append(var) + + ssd_mbox_loc = fluid.layers.concat(locs, axis=1) + ssd_mbox_conf = fluid.layers.concat(confs, axis=1) + prior_boxes = fluid.layers.concat(boxes) + box_vars = fluid.layers.concat(mvars) + + prior_boxes.stop_gradient = True + box_vars.stop_gradient = True + return ssd_mbox_loc, ssd_mbox_conf, prior_boxes, box_vars + + @register @serializable class SSDOutputDecoder(object):
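
Usage note (not part of the diff): a minimal sketch of how the new MobileNetV3 backbone and SSDLiteMultiBoxHead could be wired together in a fluid program once this patch is applied. The input name, the 320x320 resolution, and the class count are illustrative assumptions; in the repository these pieces are normally constructed from a YAML config through ppdet.core.workspace rather than instantiated directly.

import paddle.fluid as fluid
from ppdet.modeling.backbones import MobileNetV3
from ppdet.modeling.ops import SSDLiteMultiBoxHead

# Illustrative input variable; the name and 320x320 shape are assumptions.
image = fluid.layers.data(name='image', shape=[3, 320, 320], dtype='float32')

# With with_extra_blocks=True, __call__ returns self.end_points: one
# intermediate tap appended inside _residual_unit, the final 1x1 expansion
# conv, and one output per entry in extra_block_filters (four by default),
# i.e. six feature maps, matching the six groups in the head's default
# aspect_ratios.
backbone = MobileNetV3(scale=1.0, model_name='large', with_extra_blocks=True)
feature_maps = backbone(image)

# base_size=320 is an assumption to match the input resolution above;
# num_classes=81 assumes a COCO-style setup (80 classes plus background).
head = SSDLiteMultiBoxHead(base_size=320)
locs, confs, prior_boxes, box_vars = head(feature_maps, image, num_classes=81)

The head returns the concatenated location and confidence predictions together with the prior boxes and their variances, which is the same four-tuple contract as the existing MultiBoxHead, so downstream SSD losses and SSDOutputDecoder can consume it unchanged.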