diff --git a/configs/ssd/ssdlite_ghostnet.yml b/configs/ssd/ssdlite_ghostnet.yml new file mode 100644 index 0000000000000000000000000000000000000000..5a7f0e749f68582826da70be68c6ee90cc1e700c --- /dev/null +++ b/configs/ssd/ssdlite_ghostnet.yml @@ -0,0 +1,162 @@ +architecture: SSD +use_gpu: true +max_iters: 400000 +snapshot_iter: 20000 +log_smooth_window: 20 +log_iter: 20 +metric: COCO +pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/GhostNet_x1_3_ssld_pretrained.tar +save_dir: output +weights: output/ssdlite_ghostnet/model_final +# 80(label_class) + 1(background) +num_classes: 81 + +SSD: + backbone: GhostNet + multi_box_head: SSDLiteMultiBoxHead + output_decoder: + background_label: 0 + keep_top_k: 200 + nms_eta: 1.0 + nms_threshold: 0.45 + nms_top_k: 400 + score_threshold: 0.01 + + +GhostNet: + scale: 1.3 + extra_block_filters: [[256, 512], [128, 256], [128, 256], [64, 128]] + feature_maps: [5, 7, 8, 9, 10, 11] + conv_decay: 0.00004 + lr_mult_list: [0.25, 0.25, 0.5, 0.5, 0.75] + +SSDLiteMultiBoxHead: + aspect_ratios: [[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.], [2., 3.]] + base_size: 320 + steps: [16, 32, 64, 107, 160, 320] + flip: true + clip: true + max_ratio: 95 + min_ratio: 20 + offset: 0.5 + conv_decay: 0.00004 + +LearningRate: + base_lr: 0.2 + schedulers: + - !CosineDecay + max_iters: 400000 + - !LinearWarmup + start_factor: 0.33333 + steps: 2000 + +OptimizerBuilder: + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0005 + type: L2 + +TrainReader: + inputs_def: + image_shape: [3, 320, 320] + fields: ['image', 'gt_bbox', 'gt_class'] + dataset: + !COCODataSet + dataset_dir: dataset/coco + anno_path: annotations/instances_train2017.json + image_dir: train2017 + sample_transforms: + - !DecodeImage + to_rgb: true + - !RandomDistort + brightness_lower: 0.875 + brightness_upper: 1.125 + is_order: true + - !RandomExpand + fill_value: [123.675, 116.28, 103.53] + - !RandomCrop + allow_no_crop: false + - !NormalizeBox {} + - 
!ResizeImage + interp: 1 + target_size: 320 + use_cv2: false + - !RandomFlipImage + is_normalized: false + - !NormalizeImage + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + is_scale: true + is_channel_first: false + - !Permute + to_bgr: false + channel_first: true + batch_size: 64 + shuffle: true + drop_last: true + # Number of working threads/processes. To speed up, can be set to 16 or 32 etc. + worker_num: 8 + # Size of shared memory used in result queue. After increasing `worker_num`, need expand `memsize`. + memsize: 8G + # Buffer size for multi threads/processes.one instance in buffer is one batch data. + # To speed up, can be set to 64 or 128 etc. + bufsize: 32 + use_process: true + + +EvalReader: + inputs_def: + image_shape: [3, 320, 320] + fields: ['image', 'gt_bbox', 'gt_class', 'im_shape', 'im_id'] + dataset: + !COCODataSet + dataset_dir: dataset/coco + anno_path: annotations/instances_val2017.json + image_dir: val2017 + sample_transforms: + - !DecodeImage + to_rgb: true + - !NormalizeBox {} + - !ResizeImage + interp: 1 + target_size: 320 + use_cv2: false + - !NormalizeImage + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + is_scale: true + is_channel_first: false + - !Permute + to_bgr: false + channel_first: True + batch_size: 8 + worker_num: 8 + bufsize: 32 + use_process: false + +TestReader: + inputs_def: + image_shape: [3,320,320] + fields: ['image', 'im_id', 'im_shape'] + dataset: + !ImageFolder + anno_path: annotations/instances_val2017.json + sample_transforms: + - !DecodeImage + to_rgb: true + - !ResizeImage + interp: 1 + max_size: 0 + target_size: 320 + use_cv2: false + - !NormalizeImage + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + is_scale: true + is_channel_first: false + - !Permute + to_bgr: false + channel_first: True + batch_size: 1 diff --git a/docs/MODEL_ZOO.md b/docs/MODEL_ZOO.md index 59b0ee03031bccf198ff13f09a2bbba57a13f4e5..dcf66d2cf1d591b203dc6e05193c3cc47f0f7b3c 100644 --- a/docs/MODEL_ZOO.md 
+++ b/docs/MODEL_ZOO.md @@ -200,6 +200,7 @@ results of image size 608/416/320 above. Deformable conv is added on stage 5 of | MobileNet_v3 large | 320 | 64 | Cosine decay(40w) | - | 23.3 | [model](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/ssdlite_mobilenet_v3_large.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ssd/ssdlite_mobilenet_v3_large.yml) | | MobileNet_v3 small w/ FPN | 320 | 64 | Cosine decay(40w) | - | 18.9 | [model](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/ssdlite_mobilenet_v3_small_fpn.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ssd/ssdlite_mobilenet_v3_small_fpn.yml) | | MobileNet_v3 large w/ FPN | 320 | 64 | Cosine decay(40w) | - | 24.3 | [model](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/ssdlite_mobilenet_v3_large_fpn.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ssd/ssdlite_mobilenet_v3_large_fpn.yml) | +| GhostNet | 320 | 64 | Cosine decay(40w) | - | 23.3 | [model](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/ssdlite_ghostnet.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ssd/ssdlite_ghostnet.yml) | **Notes:** `SSDLite` is trained in 8 GPU with total batch size as 512 and uses cosine decay strategy to train. 
diff --git a/docs/MODEL_ZOO_cn.md b/docs/MODEL_ZOO_cn.md index bf82ca010c4b6b709ffd587bd06c7700accda785..778d866e2261fdb572645e8d1a5fce7df05b659b 100644 --- a/docs/MODEL_ZOO_cn.md +++ b/docs/MODEL_ZOO_cn.md @@ -192,6 +192,7 @@ Paddle提供基于ImageNet的骨架网络预训练模型。所有预训练模型 | MobileNet_v3 large | 320 | 64 | Cosine decay(40w) | - | 23.3 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/ssdlite_mobilenet_v3_large.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ssd/ssdlite_mobilenet_v3_large.yml) | | MobileNet_v3 small w/ FPN | 320 | 64 | Cosine decay(40w) | - | 18.9 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/ssdlite_mobilenet_v3_small_fpn.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ssd/ssdlite_mobilenet_v3_small_fpn.yml) | | MobileNet_v3 large w/ FPN | 320 | 64 | Cosine decay(40w) | - | 24.3 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/ssdlite_mobilenet_v3_large_fpn.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ssd/ssdlite_mobilenet_v3_large_fpn.yml) | +| GhostNet | 320 | 64 | Cosine decay(40w) | - | 23.3 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/ssdlite_ghostnet.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ssd/ssdlite_ghostnet.yml) | **注意事项:** SSDLite模型使用学习率余弦衰减策略在8卡GPU下总batch size为512。 diff --git a/ppdet/modeling/backbones/__init__.py b/ppdet/modeling/backbones/__init__.py index 30d1b9f50ebbfafeedb39819d4724b3a5e1621d6..a6d2eb18fad8e8099be4ce26562f4b8e33c73c92 100644 --- a/ppdet/modeling/backbones/__init__.py +++ b/ppdet/modeling/backbones/__init__.py @@ -34,6 +34,7 @@ from . import efficientnet from . import bifpn from . import cspdarknet from . import acfpn +from . 
@register
class GhostNet(object):
    """
    GhostNet backbone that collects intermediate feature maps and, when the
    requested stage indices go beyond the bottleneck stack, appends extra
    stride-2 blocks.

    Args:
        scale (float): scaling factor for convolution groups proportion of
            GhostNet. Channel counts from the stage table are multiplied by
            this value and rounded with `_make_divisible`.
        feature_maps (list): index of stages whose feature maps are returned.
        conv_decay (float): weight decay for convolution layer weights.
        extra_block_filters (list): number of filter for each extra block,
            given as `[pointwise_filters, output_filters]` pairs.
        lr_mult_list (list): learning rate ratio of different blocks, lower
            learning rate ratio is need for pretrained model got using
            distillation(default as [1.0, 1.0, 1.0, 1.0, 1.0]).
        freeze_norm (bool): when True, batch-norm scale and offset are
            created with a learning rate of 0.
    """

    # NOTE: the list defaults below are shared between instances, but this
    # class only reads them (membership tests, iteration, indexing) and never
    # mutates them. They are kept in the signature unchanged so callers and
    # any tooling that inspects the defaults see the same interface.
    def __init__(
            self,
            scale,
            feature_maps=[5, 6, 7, 8, 9, 10],
            conv_decay=0.00001,
            extra_block_filters=[[256, 512], [128, 256], [128, 256], [64, 128]],
            lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0],
            freeze_norm=False):
        self.scale = scale
        self.feature_maps = feature_maps
        self.extra_block_filters = extra_block_filters
        # Feature maps selected by `feature_maps`, filled during `__call__`.
        self.end_points = []
        # Running stage counter compared against `feature_maps`.
        self.block_stride = 0
        self.conv_decay = conv_decay
        self.lr_mult_list = lr_mult_list
        self.freeze_norm = freeze_norm
        # Index of the bottleneck currently being built; selects the entry of
        # `lr_mult_list` used for newly created parameters.
        self.curr_stage = 0

        self.cfgs = [
            # k, t, c, se, s
            # (kernel size, expansion size, output channels, use SE, stride)
            [3, 16, 16, 0, 1],
            [3, 48, 24, 0, 2],
            [3, 72, 24, 0, 1],
            [5, 72, 40, 1, 2],
            [5, 120, 40, 1, 1],
            [3, 240, 80, 0, 2],
            [3, 200, 80, 0, 1],
            [3, 184, 80, 0, 1],
            [3, 184, 80, 0, 1],
            [3, 480, 112, 1, 1],
            [3, 672, 112, 1, 1],
            [5, 672, 160, 1, 2],
            [5, 960, 160, 0, 1],
            [5, 960, 160, 1, 1],
            [5, 960, 160, 0, 1],
            [5, 960, 160, 1, 1]
        ]

    def _conv_bn_layer(self,
                       input,
                       num_filters,
                       filter_size,
                       stride=1,
                       groups=1,
                       act=None,
                       name=None):
        """
        Build a bias-free conv2d followed by batch_norm.

        Args:
            input: input variable.
            num_filters (int): number of convolution output channels.
            filter_size (int): square kernel size; padding is
                `(filter_size - 1) // 2`.
            stride (int): convolution stride.
            groups (int): convolution groups.
            act (str|None): activation applied by the batch_norm layer; the
                convolution itself has no activation.
            name (str): prefix used to derive all parameter names.

        Returns:
            The batch-normalized output variable.
        """
        # Every three bottlenecks share one learning-rate multiplier; stages
        # beyond the end of the list reuse its last entry.
        lr_idx = self.curr_stage // 3
        lr_idx = min(lr_idx, len(self.lr_mult_list) - 1)
        lr_mult = self.lr_mult_list[lr_idx]
        norm_lr = 0. if self.freeze_norm else lr_mult

        x = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=groups,
            act=None,
            param_attr=ParamAttr(
                regularizer=L2Decay(self.conv_decay),
                learning_rate=lr_mult,
                initializer=fluid.initializer.MSRA(),
                name=name + "_weights"),
            bias_attr=False)
        bn_name = name + "_bn"
        x = fluid.layers.batch_norm(
            input=x,
            act=act,
            param_attr=ParamAttr(
                name=bn_name + "_scale",
                learning_rate=norm_lr,
                regularizer=L2Decay(0.0)),
            bias_attr=ParamAttr(
                name=bn_name + "_offset",
                learning_rate=norm_lr,
                regularizer=L2Decay(0.0)),
            moving_mean_name=bn_name + "_mean",
            # NOTE(review): this uses `name`, not `bn_name`, unlike the three
            # names above. The string is a runtime parameter name, so it is
            # left untouched here; presumably pretrained checkpoints were
            # saved with this spelling -- confirm before changing it.
            moving_variance_name=name + "_variance")
        return x

    def se_block(self, input, num_channels, reduction_ratio=4, name=None):
        """
        Build a squeeze-and-excitation gate and apply it to `input`.

        Args:
            input: input variable.
            num_channels (int): size of the second fc layer.
            reduction_ratio (int): the first fc layer has
                `num_channels // reduction_ratio` units.
            name (str): prefix for the fc parameter names.

        Returns:
            `input` multiplied element-wise by the clipped gate.
        """
        lr_idx = self.curr_stage // 3
        lr_idx = min(lr_idx, len(self.lr_mult_list) - 1)
        lr_mult = self.lr_mult_list[lr_idx]
        pool = fluid.layers.pool2d(
            input=input, pool_type='avg', global_pooling=True, use_cudnn=False)
        # Uniform init bound derived from the fc layer's input width.
        stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
        squeeze = fluid.layers.fc(
            input=pool,
            size=num_channels // reduction_ratio,
            act='relu',
            param_attr=ParamAttr(
                learning_rate=lr_mult,
                initializer=fluid.initializer.Uniform(-stdv, stdv),
                name=name + '_1_weights'),
            bias_attr=ParamAttr(
                name=name + '_1_offset', learning_rate=lr_mult))
        stdv = 1.0 / math.sqrt(squeeze.shape[1] * 1.0)
        excitation = fluid.layers.fc(
            input=squeeze,
            size=num_channels,
            act=None,
            param_attr=ParamAttr(
                learning_rate=lr_mult,
                initializer=fluid.initializer.Uniform(-stdv, stdv),
                name=name + '_2_weights'),
            bias_attr=ParamAttr(
                name=name + '_2_offset', learning_rate=lr_mult))
        # The gate is a hard clip to [0, 1] rather than a sigmoid.
        excitation = fluid.layers.clip(x=excitation, min=0, max=1)
        se_scale = fluid.layers.elementwise_mul(x=input, y=excitation, axis=0)
        return se_scale

    def depthwise_conv(self,
                       input,
                       output,
                       kernel_size,
                       stride=1,
                       relu=False,
                       name=None):
        """
        Build a conv+bn layer whose group count equals the input channels.

        Args:
            input: input variable.
            output (int): number of output channels.
            kernel_size (int): square kernel size.
            stride (int): convolution stride.
            relu (bool): apply relu after batch_norm when True.
            name (str): name prefix; "_depthwise" is appended to it here.

        Returns:
            The output variable of the conv+bn layer.
        """
        # NOTE: callers in this class already pass names ending in
        # "_depthwise", so the suffix appears twice in the resulting
        # parameter names. These are runtime names and are kept as-is.
        return self._conv_bn_layer(
            input=input,
            num_filters=output,
            filter_size=kernel_size,
            stride=stride,
            groups=input.shape[1],
            act="relu" if relu else None,
            name=name + "_depthwise")

    def ghost_module(self,
                     input,
                     output,
                     kernel_size=1,
                     ratio=2,
                     dw_size=3,
                     stride=1,
                     relu=True,
                     name=None):
        """
        Build a ghost module: a primary conv plus a grouped "cheap" conv
        applied to its result, concatenated along axis 1.

        Args:
            input: input variable.
            output (int): target channel count; the primary conv produces
                `ceil(output / ratio)` channels and the cheap operation
                produces `(ratio - 1)` times that many.
            kernel_size (int): kernel size of the primary conv.
            ratio (int): divisor used to split `output` between the branches.
            dw_size (int): kernel size of the cheap operation.
            stride (int): stride of the primary conv.
            relu (bool): apply relu in both branches when True.
            name (str): prefix for parameter names.

        Returns:
            Concatenation of the primary and cheap branches.
        """
        self.output = output
        init_channels = int(math.ceil(output / ratio))
        new_channels = int(init_channels * (ratio - 1))
        primary_conv = self._conv_bn_layer(
            input=input,
            num_filters=init_channels,
            filter_size=kernel_size,
            stride=stride,
            groups=1,
            act="relu" if relu else None,
            name=name + "_primary_conv")
        cheap_operation = self._conv_bn_layer(
            input=primary_conv,
            num_filters=new_channels,
            filter_size=dw_size,
            stride=1,
            groups=init_channels,
            act="relu" if relu else None,
            name=name + "_cheap_operation")
        out = fluid.layers.concat([primary_conv, cheap_operation], axis=1)
        return out

    def ghost_bottleneck(self,
                         input,
                         hidden_dim,
                         output,
                         kernel_size,
                         stride,
                         use_se,
                         name=None):
        """
        Build one ghost bottleneck: expand with a ghost module, optionally
        down-sample and apply SE, project with a second ghost module, then
        add a shortcut.

        Args:
            input: input variable.
            hidden_dim (int): channel count of the expansion ghost module.
            output (int): channel count of the block output.
            kernel_size (int): kernel size of the depthwise convolutions.
            stride (int): 2 inserts a stride-2 depthwise conv; 1 does not.
            use_se: truthy to insert an SE block.
            name (str): prefix for parameter names.

        Returns:
            Sum of the main branch and the shortcut.
        """
        inp_channels = input.shape[1]
        x = self.ghost_module(
            input=input,
            output=hidden_dim,
            kernel_size=1,
            stride=1,
            relu=True,
            name=name + "_ghost_module_1")

        # On a stride-2 block reached with the counter at 4, advance the
        # counter once more and, if that index is requested, expose the
        # expanded tensor before it is down-sampled.
        if self.block_stride == 4 and stride == 2:
            self.block_stride += 1
            if self.block_stride in self.feature_maps:
                self.end_points.append(x)

        if stride == 2:
            x = self.depthwise_conv(
                input=x,
                output=hidden_dim,
                kernel_size=kernel_size,
                stride=stride,
                relu=False,
                name=name + "_depthwise")
        if use_se:
            x = self.se_block(
                input=x, num_channels=hidden_dim, name=name + "_se")
        x = self.ghost_module(
            input=x,
            output=output,
            kernel_size=1,
            relu=False,
            name=name + "_ghost_module_2")
        if stride == 1 and inp_channels == output:
            # Shapes already match: identity shortcut.
            shortcut = input
        else:
            # Otherwise match stride and channels with depthwise + 1x1 conv.
            shortcut = self.depthwise_conv(
                input=input,
                output=inp_channels,
                kernel_size=kernel_size,
                stride=stride,
                relu=False,
                name=name + "_shortcut_depthwise")
            shortcut = self._conv_bn_layer(
                input=shortcut,
                num_filters=output,
                filter_size=1,
                stride=1,
                groups=1,
                act=None,
                name=name + "_shortcut_conv")
        return fluid.layers.elementwise_add(x=x, y=shortcut, axis=-1)

    def _extra_block_dw(self,
                        input,
                        num_filters1,
                        num_filters2,
                        stride,
                        name=None):
        """
        Build an extra block: 1x1 conv, 3x3 grouped conv, 1x1 conv, each
        followed by batch_norm with relu6.

        Args:
            input: input variable.
            num_filters1 (int): channels of the first 1x1 conv; also the
                group count of the 3x3 conv.
            num_filters2 (int): channels of the 3x3 conv and final 1x1 conv.
            stride (int): stride of the 3x3 conv.
            name (str): prefix for parameter names.

        Returns:
            Output variable of the final 1x1 conv.
        """
        pointwise_conv = self._conv_bn_layer(
            input=input,
            filter_size=1,
            num_filters=int(num_filters1),
            stride=1,
            act='relu6',
            name=name + "_extra1")
        depthwise_conv = self._conv_bn_layer(
            input=pointwise_conv,
            filter_size=3,
            num_filters=int(num_filters2),
            stride=stride,
            groups=int(num_filters1),
            act='relu6',
            name=name + "_extra2_dw")
        normal_conv = self._conv_bn_layer(
            input=depthwise_conv,
            filter_size=1,
            num_filters=int(num_filters2),
            stride=1,
            act='relu6',
            name=name + "_extra2_sep")
        return normal_conv

    def _make_divisible(self, v, divisor=8, min_value=None):
        """
        Round `v` to the nearest multiple of `divisor`.

        The result is never below `min_value` (defaults to `divisor`), and is
        bumped up by one `divisor` if rounding would drop it below 90% of `v`.
        """
        if min_value is None:
            min_value = divisor
        new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
        if new_v < 0.9 * v:
            new_v += divisor
        return new_v

    def __call__(self, input):
        """
        Build the network on `input` and return the selected feature maps.

        Args:
            input: input image variable.

        Returns:
            OrderedDict mapping 'ghost_{i}' to the i-th collected feature
            map, in the order the maps were collected.
        """
        # build first layer
        output_channel = int(self._make_divisible(16 * self.scale, 4))
        x = self._conv_bn_layer(
            input=input,
            num_filters=output_channel,
            filter_size=3,
            stride=2,
            groups=1,
            act="relu",
            name="conv1")
        # build inverted residual blocks
        idx = 0
        for k, exp_size, c, use_se, s in self.cfgs:
            if s == 2:
                # A stride-2 block starts a new stage; the tensor entering
                # it is the previous stage's output.
                self.block_stride += 1
                if self.block_stride in self.feature_maps:
                    self.end_points.append(x)
            output_channel = int(self._make_divisible(c * self.scale, 4))
            hidden_channel = int(self._make_divisible(exp_size * self.scale, 4))
            x = self.ghost_bottleneck(
                input=x,
                hidden_dim=hidden_channel,
                output=output_channel,
                kernel_size=k,
                stride=s,
                use_se=use_se,
                name="_ghostbottleneck_" + str(idx))
            idx += 1
            self.curr_stage += 1
        self.block_stride += 1
        if self.block_stride in self.feature_maps:
            # Fixed: this previously appended the undefined name `conv`,
            # raising NameError whenever this stage index was requested
            # (it is with the default `feature_maps`). The output of the
            # last bottleneck is `x`.
            self.end_points.append(x)

        # extra block
        # check whether conv_extra is needed
        if self.block_stride < max(self.feature_maps):
            conv_extra = self._conv_bn_layer(
                x,
                num_filters=self._make_divisible(self.scale * self.cfgs[-1][1]),
                filter_size=1,
                stride=1,
                groups=1,
                act='relu6',
                name='conv' + str(idx + 2))
            self.block_stride += 1
            if self.block_stride in self.feature_maps:
                self.end_points.append(conv_extra)
            idx += 1
            for block_filter in self.extra_block_filters:
                conv_extra = self._extra_block_dw(conv_extra, block_filter[0],
                                                  block_filter[1], 2,
                                                  'conv' + str(idx + 2))
                self.block_stride += 1
                if self.block_stride in self.feature_maps:
                    self.end_points.append(conv_extra)
                idx += 1

        return OrderedDict([('ghost_{}'.format(i), feat)
                            for i, feat in enumerate(self.end_points)])