diff --git a/configs/ssd/ssdlite_mobilenet_v3_large.yml b/configs/ssd/ssdlite_mobilenet_v3_large.yml new file mode 100644 index 0000000000000000000000000000000000000000..b55c31474852797875b0dfe18c839686df486197 --- /dev/null +++ b/configs/ssd/ssdlite_mobilenet_v3_large.yml @@ -0,0 +1,161 @@ +architecture: SSD +use_gpu: true +max_iters: 400000 +snapshot_iter: 20000 +log_smooth_window: 20 +log_iter: 20 +metric: COCO +pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV3_large_x1_0_ssld_pretrained.tar +save_dir: output +weights: output/ssdlite_mobilenet_v3_large/model_final +# 80(label_class) + 1(background) +num_classes: 81 + +SSD: + backbone: MobileNetV3 + multi_box_head: SSDLiteMultiBoxHead + output_decoder: + background_label: 0 + keep_top_k: 200 + nms_eta: 1.0 + nms_threshold: 0.45 + nms_top_k: 400 + score_threshold: 0.01 + +MobileNetV3: + scale: 1.0 + model_name: large + extra_block_filters: [[256, 512], [128, 256], [128, 256], [64, 128]] + with_extra_blocks: true + conv_decay: 0.00004 + +SSDLiteMultiBoxHead: + aspect_ratios: [[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.], [2., 3.]] + base_size: 320 + steps: [16, 32, 64, 107, 160, 320] + flip: true + clip: true + max_ratio: 95 + min_ratio: 20 + offset: 0.5 + conv_decay: 0.00004 + +LearningRate: + base_lr: 0.4 + schedulers: + - !CosineDecay + max_iters: 400000 + - !LinearWarmup + start_factor: 0.33333 + steps: 2000 + +OptimizerBuilder: + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0005 + type: L2 + +TrainReader: + inputs_def: + image_shape: [3, 320, 320] + fields: ['image', 'gt_bbox', 'gt_class'] + dataset: + !COCODataSet + dataset_dir: dataset/coco + anno_path: annotations/instances_train2017.json + image_dir: train2017 + sample_transforms: + - !DecodeImage + to_rgb: true + - !RandomDistort + brightness_lower: 0.875 + brightness_upper: 1.125 + is_order: true + - !RandomExpand + fill_value: [123.675, 116.28, 103.53] + - !RandomCrop + allow_no_crop: false + - 
!NormalizeBox {} + - !ResizeImage + interp: 1 + target_size: 320 + use_cv2: false + - !RandomFlipImage + is_normalized: false + - !NormalizeImage + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + is_scale: true + is_channel_first: false + - !Permute + to_bgr: false + channel_first: true + batch_size: 64 + shuffle: true + drop_last: true + # Number of worker threads/processes. To speed up, this can be set to 16 or 32, etc. + worker_num: 8 + # Size of the shared memory used by the result queue. If `worker_num` is increased, `memsize` needs to be increased as well. + memsize: 8G + # Buffer size for multiple threads/processes. One instance in the buffer is one batch of data. + # To speed up, this can be set to 64 or 128, etc. + bufsize: 32 + use_process: true + + +EvalReader: + inputs_def: + image_shape: [3, 320, 320] + fields: ['image', 'gt_bbox', 'gt_class', 'im_shape', 'im_id'] + dataset: + !COCODataSet + dataset_dir: dataset/coco + anno_path: annotations/instances_val2017.json + image_dir: val2017 + sample_transforms: + - !DecodeImage + to_rgb: true + - !NormalizeBox {} + - !ResizeImage + interp: 1 + target_size: 320 + use_cv2: false + - !NormalizeImage + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + is_scale: true + is_channel_first: false + - !Permute + to_bgr: false + channel_first: True + batch_size: 8 + worker_num: 8 + bufsize: 32 + use_process: false + +TestReader: + inputs_def: + image_shape: [3,320,320] + fields: ['image', 'im_id', 'im_shape'] + dataset: + !ImageFolder + anno_path: annotations/instances_val2017.json + sample_transforms: + - !DecodeImage + to_rgb: true + - !ResizeImage + interp: 1 + max_size: 0 + target_size: 320 + use_cv2: false + - !NormalizeImage + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + is_scale: true + is_channel_first: false + - !Permute + to_bgr: false + channel_first: True + batch_size: 1 diff --git a/configs/ssd/ssdlite_mobilenet_v3_small.yml b/configs/ssd/ssdlite_mobilenet_v3_small.yml new file mode 100644 index 
0000000000000000000000000000000000000000..b9c8428ff89733729c4728ba46025d0a157b69b1 --- /dev/null +++ b/configs/ssd/ssdlite_mobilenet_v3_small.yml @@ -0,0 +1,161 @@ +architecture: SSD +use_gpu: true +max_iters: 400000 +snapshot_iter: 20000 +log_smooth_window: 20 +log_iter: 20 +metric: COCO +pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV3_small_x1_0_pretrained.tar +save_dir: output +weights: output/ssdlite_mobilenet_v3_small/model_final +# 80(label_class) + 1(background) +num_classes: 81 + +SSD: + backbone: MobileNetV3 + multi_box_head: SSDLiteMultiBoxHead + output_decoder: + background_label: 0 + keep_top_k: 200 + nms_eta: 1.0 + nms_threshold: 0.45 + nms_top_k: 400 + score_threshold: 0.01 + +MobileNetV3: + scale: 1.0 + model_name: small + extra_block_filters: [[256, 512], [128, 256], [128, 256], [64, 128]] + with_extra_blocks: true + conv_decay: 0.00004 + +SSDLiteMultiBoxHead: + aspect_ratios: [[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.], [2., 3.]] + base_size: 320 + steps: [16, 32, 64, 107, 160, 320] + flip: true + clip: true + max_ratio: 95 + min_ratio: 20 + offset: 0.5 + conv_decay: 0.00004 + +LearningRate: + base_lr: 0.4 + schedulers: + - !CosineDecay + max_iters: 400000 + - !LinearWarmup + start_factor: 0.33333 + steps: 2000 + +OptimizerBuilder: + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0005 + type: L2 + +TrainReader: + inputs_def: + image_shape: [3, 320, 320] + fields: ['image', 'gt_bbox', 'gt_class'] + dataset: + !COCODataSet + dataset_dir: dataset/coco + anno_path: annotations/instances_train2017.json + image_dir: train2017 + sample_transforms: + - !DecodeImage + to_rgb: true + - !RandomDistort + brightness_lower: 0.875 + brightness_upper: 1.125 + is_order: true + - !RandomExpand + fill_value: [123.675, 116.28, 103.53] + - !RandomCrop + allow_no_crop: false + - !NormalizeBox {} + - !ResizeImage + interp: 1 + target_size: 320 + use_cv2: false + - !RandomFlipImage + is_normalized: false + - 
!NormalizeImage + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + is_scale: true + is_channel_first: false + - !Permute + to_bgr: false + channel_first: true + batch_size: 64 + shuffle: true + drop_last: true + # Number of worker threads/processes. To speed up, this can be set to 16 or 32, etc. + worker_num: 8 + # Size of the shared memory used by the result queue. If `worker_num` is increased, `memsize` needs to be increased as well. + memsize: 8G + # Buffer size for multiple threads/processes. One instance in the buffer is one batch of data. + # To speed up, this can be set to 64 or 128, etc. + bufsize: 32 + use_process: true + + +EvalReader: + inputs_def: + image_shape: [3, 320, 320] + fields: ['image', 'gt_bbox', 'gt_class', 'im_shape', 'im_id'] + dataset: + !COCODataSet + dataset_dir: dataset/coco + anno_path: annotations/instances_val2017.json + image_dir: val2017 + sample_transforms: + - !DecodeImage + to_rgb: true + - !NormalizeBox {} + - !ResizeImage + interp: 1 + target_size: 320 + use_cv2: false + - !NormalizeImage + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + is_scale: true + is_channel_first: false + - !Permute + to_bgr: false + channel_first: True + batch_size: 8 + worker_num: 8 + bufsize: 32 + use_process: false + +TestReader: + inputs_def: + image_shape: [3,320,320] + fields: ['image', 'im_id', 'im_shape'] + dataset: + !ImageFolder + anno_path: annotations/instances_val2017.json + sample_transforms: + - !DecodeImage + to_rgb: true + - !ResizeImage + interp: 1 + max_size: 0 + target_size: 320 + use_cv2: false + - !NormalizeImage + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + is_scale: true + is_channel_first: false + - !Permute + to_bgr: false + channel_first: True + batch_size: 1 diff --git a/docs/MODEL_ZOO.md b/docs/MODEL_ZOO.md index 99ac8ce8f6e8b436a4b4d8d9fbe8f8e37610b417..5985ce526bc77da3334683ebca5e78be051dd778 100644 --- a/docs/MODEL_ZOO.md +++ b/docs/MODEL_ZOO.md @@ -176,6 +176,15 @@ results of image size 608/416/320 above. 
Deformable conv is added on stage 5 of **Notes:** In RetinaNet, the base LR is changed to 0.01 for minibatch size 16. +### SSDLite + +| Backbone | Size | Image/gpu | Lr schd | Inf time (fps) | Box AP | Download | +| :------: | :--: | :-------: | :-----: | :------------: | :----: | :----------------------------------------------------------: | +| MobileNet_v3 small | 320 | 64 | 400k | - | 16.6 | [model](https://paddlemodels.bj.bcebos.com/object_detection/mobilenet_v3_ssdlite_small.tar) | +| MobileNet_v3 large | 320 | 64 | 400k | - | 22.8 | [model](https://paddlemodels.bj.bcebos.com/object_detection/mobilenet_v3_ssdlite_large.tar) | + +**Notes:** MobileNet_v3-SSDLite is trained on 8 GPUs with a total batch size of 512, using a cosine learning rate decay schedule. + ### SSD | Backbone | Size | Image/gpu | Lr schd | Inf time (fps) | Box AP | Download | diff --git a/docs/MODEL_ZOO_cn.md b/docs/MODEL_ZOO_cn.md index 1b25e8348342054c0734972ed12a9259ab4b7943..0deaf0ee14304f66709456c4999825b7e4f52033 100644 --- a/docs/MODEL_ZOO_cn.md +++ b/docs/MODEL_ZOO_cn.md @@ -129,9 +129,9 @@ Paddle提供基于ImageNet的骨架网络预训练模型。所有预训练模型 | MobileNet-V1 | ImageNet | 608 | 否 | 8 | 270e | 78.302 | 29.3 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1.tar) | | MobileNet-V1 | ImageNet | 416 | 否 | 8 | 270e | - | 29.3 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1.tar) | | MobileNet-V1 | ImageNet | 320 | 否 | 8 | 270e | - | 27.1 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1.tar) | -| MobileNet-V1 | ImageNet | 608 | 否 | 8 | 270e | - | 31.6 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v3.pdparams) | -| MobileNet-V1 | ImageNet | 416 | 否 | 8 | 270e | - | 29.9 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v3.pdparams) | -| MobileNet-V1 | ImageNet | 320 | 否 | 8 | 270e | - | 27.1 | 
[下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v3.pdparams) | +| MobileNet-V3 | ImageNet | 608 | 否 | 8 | 270e | - | 31.6 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v3.pdparams) | +| MobileNet-V3 | ImageNet | 416 | 否 | 8 | 270e | - | 29.9 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v3.pdparams) | +| MobileNet-V3 | ImageNet | 320 | 否 | 8 | 270e | - | 27.1 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v3.pdparams) | | ResNet34 | ImageNet | 608 | 否 | 8 | 270e | 63.356 | 36.2 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r34.tar) | | ResNet34 | ImageNet | 416 | 否 | 8 | 270e | - | 34.3 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r34.tar) | | ResNet34 | ImageNet | 320 | 否 | 8 | 270e | - | 31.4 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r34.tar) | @@ -168,6 +168,15 @@ Paddle提供基于ImageNet的骨架网络预训练模型。所有预训练模型 **注意事项:** RetinaNet系列模型中,在总batch size为16下情况下,初始学习率改为0.01。 +### SSDLite + +| 骨架网络 | 输入尺寸 | 每张GPU图片个数 | 学习率策略|推理时间(fps) | Box AP | 下载 | +| :----------: | :--: | :-----: | :-----: |:------------: |:----: | :-------: | +| MobileNet_v3 small | 320 | 64 | 40w | - | 16.6 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/mobilenet_v3_ssdlite_small.tar) | +| MobileNet_v3 large | 320 | 64 | 40w | - | 22.8 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/mobilenet_v3_ssdlite_large.tar) | + +**注意事项:** MobileNet_v3-SSDLite 使用学习率余弦衰减策略在8卡GPU下总batch size为512。 + ### SSD | 骨架网络 | 输入尺寸 | 每张GPU图片个数 | 学习率策略|推理时间(fps) | Box AP | 下载 | diff --git a/ppdet/modeling/backbones/mobilenet_v3.py b/ppdet/modeling/backbones/mobilenet_v3.py index f473d88b2998ea704b1d79156de631f60c4e42cb..2d947e3e79a61e3c6868d847940851bad9562d06 100644 --- a/ppdet/modeling/backbones/mobilenet_v3.py +++ b/ppdet/modeling/backbones/mobilenet_v3.py @@ -1,3 +1,17 @@ +# Copyright (c) 2020 PaddlePaddle 
Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import paddle.fluid as fluid from paddle.fluid.param_attr import ParamAttr from paddle.fluid.regularizer import L2Decay @@ -10,6 +24,19 @@ __all__ = ['MobileNetV3'] @register class MobileNetV3(): + """ + MobileNet v3, see https://arxiv.org/abs/1905.02244 + Args: + scale (float): scaling factor for convolution groups proportion of mobilenet_v3. + model_name (str): There are two modes, small and large. + norm_type (str): normalization type, 'bn' and 'sync_bn' are supported. + norm_decay (float): weight decay for normalization layer weights. + conv_decay (float): weight decay for convolution layer weights. + with_extra_blocks (bool): if extra blocks should be added. + extra_block_filters (list): number of filter for each extra block. + """ + __shared__ = ['norm_type'] + def __init__(self, scale=1.0, model_name='small',