From 4e88bec59cc15f1f980ba9ad61f760c552062861 Mon Sep 17 00:00:00 2001 From: shangliang Xu Date: Thu, 13 May 2021 18:43:06 +0800 Subject: [PATCH] add res2net (#2992) --- configs/res2net/README.md | 37 ++ ...r_rcnn_res2net50_vb_26w_4s_fpn_1x_coco.yml | 33 ++ ...k_rcnn_res2net50_vb_26w_4s_fpn_2x_coco.yml | 47 +++ ...k_rcnn_res2net50_vd_26w_4s_fpn_2x_coco.yml | 47 +++ docs/MODEL_ZOO_cn.md | 20 + ppdet/modeling/backbones/__init__.py | 2 + ppdet/modeling/backbones/res2net.py | 357 ++++++++++++++++++ 7 files changed, 543 insertions(+) create mode 100644 configs/res2net/README.md create mode 100644 configs/res2net/faster_rcnn_res2net50_vb_26w_4s_fpn_1x_coco.yml create mode 100644 configs/res2net/mask_rcnn_res2net50_vb_26w_4s_fpn_2x_coco.yml create mode 100644 configs/res2net/mask_rcnn_res2net50_vd_26w_4s_fpn_2x_coco.yml create mode 100644 ppdet/modeling/backbones/res2net.py diff --git a/configs/res2net/README.md b/configs/res2net/README.md new file mode 100644 index 000000000..89fb659ed --- /dev/null +++ b/configs/res2net/README.md @@ -0,0 +1,37 @@ +# Res2Net + +## Introduction + +- Res2Net: A New Multi-scale Backbone Architecture: [https://arxiv.org/abs/1904.01169](https://arxiv.org/abs/1904.01169) + +``` +@article{DBLP:journals/corr/abs-1904-01169, + author = {Shanghua Gao and + Ming{-}Ming Cheng and + Kai Zhao and + Xinyu Zhang and + Ming{-}Hsuan Yang and + Philip H. S. 
Torr}, + title = {Res2Net: {A} New Multi-scale Backbone Architecture}, + journal = {CoRR}, + volume = {abs/1904.01169}, + year = {2019}, + url = {http://arxiv.org/abs/1904.01169}, + archivePrefix = {arXiv}, + eprint = {1904.01169}, + timestamp = {Thu, 25 Apr 2019 10:24:54 +0200}, + biburl = {https://dblp.org/rec/bib/journals/corr/abs-1904-01169}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} +``` + + +## Model Zoo + +| Backbone | Type | Image/gpu | Lr schd | Inf time (fps) | Box AP | Mask AP | Download | Configs | +| :---------------------- | :------------- | :-------: | :-----: | :------------: | :----: | :-----: | :----------------------------------------------------------: | :-----: | +| Res2Net50-FPN | Faster | 2 | 1x | - | 40.6 | - | [model](https://paddledet.bj.bcebos.com/models/faster_rcnn_res2net50_vb_26w_4s_fpn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/res2net/faster_rcnn_res2net50_vb_26w_4s_fpn_1x_coco.yml) | +| Res2Net50-FPN | Mask | 2 | 2x | - | 42.4 | 38.1 | [model](https://paddledet.bj.bcebos.com/models/mask_rcnn_res2net50_vb_26w_4s_fpn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/res2net/mask_rcnn_res2net50_vb_26w_4s_fpn_2x_coco.yml) | +| Res2Net50-vd-FPN | Mask | 2 | 2x | - | 42.6 | 38.1 | [model](https://paddledet.bj.bcebos.com/models/mask_rcnn_res2net50_vd_26w_4s_fpn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/res2net/mask_rcnn_res2net50_vd_26w_4s_fpn_2x_coco.yml) | + +Note: all the above models are trained with 8 GPUs. 
diff --git a/configs/res2net/faster_rcnn_res2net50_vb_26w_4s_fpn_1x_coco.yml b/configs/res2net/faster_rcnn_res2net50_vb_26w_4s_fpn_1x_coco.yml new file mode 100644 index 000000000..1fbdc9d73 --- /dev/null +++ b/configs/res2net/faster_rcnn_res2net50_vb_26w_4s_fpn_1x_coco.yml @@ -0,0 +1,33 @@ +_BASE_: [ + '../datasets/coco_detection.yml', + '../runtime.yml', + '../faster_rcnn/_base_/optimizer_1x.yml', + '../faster_rcnn/_base_/faster_rcnn_r50_fpn.yml', + '../faster_rcnn/_base_/faster_fpn_reader.yml', +] +pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/Res2Net50_26w_4s_pretrained.pdparams +weights: output/faster_rcnn_res2net50_vb_26w_4s_fpn_1x_coco/model_final + +FasterRCNN: + backbone: Res2Net + neck: FPN + rpn_head: RPNHead + bbox_head: BBoxHead + # post process + bbox_post_process: BBoxPostProcess + + +Res2Net: + # index 0 stands for res2 + depth: 50 + width: 26 + scales: 4 + norm_type: bn + freeze_at: 0 + return_idx: [0,1,2,3] + num_stages: 4 + variant: b + + +TrainReader: + batch_size: 2 diff --git a/configs/res2net/mask_rcnn_res2net50_vb_26w_4s_fpn_2x_coco.yml b/configs/res2net/mask_rcnn_res2net50_vb_26w_4s_fpn_2x_coco.yml new file mode 100644 index 000000000..02970d1f0 --- /dev/null +++ b/configs/res2net/mask_rcnn_res2net50_vb_26w_4s_fpn_2x_coco.yml @@ -0,0 +1,47 @@ +_BASE_: [ + '../datasets/coco_instance.yml', + '../runtime.yml', + '../mask_rcnn/_base_/optimizer_1x.yml', + '../mask_rcnn/_base_/mask_rcnn_r50_fpn.yml', + '../mask_rcnn/_base_/mask_fpn_reader.yml', +] +pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/Res2Net50_26w_4s_pretrained.pdparams +weights: output/mask_rcnn_res2net50_vb_26w_4s_fpn_2x_coco/model_final + +MaskRCNN: + backbone: Res2Net + neck: FPN + rpn_head: RPNHead + bbox_head: BBoxHead + mask_head: MaskHead + # post process + bbox_post_process: BBoxPostProcess + mask_post_process: MaskPostProcess + + +Res2Net: + # index 0 stands for res2 + depth: 50 + width: 26 + scales: 4 + norm_type: bn + freeze_at: 0 
+ return_idx: [0,1,2,3] + num_stages: 4 + variant: b + + +epoch: 24 +LearningRate: + base_lr: 0.01 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: [16, 22] + - !LinearWarmup + start_factor: 0.3333333333333333 + steps: 500 + + +TrainReader: + batch_size: 2 diff --git a/configs/res2net/mask_rcnn_res2net50_vd_26w_4s_fpn_2x_coco.yml b/configs/res2net/mask_rcnn_res2net50_vd_26w_4s_fpn_2x_coco.yml new file mode 100644 index 000000000..549e1f791 --- /dev/null +++ b/configs/res2net/mask_rcnn_res2net50_vd_26w_4s_fpn_2x_coco.yml @@ -0,0 +1,47 @@ +_BASE_: [ + '../datasets/coco_instance.yml', + '../runtime.yml', + '../mask_rcnn/_base_/optimizer_1x.yml', + '../mask_rcnn/_base_/mask_rcnn_r50_fpn.yml', + '../mask_rcnn/_base_/mask_fpn_reader.yml', +] +pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/Res2Net50_vd_26w_4s_pretrained.pdparams +weights: output/mask_rcnn_res2net50_vd_26w_4s_fpn_2x_coco/model_final + +MaskRCNN: + backbone: Res2Net + neck: FPN + rpn_head: RPNHead + bbox_head: BBoxHead + mask_head: MaskHead + # post process + bbox_post_process: BBoxPostProcess + mask_post_process: MaskPostProcess + + +Res2Net: + # index 0 stands for res2 + depth: 50 + width: 26 + scales: 4 + norm_type: bn + freeze_at: 0 + return_idx: [0,1,2,3] + num_stages: 4 + variant: d + + +epoch: 24 +LearningRate: + base_lr: 0.01 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: [16, 22] + - !LinearWarmup + start_factor: 0.3333333333333333 + steps: 500 + + +TrainReader: + batch_size: 2 diff --git a/docs/MODEL_ZOO_cn.md b/docs/MODEL_ZOO_cn.md index 5cad37311..8f6dae73b 100644 --- a/docs/MODEL_ZOO_cn.md +++ b/docs/MODEL_ZOO_cn.md @@ -63,3 +63,23 @@ Paddle提供基于ImageNet的骨架网络预训练模型。所有预训练模型 ### TTFNet 请参考[TTFNet](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ttfnet/) + +### Group Normalization + +请参考[Group Normalization](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/gn/) + +### Deformable ConvNets v2 + +请参考[Deformable 
ConvNets v2](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/) + +### HRNets + +请参考[HRNets](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/hrnet/) + +### S2ANet + +请参考[S2ANet](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dota/) + +### Res2Net + +请参考[Res2Net](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/res2net/) diff --git a/ppdet/modeling/backbones/__init__.py b/ppdet/modeling/backbones/__init__.py index 4937c9b8d..c1eb8e85d 100644 --- a/ppdet/modeling/backbones/__init__.py +++ b/ppdet/modeling/backbones/__init__.py @@ -21,6 +21,7 @@ from . import hrnet from . import blazenet from . import ghostnet from . import senet +from . import res2net from .vgg import * from .resnet import * @@ -31,3 +32,4 @@ from .hrnet import * from .blazenet import * from .ghostnet import * from .senet import * +from .res2net import * diff --git a/ppdet/modeling/backbones/res2net.py b/ppdet/modeling/backbones/res2net.py new file mode 100644 index 000000000..9e7677247 --- /dev/null +++ b/ppdet/modeling/backbones/res2net.py @@ -0,0 +1,357 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
from numbers import Integral

import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ppdet.core.workspace import register, serializable
from ..shape_spec import ShapeSpec
from .resnet import ConvNormLayer

__all__ = ['Res2Net', 'Res2NetC5']

# Number of bottleneck blocks in stages res2..res5, keyed by network depth.
Res2Net_cfg = {
    50: [3, 4, 6, 3],
    101: [3, 4, 23, 3],
    152: [3, 8, 36, 3],
    200: [3, 12, 48, 3]
}


class BottleNeck(nn.Layer):
    """Res2Net bottleneck block.

    The block is a 1x1 conv (``branch2a``) producing ``width * scales``
    channels, followed by ``scales - 1`` 3x3 convs (``branch2b``) applied to
    channel splits of that tensor, and a closing 1x1 conv (``branch2c``).
    The result is added to the shortcut and passed through ReLU.

    Args:
        ch_in (int): number of input channels.
        ch_out (int): number of output channels.
        stride (int): stride of the block (2 downsamples the feature map).
        shortcut (bool): if True the input is added unchanged; otherwise a
            projection branch (``branch1``) is built and applied to it.
        width (int): number of channels of each split.
        scales (int): number of channel splits.
        variant (str): 'a' applies the stride in the first 1x1 conv, every
            other variant applies it in the 3x3 convs. With 'd' and
            ``stride == 2`` the projection shortcut is a 2x2 average pool
            followed by a stride-1 1x1 conv.
        groups (int): forwarded to the 3x3 ``ConvNormLayer`` instances.
        lr (float): forwarded to every ``ConvNormLayer``.
        norm_type (str): forwarded to every ``ConvNormLayer``.
        norm_decay (float): forwarded to every ``ConvNormLayer``.
        freeze_norm (bool): forwarded to every ``ConvNormLayer``.
        dcn_v2 (bool): forwarded to the 3x3 ``ConvNormLayer`` instances.
    """

    def __init__(self,
                 ch_in,
                 ch_out,
                 stride,
                 shortcut,
                 width,
                 scales=4,
                 variant='b',
                 groups=1,
                 lr=1.0,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=True,
                 dcn_v2=False):
        super(BottleNeck, self).__init__()

        self.shortcut = shortcut
        self.scales = scales
        self.stride = stride
        # Stride actually used by the 3x3 convs. Variant 'a' already
        # downsamples in branch2a, so its 3x3 convs run at stride 1. The
        # pooling of the last split in forward() must use this same stride,
        # otherwise the splits cannot be concatenated.
        self.conv_stride = 1 if variant == 'a' else stride

        if not shortcut:
            if variant == 'd' and stride == 2:
                # Downsample with average pooling, then project with a
                # stride-1 1x1 conv.
                self.branch1 = nn.Sequential()
                self.branch1.add_sublayer(
                    'pool',
                    nn.AvgPool2D(
                        kernel_size=2, stride=2, padding=0, ceil_mode=True))
                self.branch1.add_sublayer(
                    'conv',
                    ConvNormLayer(
                        ch_in=ch_in,
                        ch_out=ch_out,
                        filter_size=1,
                        stride=1,
                        norm_type=norm_type,
                        norm_decay=norm_decay,
                        freeze_norm=freeze_norm,
                        lr=lr))
            else:
                self.branch1 = ConvNormLayer(
                    ch_in=ch_in,
                    ch_out=ch_out,
                    filter_size=1,
                    stride=stride,
                    norm_type=norm_type,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    lr=lr)

        self.branch2a = ConvNormLayer(
            ch_in=ch_in,
            ch_out=width * scales,
            filter_size=1,
            stride=stride if variant == 'a' else 1,
            groups=1,
            act='relu',
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            lr=lr)

        # One 3x3 conv per split except the last split, which is passed
        # through (or pooled) in forward().
        self.branch2b = nn.LayerList([
            ConvNormLayer(
                ch_in=width,
                ch_out=width,
                filter_size=3,
                stride=self.conv_stride,
                groups=groups,
                act='relu',
                norm_type=norm_type,
                norm_decay=norm_decay,
                freeze_norm=freeze_norm,
                lr=lr,
                dcn_v2=dcn_v2) for _ in range(self.scales - 1)
        ])

        self.branch2c = ConvNormLayer(
            ch_in=width * scales,
            ch_out=ch_out,
            filter_size=1,
            stride=1,
            groups=1,
            norm_type=norm_type,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            lr=lr)

    def forward(self, inputs):
        """Apply the block to ``inputs`` and return the activated sum of the
        residual branch and the shortcut."""
        out = self.branch2a(inputs)
        feature_split = paddle.split(out, self.scales, 1)
        out_split = []
        for i in range(self.scales - 1):
            if i == 0 or self.stride == 2:
                # First split, or a stride-2 block: each split is convolved
                # independently.
                out_split.append(self.branch2b[i](feature_split[i]))
            else:
                # Hierarchical connection: add the previous split's output
                # before convolving.
                out_split.append(self.branch2b[i](paddle.add(feature_split[i],
                                                             out_split[-1])))
        if self.stride == 1:
            out_split.append(feature_split[-1])
        else:
            # Bring the untouched last split to the same spatial size as the
            # convolved splits by pooling with the stride the 3x3 convs used.
            out_split.append(
                F.avg_pool2d(feature_split[-1], 3, self.conv_stride, 1))
        out = self.branch2c(paddle.concat(out_split, 1))

        if self.shortcut:
            short = inputs
        else:
            short = self.branch1(inputs)

        out = paddle.add(out, short)
        out = F.relu(out)

        return out


class Blocks(nn.Layer):
    """One Res2Net stage: ``count`` ``BottleNeck`` blocks applied in sequence.

    Only the first block changes the channel count (``ch_in`` -> ``ch_out``)
    and uses a projection shortcut; it has stride 2 unless ``stage_num`` is 2.
    The per-split width passed to each block is
    ``width * 2 ** (stage_num - 2)``.

    Args:
        ch_in (int): input channels of the stage.
        ch_out (int): output channels of the stage.
        count (int): number of bottleneck blocks.
        stage_num (int): stage number; the backbone in this file uses 2..5.
        width (int): base per-split width (the value used for stage 2).
        scales (int): number of channel splits in each block.
        variant (str): forwarded to every ``BottleNeck``.
        groups (int): forwarded to every ``BottleNeck``.
        lr (float): forwarded to every ``BottleNeck``.
        norm_type (str): forwarded to every ``BottleNeck``.
        norm_decay (float): forwarded to every ``BottleNeck``.
        freeze_norm (bool): forwarded to every ``BottleNeck``.
        dcn_v2 (bool): forwarded to every ``BottleNeck``.
    """

    def __init__(self,
                 ch_in,
                 ch_out,
                 count,
                 stage_num,
                 width,
                 scales=4,
                 variant='b',
                 groups=1,
                 lr=1.0,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=True,
                 dcn_v2=False):
        super(Blocks, self).__init__()

        self.blocks = nn.Sequential()
        for i in range(count):
            is_first = i == 0
            self.blocks.add_sublayer(
                str(i),
                BottleNeck(
                    ch_in=ch_in if is_first else ch_out,
                    ch_out=ch_out,
                    stride=2 if is_first and stage_num != 2 else 1,
                    shortcut=not is_first,
                    width=width * (2**(stage_num - 2)),
                    scales=scales,
                    variant=variant,
                    groups=groups,
                    lr=lr,
                    norm_type=norm_type,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    dcn_v2=dcn_v2))

    def forward(self, inputs):
        """Run ``inputs`` through all blocks of the stage."""
        return self.blocks(inputs)


@register
@serializable
class Res2Net(nn.Layer):
    """
    Res2Net, see https://arxiv.org/abs/1904.01169
    Args:
        depth (int): Res2Net depth, should be 50, 101, 152, 200.
        width (int): per-split channel width used in stage res2; it is
            doubled for every following stage
        scales (int): number of channel splits in each bottleneck block
        variant (str): Res2Net variant, supports 'a', 'b', 'c', 'd' currently.
            'c' and 'd' use a stem of three 3x3 convs instead of one 7x7
            conv; the variant is also forwarded to every residual stage.
        lr_mult_list (list): learning rate ratio of different resnet stages(2,3,4,5),
            lower learning rate ratio is need for pretrained model
            got using distillation(default as [1.0, 1.0, 1.0, 1.0]).
        groups (int): The groups number of the Conv Layer.
        norm_type (str): normalization type, 'bn' or 'sync_bn'
        norm_decay (float): weight decay for normalization layer weights
        freeze_norm (bool): freeze normalization layers
        freeze_at (int): index of the stage whose output gets
            ``stop_gradient = True`` (index 0 stands for res2). A value that
            matches no stage index, e.g. -1, sets it on no stage.
        return_idx (list): index of stages whose feature maps are returned,
            index 0 stands for res2
        dcn_v2_stages (list): index of stages who select deformable conv v2;
            the default [-1] (or an empty list) selects none
        num_stages (int): number of stages created

    """
    __shared__ = ['norm_type']

    # NOTE(review): the list defaults are kept as-is for interface
    # compatibility; they are only read here, never mutated.
    def __init__(self,
                 depth=50,
                 width=26,
                 scales=4,
                 variant='b',
                 lr_mult_list=[1.0, 1.0, 1.0, 1.0],
                 groups=1,
                 norm_type='bn',
                 norm_decay=0.,
                 freeze_norm=True,
                 freeze_at=0,
                 return_idx=[0, 1, 2, 3],
                 dcn_v2_stages=[-1],
                 num_stages=4):
        super(Res2Net, self).__init__()

        self._model_type = 'Res2Net' if groups == 1 else 'Res2NeXt'

        assert depth in [50, 101, 152, 200], \
            "depth {} not in [50, 101, 152, 200]".format(depth)
        assert variant in ['a', 'b', 'c', 'd'], "invalid Res2Net variant"
        assert num_stages >= 1 and num_stages <= 4

        self.depth = depth
        self.variant = variant
        self.norm_type = norm_type
        self.norm_decay = norm_decay
        self.freeze_norm = freeze_norm
        self.freeze_at = freeze_at
        if isinstance(return_idx, Integral):
            return_idx = [return_idx]
        assert max(return_idx) < num_stages, \
            'the maximum return index must smaller than num_stages, ' \
            'but received maximum return index is {} and num_stages ' \
            'is {}'.format(max(return_idx), num_stages)
        self.return_idx = return_idx
        self.num_stages = num_stages
        assert len(lr_mult_list) == 4, \
            "lr_mult_list length must be 4 but got {}".format(len(lr_mult_list))
        if isinstance(dcn_v2_stages, Integral):
            dcn_v2_stages = [dcn_v2_stages]
        # An empty list means "no deformable-conv stage"; max() of an empty
        # sequence would raise, so only check the bound when there are entries.
        assert not dcn_v2_stages or max(dcn_v2_stages) < num_stages, \
            'every dcn_v2_stages index must be smaller than num_stages ' \
            '({}), but got {}'.format(num_stages, dcn_v2_stages)
        self.dcn_v2_stages = dcn_v2_stages

        block_nums = Res2Net_cfg[depth]

        # C1 stage: rows are [ch_in, ch_out, kernel, stride, sublayer name].
        if self.variant in ['c', 'd']:
            conv_def = [
                [3, 32, 3, 2, "conv1_1"],
                [32, 32, 3, 1, "conv1_2"],
                [32, 64, 3, 1, "conv1_3"],
            ]
        else:
            conv_def = [[3, 64, 7, 2, "conv1"]]
        self.res1 = nn.Sequential()
        for (c_in, c_out, k, s, _name) in conv_def:
            self.res1.add_sublayer(
                _name,
                ConvNormLayer(
                    ch_in=c_in,
                    ch_out=c_out,
                    filter_size=k,
                    stride=s,
                    groups=1,
                    act='relu',
                    norm_type=norm_type,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    lr=1.0))

        self._in_channels = [64, 256, 512, 1024]
        self._out_channels = [256, 512, 1024, 2048]
        self._out_strides = [4, 8, 16, 32]

        # C2-C5 stages
        self.res_layers = []
        for i in range(num_stages):
            lr_mult = lr_mult_list[i]
            stage_num = i + 2
            self.res_layers.append(
                self.add_sublayer(
                    "res{}".format(stage_num),
                    Blocks(
                        self._in_channels[i],
                        self._out_channels[i],
                        count=block_nums[i],
                        stage_num=stage_num,
                        width=width,
                        scales=scales,
                        # Forward the variant so that 'a' and 'd' also shape
                        # the residual stages, not only the stem. Without
                        # this every stage was silently built as variant 'b'.
                        variant=variant,
                        groups=groups,
                        lr=lr_mult,
                        norm_type=norm_type,
                        norm_decay=norm_decay,
                        freeze_norm=freeze_norm,
                        dcn_v2=(i in self.dcn_v2_stages))))

    @property
    def out_shape(self):
        """ShapeSpec (channels and stride) of every returned feature map."""
        return [
            ShapeSpec(
                channels=self._out_channels[i], stride=self._out_strides[i])
            for i in self.return_idx
        ]

    def forward(self, inputs):
        """Return the feature maps of the stages listed in ``return_idx``.

        Args:
            inputs (dict): must contain the input tensor under key 'image'.
        """
        x = inputs['image']
        res1 = self.res1(x)
        x = F.max_pool2d(res1, kernel_size=3, stride=2, padding=1)
        outs = []
        for idx, stage in enumerate(self.res_layers):
            x = stage(x)
            if idx == self.freeze_at:
                # Cut the gradient at the output of the frozen stage.
                x.stop_gradient = True
            if idx in self.return_idx:
                outs.append(x)
        return outs


@register
class Res2NetC5(nn.Layer):
    """Stand-alone res5 stage (1024 -> 2048 channels, 3 blocks).

    Args:
        depth (int): accepted but not used by this class; the stage always
            has 3 blocks, which equals the last entry of every
            ``Res2Net_cfg`` row.
        width (int): base per-split width, forwarded to ``Blocks``.
        scales (int): number of channel splits, forwarded to ``Blocks``.
        variant (str): Res2Net variant, forwarded to ``Blocks``.
    """

    def __init__(self, depth=50, width=26, scales=4, variant='b'):
        super(Res2NetC5, self).__init__()
        feat_in, feat_out = [1024, 2048]
        self.res5 = Blocks(
            feat_in,
            feat_out,
            count=3,
            stage_num=5,
            width=width,
            scales=scales,
            variant=variant)
        self.feat_out = feat_out

    @property
    def out_shape(self):
        """ShapeSpec of the single output: ``feat_out`` channels, stride 32."""
        return [ShapeSpec(
            channels=self.feat_out,
            stride=32, )]

    def forward(self, roi_feat, stage=0):
        """Apply the res5 stage to ``roi_feat``; ``stage`` is not used."""
        y = self.res5(roi_feat)
        return y

# -- GitLab  (signature trailer of the original patch, kept as a comment)