From f8b420946c103bba47c250be632dbd617b2b6ba7 Mon Sep 17 00:00:00 2001 From: littletomatodonkey Date: Mon, 11 Oct 2021 17:23:31 +0800 Subject: [PATCH] fix cspnet (#1282) * fix cspnet * minor fix --- ppcls/arch/backbone/__init__.py | 1 + ppcls/arch/backbone/model_zoo/cspnet.py | 374 ++++++++++++++++++ .../configs/ImageNet/CSPNet/CSPDarkNet53.yaml | 131 ++++++ 3 files changed, 506 insertions(+) create mode 100644 ppcls/arch/backbone/model_zoo/cspnet.py create mode 100644 ppcls/configs/ImageNet/CSPNet/CSPDarkNet53.yaml diff --git a/ppcls/arch/backbone/__init__.py b/ppcls/arch/backbone/__init__.py index d2efcdc0..9dd929bf 100644 --- a/ppcls/arch/backbone/__init__.py +++ b/ppcls/arch/backbone/__init__.py @@ -58,6 +58,7 @@ from ppcls.arch.backbone.model_zoo.dla import DLA34, DLA46_c, DLA46x_c, DLA60, D from ppcls.arch.backbone.model_zoo.rednet import RedNet26, RedNet38, RedNet50, RedNet101, RedNet152 from ppcls.arch.backbone.model_zoo.tnt import TNT_small from ppcls.arch.backbone.model_zoo.hardnet import HarDNet68, HarDNet85, HarDNet39_ds, HarDNet68_ds +from ppcls.arch.backbone.model_zoo.cspnet import CSPDarkNet53 from ppcls.arch.backbone.variant_models.resnet_variant import ResNet50_last_stage_stride1 from ppcls.arch.backbone.variant_models.vgg_variant import VGG19Sigmoid diff --git a/ppcls/arch/backbone/model_zoo/cspnet.py b/ppcls/arch/backbone/model_zoo/cspnet.py new file mode 100644 index 00000000..30528214 --- /dev/null +++ b/ppcls/arch/backbone/model_zoo/cspnet.py @@ -0,0 +1,374 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle import ParamAttr + +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url + +MODEL_URLS = { + "CSPDarkNet53": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/CSPDarkNet53_pretrained.pdparams" +} + +MODEL_CFGS = { + "CSPDarkNet53": dict( + stem=dict( + out_chs=32, kernel_size=3, stride=1, pool=''), + stage=dict( + out_chs=(64, 128, 256, 512, 1024), + depth=(1, 2, 8, 8, 4), + stride=(2, ) * 5, + exp_ratio=(2., ) + (1., ) * 4, + bottle_ratio=(0.5, ) + (1.0, ) * 4, + block_ratio=(1., ) + (0.5, ) * 4, + down_growth=True, )) +} + +__all__ = ['CSPDarkNet53' + ] # model_registry will add each entrypoint fn to this + + +class ConvBnAct(nn.Layer): + def __init__(self, + input_channels, + output_channels, + kernel_size=1, + stride=1, + padding=None, + dilation=1, + groups=1, + act_layer=nn.LeakyReLU, + norm_layer=nn.BatchNorm2D): + super().__init__() + if padding is None: + padding = (kernel_size - 1) // 2 + self.conv = nn.Conv2D( + in_channels=input_channels, + out_channels=output_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups, + weight_attr=ParamAttr(), + bias_attr=False) + + self.bn = norm_layer(num_features=output_channels) + self.act = act_layer() + + def forward(self, inputs): + x = self.conv(inputs) + x = self.bn(x) + if self.act is not None: + x = self.act(x) + return x + + +def create_stem(in_chans=3, + out_chs=32, + kernel_size=3, + stride=2, + pool='', + act_layer=None, + norm_layer=None): + stem = nn.Sequential() + if not isinstance(out_chs, (tuple, list)): + out_chs = [out_chs] + assert len(out_chs) + in_c = in_chans + for i, out_c in enumerate(out_chs): + conv_name = f'conv{i + 1}' + stem.add_sublayer( + conv_name, + ConvBnAct( + in_c, + out_c, + kernel_size, + stride=stride if i == 0 else 1, + act_layer=act_layer, + norm_layer=norm_layer)) + in_c = out_c + last_conv = conv_name + if pool: + stem.add_sublayer( + 'pool', nn.MaxPool2D( + kernel_size=3, stride=2, padding=1)) + return stem, dict( + num_chs=in_c, reduction=stride, module='.'.join(['stem', last_conv])) + + +class DarkBlock(nn.Layer): + def __init__(self, + in_chs, + out_chs, + dilation=1, + bottle_ratio=0.5, + groups=1, + act_layer=nn.ReLU, + norm_layer=nn.BatchNorm2D, + attn_layer=None, + drop_block=None): + super(DarkBlock, self).__init__() + mid_chs = int(round(out_chs * bottle_ratio)) + ckwargs = dict(act_layer=act_layer, norm_layer=norm_layer) + self.conv1 = ConvBnAct(in_chs, mid_chs, kernel_size=1, **ckwargs) + self.conv2 = ConvBnAct( + mid_chs, + out_chs, + kernel_size=3, + dilation=dilation, + groups=groups, + **ckwargs) + + def forward(self, x): + shortcut = x + x = self.conv1(x) + x = self.conv2(x) + x = x + shortcut + return x + + +class CrossStage(nn.Layer): + def __init__(self, + in_chs, + out_chs, + stride, + dilation, + depth, + block_ratio=1., + bottle_ratio=1., + exp_ratio=1., + groups=1, + first_dilation=None, + down_growth=False, + cross_linear=False, + block_dpr=None, + block_fn=DarkBlock, + **block_kwargs): + super(CrossStage, self).__init__() + first_dilation = first_dilation or dilation + down_chs = out_chs if down_growth else in_chs + exp_chs = int(round(out_chs * exp_ratio)) + block_out_chs = int(round(out_chs * block_ratio)) + conv_kwargs = dict( + act_layer=block_kwargs.get('act_layer'), + norm_layer=block_kwargs.get('norm_layer')) + + if stride != 1 or first_dilation != dilation: + self.conv_down = ConvBnAct( + in_chs, + down_chs, + kernel_size=3, + stride=stride, + dilation=first_dilation, + groups=groups, + **conv_kwargs) + prev_chs = down_chs + else: + self.conv_down = None + prev_chs = in_chs + + self.conv_exp = ConvBnAct( + prev_chs, exp_chs, kernel_size=1, **conv_kwargs) + prev_chs = exp_chs // 2 # output of conv_exp is always split in two + + self.blocks = nn.Sequential() + for i in range(depth): + self.blocks.add_sublayer( + str(i), + block_fn(prev_chs, block_out_chs, dilation, bottle_ratio, + groups, **block_kwargs)) + prev_chs = block_out_chs + + # transition convs + self.conv_transition_b = ConvBnAct( + prev_chs, exp_chs // 2, kernel_size=1, **conv_kwargs) + self.conv_transition = ConvBnAct( + exp_chs, out_chs, kernel_size=1, **conv_kwargs) + + def forward(self, x): + if self.conv_down is not None: + x = self.conv_down(x) + x = self.conv_exp(x) + split = x.shape[1] // 2 + xs, xb = x[:, :split], x[:, split:] + xb = self.blocks(xb) + xb = self.conv_transition_b(xb) + out = self.conv_transition(paddle.concat([xs, xb], axis=1)) + return out + + +class DarkStage(nn.Layer): + def __init__(self, + in_chs, + out_chs, + stride, + dilation, + depth, + block_ratio=1., + bottle_ratio=1., + groups=1, + first_dilation=None, + block_fn=DarkBlock, + block_dpr=None, + **block_kwargs): + super().__init__() + first_dilation = first_dilation or dilation + + self.conv_down = ConvBnAct( + in_chs, + out_chs, + kernel_size=3, + stride=stride, + dilation=first_dilation, + groups=groups, + act_layer=block_kwargs.get('act_layer'), + norm_layer=block_kwargs.get('norm_layer')) + + prev_chs = out_chs + block_out_chs = int(round(out_chs * block_ratio)) + self.blocks = nn.Sequential() + for i in range(depth): + self.blocks.add_sublayer( + str(i), + block_fn(prev_chs, block_out_chs, dilation, bottle_ratio, + groups, **block_kwargs)) + prev_chs = block_out_chs + + def forward(self, x): + x = self.conv_down(x) + x = self.blocks(x) + return x + + +def _cfg_to_stage_args(cfg, curr_stride=2, output_stride=32): + # get per stage args for stage and containing blocks, calculate strides to meet target output_stride + num_stages = len(cfg['depth']) + if 'groups' not in cfg: + cfg['groups'] = (1, ) * num_stages + if 'down_growth' in cfg and not isinstance(cfg['down_growth'], + (list, tuple)): + cfg['down_growth'] = (cfg['down_growth'], ) * num_stages + stage_strides = [] + stage_dilations = [] + stage_first_dilations = [] + dilation = 1 + for cfg_stride in cfg['stride']: + stage_first_dilations.append(dilation) + if curr_stride >= output_stride: + dilation *= cfg_stride + stride = 1 + else: + stride = cfg_stride + curr_stride *= stride + stage_strides.append(stride) + stage_dilations.append(dilation) + cfg['stride'] = stage_strides + cfg['dilation'] = stage_dilations + cfg['first_dilation'] = stage_first_dilations + stage_args = [ + dict(zip(cfg.keys(), values)) for values in zip(*cfg.values()) + ] + return stage_args + + +class CSPNet(nn.Layer): + def __init__(self, + cfg, + in_chans=3, + class_num=1000, + output_stride=32, + global_pool='avg', + drop_rate=0., + act_layer=nn.LeakyReLU, + norm_layer=nn.BatchNorm2D, + zero_init_last_bn=True, + stage_fn=CrossStage, + block_fn=DarkBlock): + super().__init__() + self.class_num = class_num + self.drop_rate = drop_rate + assert output_stride in (8, 16, 32) + layer_args = dict(act_layer=act_layer, norm_layer=norm_layer) + + # Construct the stem + self.stem, stem_feat_info = create_stem(in_chans, **cfg['stem'], + **layer_args) + self.feature_info = [stem_feat_info] + prev_chs = stem_feat_info['num_chs'] + curr_stride = stem_feat_info[ + 'reduction'] # reduction does not include pool + if cfg['stem']['pool']: + curr_stride *= 2 + + # Construct the stages + per_stage_args = _cfg_to_stage_args( + cfg['stage'], curr_stride=curr_stride, output_stride=output_stride) + self.stages = nn.LayerList() + for i, sa in enumerate(per_stage_args): + self.stages.add_sublayer( + str(i), + stage_fn( + prev_chs, **sa, **layer_args, block_fn=block_fn)) + prev_chs = sa['out_chs'] + curr_stride *= sa['stride'] + self.feature_info += [ + dict( + num_chs=prev_chs, + reduction=curr_stride, + module=f'stages.{i}') + ] + + # Construct the head + self.num_features = prev_chs + + self.pool = nn.AdaptiveAvgPool2D(1) + self.flatten = nn.Flatten(1) + self.fc = nn.Linear( + prev_chs, + class_num, + weight_attr=ParamAttr(), + bias_attr=ParamAttr()) + + def forward(self, x): + x = self.stem(x) + for stage in self.stages: + x = stage(x) + x = self.pool(x) + x = self.flatten(x) + x = self.fc(x) + return x + + +def _load_pretrained(pretrained, model, model_url, use_ssld=False): + if pretrained is False: + pass + elif pretrained is True: + load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld) + elif isinstance(pretrained, str): + load_dygraph_pretrain(model, pretrained) + else: + raise RuntimeError( + "pretrained type is not available. Please use `string` or `boolean` type." + ) + + +def CSPDarkNet53(pretrained=False, use_ssld=False, **kwargs): + model = CSPNet(MODEL_CFGS["CSPDarkNet53"], block_fn=DarkBlock, **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["CSPDarkNet53"], use_ssld=use_ssld) + return model diff --git a/ppcls/configs/ImageNet/CSPNet/CSPDarkNet53.yaml b/ppcls/configs/ImageNet/CSPNet/CSPDarkNet53.yaml new file mode 100644 index 00000000..cce4e3e5 --- /dev/null +++ b/ppcls/configs/ImageNet/CSPNet/CSPDarkNet53.yaml @@ -0,0 +1,131 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: ./output/ + device: gpu + save_interval: 1 + eval_during_train: True + eval_interval: 1 + epochs: 120 + print_batch_step: 10 + use_visualdl: False + # used for static mode and model export + image_shape: [3, 224, 224] + save_inference_dir: ./inference + # training model under @to_static + to_static: False + +# model architecture +Arch: + name: CSPDarkNet53 + class_num: 1000 + +# loss function config for traing/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + Eval: + - CELoss: + weight: 1.0 + + +Optimizer: + name: Momentum + momentum: 0.9 + lr: + name: Piecewise + decay_epochs: [30, 60, 90] + values: [0.1, 0.01, 0.001, 0.0001] + regularizer: + name: 'L2' + coeff: 0.0001 + + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 256 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + + Eval: + dataset: + name: ImageNetDataset + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/val_list.txt + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 288 + - CropImage: + size: 256 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + sampler: + name: DistributedBatchSampler + batch_size: 64 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Infer: + infer_imgs: docs/images/whl/demo.jpg + batch_size: 10 + transforms: + - DecodeImage: + to_rgb: True + channel_first: False + - ResizeImage: + resize_short: 288 + - CropImage: + size: 256 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + PostProcess: + name: Topk + topk: 5 + class_id_map_file: ppcls/utils/imagenet1k_label_list.txt + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] -- GitLab