From c619f4eeb148a2beba5581f0c8e6fda1b405eff7 Mon Sep 17 00:00:00 2001
From: michaelowenliu
Date: Fri, 28 Aug 2020 17:17:16 +0800
Subject: [PATCH] add FastSCNN

---
 dygraph/models/__init__.py    |   1 +
 dygraph/models/fast_scnn.py   | 302 ++++++++++++++++++++++++++++++++++
 dygraph/models/model_utils.py |  32 +++-
 dygraph/models/pspnet.py      |  24 ++-
 4 files changed, 348 insertions(+), 11 deletions(-)
 create mode 100644 dygraph/models/fast_scnn.py

diff --git a/dygraph/models/__init__.py b/dygraph/models/__init__.py
index 52b73c3b..ce2fa4a6 100644
--- a/dygraph/models/__init__.py
+++ b/dygraph/models/__init__.py
@@ -17,3 +17,4 @@ from .unet import UNet
 from .deeplab import *
 from .fcn import *
 from .pspnet import *
+from .fast_scnn import *
diff --git a/dygraph/models/fast_scnn.py b/dygraph/models/fast_scnn.py
new file mode 100644
index 00000000..6bd9b4d6
--- /dev/null
+++ b/dygraph/models/fast_scnn.py
@@ -0,0 +1,302 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from paddle import fluid, nn
+
+from dygraph.cvlibs import manager
+from dygraph.models import model_utils, pspnet
+from dygraph.models.architectures import layer_utils
+
+
+@manager.MODELS.add_component
+class FastSCNN(fluid.dygraph.Layer):
+    """
+    The FastSCNN implementation.
+
+    As mentioned in the original paper, FastSCNN is a real-time segmentation algorithm (123.5 fps)
+    even for high-resolution images (1024x2048).
+
+    The original article refers to
+    Poudel, Rudra PK, et al. "Fast-SCNN: Fast Semantic Segmentation Network."
+    (https://arxiv.org/pdf/1902.04502.pdf)
+
+    Args:
+        num_classes (int): the unique number of target classes. Default to 2.
+
+        enable_auxiliary_loss (bool): whether to add an auxiliary loss. If True, an auxiliary loss
+            with a weight of 0.4 is added after the LearningToDownsample module. Default to False.
+
+        ignore_index (int): the value in the ground-truth mask that is ignored during evaluation. Default to 255.
+    """
+
+    def __init__(self,
+                 num_classes=2,
+                 enable_auxiliary_loss=False,
+                 ignore_index=255):
+
+        super(FastSCNN, self).__init__()
+
+        self.learning_to_downsample = LearningToDownsample(32, 48, 64)
+        self.global_feature_extractor = GlobalFeatureExtractor(64, [64, 96, 128], 128, 6, [3, 3, 3])
+        self.feature_fusion = FeatureFusionModule(64, 128, 128)
+        self.classifier = Classifier(128, num_classes)
+
+        if enable_auxiliary_loss:
+            self.auxlayer = model_utils.AuxLayer(64, 32, num_classes)
+
+        self.enable_auxiliary_loss = enable_auxiliary_loss
+        self.ignore_index = ignore_index
+
+    def forward(self, input, label=None):
+
+        higher_res_features = self.learning_to_downsample(input)
+        x = self.global_feature_extractor(higher_res_features)
+        x = self.feature_fusion(higher_res_features, x)
+        logit = self.classifier(x)
+        logit = fluid.layers.resize_bilinear(logit, input.shape[2:])
+
+        if self.enable_auxiliary_loss:
+            auxiliary_logit = self.auxlayer(higher_res_features)
+            auxiliary_logit = fluid.layers.resize_bilinear(auxiliary_logit, input.shape[2:])
+
+        if self.training:
+            loss = model_utils.get_loss(logit, label)
+            if self.enable_auxiliary_loss:
+                auxiliary_loss = model_utils.get_loss(auxiliary_logit, label)
+                loss += (0.4 * auxiliary_loss)
+            return loss
+        else:
+            pred, score_map = model_utils.get_pred_score_map(logit)
+            return pred, score_map
+
+
+class LearningToDownsample(fluid.dygraph.Layer):
+    """
+    Learning to Downsample module.
+
+    This module consists of three downsampling blocks (one Conv and two separable Convs).
+
+    Args:
+        dw_channels1 (int): the input channels of the first sep conv. Default to 32.
+
+        dw_channels2 (int): the input channels of the second sep conv. Default to 48.
+
+        out_channels (int): the output channels of the LearningToDownsample module. Default to 64.
+    """
+
+    def __init__(self, dw_channels1=32, dw_channels2=48, out_channels=64):
+        super(LearningToDownsample, self).__init__()
+
+        self.conv_bn_relu = layer_utils.ConvBnRelu(num_channels=3,
+                                                   num_filters=dw_channels1,
+                                                   filter_size=3,
+                                                   stride=2)
+        self.dsconv_bn_relu1 = layer_utils.ConvBnRelu(num_channels=dw_channels1,
+                                                      num_filters=dw_channels2,
+                                                      filter_size=3,
+                                                      using_sep_conv=True,  # using sep conv
+                                                      stride=2,
+                                                      padding=1)
+        self.dsconv_bn_relu2 = layer_utils.ConvBnRelu(num_channels=dw_channels2,
+                                                      num_filters=out_channels,
+                                                      filter_size=3,
+                                                      using_sep_conv=True,  # using sep conv
+                                                      stride=2,
+                                                      padding=1)
+
+    def forward(self, x):
+        x = self.conv_bn_relu(x)
+        x = self.dsconv_bn_relu1(x)
+        x = self.dsconv_bn_relu2(x)
+        return x
+
+
+class GlobalFeatureExtractor(fluid.dygraph.Layer):
+    """
+    Global Feature Extractor module.
+
+    This module consists of three LinearBottleneck blocks (like the inverted residuals introduced
+    by MobileNetV2) and a PPModule (introduced by PSPNet).
+
+    Args:
+        in_channels (int): the number of input channels to the module. Default to 64.
+        block_channels (tuple): a tuple that represents the output channels of each bottleneck block. Default to (64, 96, 128).
+        out_channels (int): the number of output channels of the module. Default to 128.
+        expansion (int): the expansion factor in the bottleneck. Default to 6.
+        num_blocks (tuple): the number of times each bottleneck is repeated. Default to (3, 3, 3).
+    """
+
+    def __init__(self, in_channels=64, block_channels=(64, 96, 128),
+                 out_channels=128, expansion=6, num_blocks=(3, 3, 3)):
+        super(GlobalFeatureExtractor, self).__init__()
+
+        self.bottleneck1 = self._make_layer(LinearBottleneck, in_channels, block_channels[0],
+                                            num_blocks[0], expansion, 2)
+        self.bottleneck2 = self._make_layer(LinearBottleneck, block_channels[0], block_channels[1],
+                                            num_blocks[1], expansion, 2)
+        self.bottleneck3 = self._make_layer(LinearBottleneck, block_channels[1], block_channels[2],
+                                            num_blocks[2], expansion, 1)
+
+        self.ppm = pspnet.PPModule(block_channels[2], out_channels, dim_reduction=True)
+
+    def _make_layer(self, block, in_channels, out_channels, blocks, expansion=6, stride=1):
+        layers = []
+        layers.append(block(in_channels, out_channels, expansion, stride))
+        for i in range(1, blocks):
+            layers.append(block(out_channels, out_channels, expansion, 1))
+        return nn.Sequential(*layers)
+
+    def forward(self, x):
+        x = self.bottleneck1(x)
+        x = self.bottleneck2(x)
+        x = self.bottleneck3(x)
+        x = self.ppm(x)
+        return x
+
+
+class LinearBottleneck(fluid.dygraph.Layer):
+    """
+    Single bottleneck implementation.
+
+    Args:
+        in_channels (int): the number of input channels to the bottleneck block.
+
+        out_channels (int): the number of output channels of the bottleneck block.
+
+        expansion (int): the expansion factor in the bottleneck. Default to 6.
+
+        stride (int): the stride used in the depth-wise conv. Default to 2.
+    """
+
+    def __init__(self, in_channels, out_channels, expansion=6, stride=2, **kwargs):
+        super(LinearBottleneck, self).__init__()
+
+        self.use_shortcut = stride == 1 and in_channels == out_channels
+
+        expand_channels = in_channels * expansion
+        self.block = nn.Sequential(
+            # pw
+            layer_utils.ConvBnRelu(num_channels=in_channels,
+                                   num_filters=expand_channels,
+                                   filter_size=1,
+                                   bias_attr=False),
+            # dw
+            layer_utils.ConvBnRelu(num_channels=expand_channels,
+                                   num_filters=expand_channels,
+                                   filter_size=3,
+                                   stride=stride,
+                                   padding=1,
+                                   groups=expand_channels,
+                                   bias_attr=False),
+            # pw-linear
+            nn.Conv2D(num_channels=expand_channels,
+                      num_filters=out_channels,
+                      filter_size=1,
+                      bias_attr=False),
+
+            nn.BatchNorm(out_channels)
+        )
+
+    def forward(self, x):
+        out = self.block(x)
+        if self.use_shortcut:
+            out = x + out
+        return out
+
+
+class FeatureFusionModule(fluid.dygraph.Layer):
+    """
+    Feature Fusion Module implementation.
+
+    This module fuses the high-resolution feature and the low-resolution feature.
+
+    Args:
+        high_in_channels (int): the channels of the high-resolution feature (output of LearningToDownsample).
+
+        low_in_channels (int): the channels of the low-resolution feature (output of GlobalFeatureExtractor).
+
+        out_channels (int): the output channels of this module.
+    """
+
+    def __init__(self, high_in_channels, low_in_channels, out_channels):
+        super(FeatureFusionModule, self).__init__()
+
+        # Only a depth-wise conv is used here, WITHOUT a point-wise conv.
+        self.dwconv = layer_utils.ConvBnRelu(num_channels=low_in_channels,
+                                             num_filters=out_channels,
+                                             filter_size=3,
+                                             padding=1,
+                                             groups=128)
+
+        self.conv_low_res = nn.Sequential(
+            nn.Conv2D(num_channels=out_channels, num_filters=out_channels, filter_size=1),
+            nn.BatchNorm(out_channels))
+
+        self.conv_high_res = nn.Sequential(
+            nn.Conv2D(num_channels=high_in_channels, num_filters=out_channels, filter_size=1),
+            nn.BatchNorm(out_channels))
+
+        self.relu = nn.ReLU(True)
+
+    def forward(self, high_res_input, low_res_input):
+        low_res_input = fluid.layers.resize_bilinear(input=low_res_input, scale=4)
+        low_res_input = self.dwconv(low_res_input)
+        low_res_input = self.conv_low_res(low_res_input)
+
+        high_res_input = self.conv_high_res(high_res_input)
+
+        x = high_res_input + low_res_input
+
+        return self.relu(x)
+
+
+class Classifier(fluid.dygraph.Layer):
+    """
+    The Classifier module implementation.
+
+    This module consists of two depth-wise convs and one conv.
+
+    Args:
+        input_channels (int): the input channels to this module.
+
+        num_classes (int): the unique number of target classes.
+    """
+
+    def __init__(self, input_channels, num_classes):
+        super(Classifier, self).__init__()
+
+        self.dsconv1 = layer_utils.ConvBnRelu(num_channels=input_channels,
+                                              num_filters=input_channels,
+                                              filter_size=3,
+                                              using_sep_conv=True  # using sep conv
+                                              )
+
+        self.dsconv2 = layer_utils.ConvBnRelu(num_channels=input_channels,
+                                              num_filters=input_channels,
+                                              filter_size=3,
+                                              using_sep_conv=True  # using sep conv
+                                              )
+
+        self.conv = nn.Conv2D(num_channels=input_channels,
+                              num_filters=num_classes,
+                              filter_size=1)
+
+    def forward(self, x):
+        x = self.dsconv1(x)
+        x = self.dsconv2(x)
+        x = fluid.layers.dropout(x, dropout_prob=0.1)
+        x = self.conv(x)
+        return x
diff --git a/dygraph/models/model_utils.py b/dygraph/models/model_utils.py
index e0a88c35..7f529199 100644
--- a/dygraph/models/model_utils.py
+++ b/dygraph/models/model_utils.py
@@ -18,7 +18,8 @@ import paddle.nn.functional as F
 from paddle import fluid
 from paddle.fluid import dygraph
 from paddle.fluid.dygraph import Conv2D
-from paddle.nn import SyncBatchNorm as BatchNorm
+#from paddle.nn import SyncBatchNorm as BatchNorm
+from paddle.fluid.dygraph import SyncBatchNorm as BatchNorm
 
 from dygraph.models.architectures import layer_utils
 
@@ -47,10 +48,37 @@ class FCNHead(fluid.dygraph.Layer):
 
     def forward(self, x):
         x = self.conv_bn_relu(x)
-        x = F.dropout(x, p=0.1)
+        x = F.dropout(x, dropout_prob=0.1)
         x = self.conv(x)
         return x
 
+class AuxLayer(fluid.dygraph.Layer):
+    """
+    The auxiliary layer implementation for the auxiliary loss.
+
+    Args:
+        in_channels (int): the number of input channels.
+        inter_channels (int): the number of intermediate channels.
+        out_channels (int): the number of output channels, which is usually num_classes.
+    """
+
+    def __init__(self, in_channels, inter_channels, out_channels):
+        super(AuxLayer, self).__init__()
+
+        self.conv_bn_relu = layer_utils.ConvBnRelu(num_channels=in_channels,
+                                                   num_filters=inter_channels,
+                                                   filter_size=3,
+                                                   padding=1)
+
+        self.conv = Conv2D(num_channels=inter_channels,
+                           num_filters=out_channels,
+                           filter_size=1)
+
+    def forward(self, x):
+        x = self.conv_bn_relu(x)
+        x = F.dropout(x, dropout_prob=0.1)
+        x = self.conv(x)
+        return x
 
 def get_loss(logit, label, ignore_index=255, EPS=1e-5):
     """
diff --git a/dygraph/models/pspnet.py b/dygraph/models/pspnet.py
index 2b30d43c..9017484d 100644
--- a/dygraph/models/pspnet.py
+++ b/dygraph/models/pspnet.py
@@ -144,21 +144,27 @@ class PPModule(fluid.dygraph.Layer):
 
         out_channels (int): the number of output channels after pyramid pooling module.
 
         bin_sizes (tuple): the out size of pooled feature maps. Default to (1,2,3,6).
+
+        dim_reduction (bool): whether to reduce the dimension after pooling. Default to True.
     """
 
-    def __init__(self, in_channels, out_channels, bin_sizes=(1, 2, 3, 6)):
+    def __init__(self, in_channels, out_channels, bin_sizes=(1, 2, 3, 6), dim_reduction=True):
         super(PPModule, self).__init__()
         self.bin_sizes = bin_sizes
 
+        inter_channels = in_channels
+        if dim_reduction:
+            inter_channels = in_channels // len(bin_sizes)
+
         # we use dimension reduction after pooling mentioned in original implementation.
-        self.stages = fluid.dygraph.LayerList([self._make_stage(in_channels, size) for size in bin_sizes])
+        self.stages = fluid.dygraph.LayerList([self._make_stage(in_channels, inter_channels, size) for size in bin_sizes])
 
-        self.conv_bn_relu2 = layer_utils.ConvBnRelu(num_channels=in_channels * 2,
+        self.conv_bn_relu2 = layer_utils.ConvBnRelu(num_channels=in_channels + inter_channels * len(bin_sizes),
                                                     num_filters=out_channels,
                                                     filter_size=3,
                                                     padding=1)
 
-    def _make_stage(self, in_channels, size):
+    def _make_stage(self, in_channels, out_channels, size):
         """
         Create one pooling layer.
@@ -181,7 +187,7 @@ class PPModule(fluid.dygraph.Layer):
         # this paddle version does not support AdaptiveAvgPool2d, so skip it here.
         # prior = nn.AdaptiveAvgPool2d(output_size=(size, size))
         conv = layer_utils.ConvBnRelu(num_channels=in_channels,
-                                      num_filters=in_channels // len(self.bin_sizes),
+                                      num_filters=out_channels,
                                       filter_size=1)
 
         return conv
@@ -203,23 +209,23 @@
 
 @manager.MODELS.add_component
 def pspnet_resnet101_vd(*args, **kwargs):
-    pretrained_model = None
+    pretrained_model = "/mnt/liuyi22/PaddlePaddle/PaddleClas/pretrained/resnet101_vd_ssld_imagenet"
     return PSPNet(backbone='ResNet101_vd', pretrained_model=pretrained_model, **kwargs)
 
 
 @manager.MODELS.add_component
 def pspnet_resnet101_vd_os8(*args, **kwargs):
-    pretrained_model = None
+    pretrained_model = "/mnt/liuyi22/PaddlePaddle/PaddleClas/pretrained/resnet101_vd_ssld_imagenet"
     return PSPNet(backbone='ResNet101_vd', output_stride=8, pretrained_model=pretrained_model, **kwargs)
 
 
 @manager.MODELS.add_component
 def pspnet_resnet50_vd(*args, **kwargs):
-    pretrained_model = None
+    pretrained_model = "/mnt/liuyi22/PaddlePaddle/PaddleClas/pretrained/resnet50_vd_ssld_v2_imagenet"
     return PSPNet(backbone='ResNet50_vd', pretrained_model=pretrained_model, **kwargs)
 
 
 @manager.MODELS.add_component
 def pspnet_resnet50_vd_os8(*args, **kwargs):
-    pretrained_model = None
+    pretrained_model = "/mnt/liuyi22/PaddlePaddle/PaddleClas/pretrained/resnet50_vd_ssld_v2_imagenet"
     return PSPNet(backbone='ResNet50_vd', output_stride=8, pretrained_model=pretrained_model, **kwargs)
--
GitLab
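
For reference, below is a minimal dygraph usage sketch of the FastSCNN component added by this patch. It assumes the dygraph.models package layout touched above and the Paddle 1.x fluid.dygraph APIs already used in the code; the number of classes, the random input tensor, and its spatial size are illustrative assumptions only, not part of the patch.

# Minimal usage sketch (assumption: run from a directory where the `dygraph` package is importable).
import numpy as np
from paddle import fluid

from dygraph.models import FastSCNN

with fluid.dygraph.guard():
    # 19 classes is an illustrative choice (e.g. a Cityscapes-like setup); the default is 2.
    model = FastSCNN(num_classes=19)
    model.eval()

    # NCHW float32 input; any spatial size divisible by 32 works. 1024x2048 is the
    # resolution quoted in the docstring; a smaller crop is used here to keep the example light.
    image = fluid.dygraph.to_variable(
        np.random.rand(1, 3, 256, 512).astype('float32'))

    # In eval mode forward() returns (pred, score_map); in train mode (model.train(),
    # with a label tensor passed in) it returns the loss instead.
    pred, score_map = model(image)
    print(pred.shape, score_map.shape)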