Unverified commit 1b5a1e26, authored by jm_12138, committed by GitHub

update modnet_resnet50vd_matting (#2100)

* add requirements.txt

* add init

* update format
Parent 755425ce
@@ -11,33 +11,32 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import argparse
 import os
 import time
-import argparse
-from typing import Callable, Union, List, Tuple
+from typing import Callable
+from typing import List
+from typing import Union
 
-import numpy as np
 import cv2
-import scipy
+import modnet_resnet50vd_matting.processor as P
+import numpy as np
 import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
-from paddlehub.module.module import moduleinfo
-import paddlehub.vision.segmentation_transforms as T
-from paddlehub.module.module import moduleinfo, runnable, serving
+import scipy
 
 from modnet_resnet50vd_matting.resnet import ResNet50_vd
-import modnet_resnet50vd_matting.processor as P
+from paddlehub.module.module import moduleinfo
+from paddlehub.module.module import runnable
+from paddlehub.module.module import serving
 
 
-@moduleinfo(
-    name="modnet_resnet50vd_matting",
-    type="CV/matting",
-    author="paddlepaddle",
-    summary="modnet_resnet50vd_matting is a matting model",
-    version="1.0.0"
-)
+@moduleinfo(name="modnet_resnet50vd_matting",
+            type="CV/matting",
+            author="paddlepaddle",
+            summary="modnet_resnet50vd_matting is a matting model",
+            version="1.0.0")
 class MODNetResNet50Vd(nn.Layer):
     """
     The MODNet implementation based on PaddlePaddle.
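For readers unfamiliar with the `@moduleinfo` registration above: once installed, a PaddleHub module is loaded by the name it registers. A minimal usage sketch (the image path is a placeholder):

```python
import paddlehub as hub

# Load the matting module by its registered name.
model = hub.Module(name="modnet_resnet50vd_matting")

# predict() is defined below in this diff; it returns one alpha matte per input image.
results = model.predict(image_list=["/PATH/TO/IMAGE.jpg"])
```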
@@ -51,14 +50,13 @@ class MODNetResNet50Vd(nn.Layer):
         pretrained(str, optional): The path of pretrained model. Default: None.
     """
 
-    def __init__(self, hr_channels:int = 32, pretrained=None):
+    def __init__(self, hr_channels: int = 32, pretrained=None):
         super(MODNetResNet50Vd, self).__init__()
         self.backbone = ResNet50_vd()
         self.pretrained = pretrained
-        self.head = MODNetHead(
-            hr_channels=hr_channels, backbone_channels=self.backbone.feat_channels)
+        self.head = MODNetHead(hr_channels=hr_channels, backbone_channels=self.backbone.feat_channels)
 
         self.blurer = GaussianBlurLayer(1, 3)
         self.transforms = P.Compose([P.LoadImages(), P.ResizeByShort(), P.ResizeToIntMult(), P.Normalize()])
@@ -73,14 +71,14 @@ class MODNetResNet50Vd(nn.Layer):
             self.set_dict(model_dict)
             print("load pretrained parameters success")
 
-    def preprocess(self, img: Union[str, np.ndarray] , transforms: Callable, trimap: Union[str, np.ndarray] = None):
+    def preprocess(self, img: Union[str, np.ndarray], transforms: Callable, trimap: Union[str, np.ndarray] = None):
         data = {}
         data['img'] = img
         if trimap is not None:
             data['trimap'] = trimap
             data['gt_fields'] = ['trimap']
         data['trans_info'] = []
-        data = self.transforms(data)
+        data = transforms(data)
         data['img'] = paddle.to_tensor(data['img'])
         data['img'] = data['img'].unsqueeze(0)
         if trimap is not None:
@@ -95,9 +93,13 @@ class MODNetResNet50Vd(nn.Layer):
         y = self.head(inputs=inputs, feat_list=feat_list)
         return y
 
-    def predict(self, image_list: list, trimap_list: list = None, visualization: bool =False, save_path: str = "modnet_resnet50vd_matting_output"):
+    def predict(self,
+                image_list: list,
+                trimap_list: list = None,
+                visualization: bool = False,
+                save_path: str = "modnet_resnet50vd_matting_output"):
         self.eval()
-        result= []
+        result = []
         with paddle.no_grad():
             for i, im_path in enumerate(image_list):
                 trimap = trimap_list[i] if trimap_list is not None else None
@@ -118,7 +120,7 @@ class MODNetResNet50Vd(nn.Layer):
         return result
 
     @serving
-    def serving_method(self, images: list, trimaps:list = None, **kwargs):
+    def serving_method(self, images: list, trimaps: list = None, **kwargs):
         """
         Run as a service.
         """
@@ -128,7 +130,7 @@ class MODNetResNet50Vd(nn.Layer):
         else:
             trimap_decoder = None
 
-        outputs = self.predict(image_list=images_decode, trimap_list= trimap_decoder, **kwargs)
+        outputs = self.predict(image_list=images_decode, trimap_list=trimap_decoder, **kwargs)
 
         serving_data = [P.cv2_to_base64(outputs[i]) for i in range(len(outputs))]
         results = {'data': serving_data}
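The serving path expects base64-encoded images and returns base64-encoded mattes (via `cv2_to_base64`). A client sketch, assuming the service was started with `hub serving start -m modnet_resnet50vd_matting` on the default port 8866 (host, port, and image path are placeholders):

```python
import base64
import json

import cv2
import numpy as np
import requests

def cv2_to_base64(image: np.ndarray) -> str:
    # Encode an image the same way the module decodes it on the server side.
    data = cv2.imencode('.jpg', image)[1]
    return base64.b64encode(data.tobytes()).decode('utf8')

org_im = cv2.imread('/PATH/TO/IMAGE.jpg')
payload = {'images': [cv2_to_base64(org_im)]}   # keyword matches serving_method(images=...)
headers = {"Content-type": "application/json"}
url = "http://127.0.0.1:8866/predict/modnet_resnet50vd_matting"
r = requests.post(url=url, headers=headers, data=json.dumps(payload))
# The module's {'data': [...]} dict arrives under the usual PaddleHub "results" wrapper.
print(r.json()["results"])
```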
@@ -139,8 +141,7 @@ class MODNetResNet50Vd(nn.Layer):
         """
         Run as a command.
         """
-        self.parser = argparse.ArgumentParser(
-            description="Run the {} module.".format(self.name),
-            prog='hub run {}'.format(self.name),
-            usage='%(prog)s',
-            add_help=True)
+        self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name),
+                                              prog='hub run {}'.format(self.name),
+                                              usage='%(prog)s',
+                                              add_help=True)
@@ -155,7 +156,10 @@ class MODNetResNet50Vd(nn.Layer):
         else:
             trimap_list = None
 
-        results = self.predict(image_list=[args.input_path], trimap_list=trimap_list, save_path=args.output_dir, visualization=args.visualization)
+        results = self.predict(image_list=[args.input_path],
+                               trimap_list=trimap_list,
+                               save_path=args.output_dir,
+                               visualization=args.visualization)
 
         return results
@@ -164,10 +168,14 @@ class MODNetResNet50Vd(nn.Layer):
         Add the command config options.
         """
 
-        self.arg_config_group.add_argument(
-            '--output_dir', type=str, default="modnet_resnet50vd_matting_output", help="The directory to save output images.")
-        self.arg_config_group.add_argument(
-            '--visualization', type=bool, default=True, help="whether to save output as images.")
+        self.arg_config_group.add_argument('--output_dir',
+                                           type=str,
+                                           default="modnet_resnet50vd_matting_output",
+                                           help="The directory to save output images.")
+        self.arg_config_group.add_argument('--visualization',
+                                           type=bool,
+                                           default=True,
+                                           help="whether to save output as images.")
 
     def add_module_input_arg(self):
         """
@@ -177,11 +185,11 @@ class MODNetResNet50Vd(nn.Layer):
         self.arg_input_group.add_argument('--trimap_path', type=str, default=None, help="path to trimap.")
 
 
 class MODNetHead(nn.Layer):
     """
     Segmentation head.
     """
 
     def __init__(self, hr_channels: int, backbone_channels: int):
         super().__init__()
@@ -196,37 +204,24 @@ class MODNetHead(nn.Layer):
         return pred_matte
 
 
 class FusionBranch(nn.Layer):
     def __init__(self, hr_channels: int, enc_channels: int):
         super().__init__()
-        self.conv_lr4x = Conv2dIBNormRelu(
-            enc_channels[2], hr_channels, 5, stride=1, padding=2)
+        self.conv_lr4x = Conv2dIBNormRelu(enc_channels[2], hr_channels, 5, stride=1, padding=2)
 
-        self.conv_f2x = Conv2dIBNormRelu(
-            2 * hr_channels, hr_channels, 3, stride=1, padding=1)
+        self.conv_f2x = Conv2dIBNormRelu(2 * hr_channels, hr_channels, 3, stride=1, padding=1)
         self.conv_f = nn.Sequential(
-            Conv2dIBNormRelu(
-                hr_channels + 3, int(hr_channels / 2), 3, stride=1, padding=1),
-            Conv2dIBNormRelu(
-                int(hr_channels / 2),
-                1,
-                1,
-                stride=1,
-                padding=0,
-                with_ibn=False,
-                with_relu=False))
+            Conv2dIBNormRelu(hr_channels + 3, int(hr_channels / 2), 3, stride=1, padding=1),
+            Conv2dIBNormRelu(int(hr_channels / 2), 1, 1, stride=1, padding=0, with_ibn=False, with_relu=False))
 
     def forward(self, img: paddle.Tensor, lr8x: paddle.Tensor, hr2x: paddle.Tensor) -> paddle.Tensor:
-        lr4x = F.interpolate(
-            lr8x, scale_factor=2, mode='bilinear', align_corners=False)
+        lr4x = F.interpolate(lr8x, scale_factor=2, mode='bilinear', align_corners=False)
         lr4x = self.conv_lr4x(lr4x)
-        lr2x = F.interpolate(
-            lr4x, scale_factor=2, mode='bilinear', align_corners=False)
+        lr2x = F.interpolate(lr4x, scale_factor=2, mode='bilinear', align_corners=False)
 
         f2x = self.conv_f2x(paddle.concat((lr2x, hr2x), axis=1))
-        f = F.interpolate(
-            f2x, scale_factor=2, mode='bilinear', align_corners=False)
+        f = F.interpolate(f2x, scale_factor=2, mode='bilinear', align_corners=False)
         f = self.conv_f(paddle.concat((f, img), axis=1))
         pred_matte = F.sigmoid(f)
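Each `F.interpolate(..., scale_factor=2)` call in the fusion branch doubles the spatial resolution, walking the low-resolution features from 1/8 scale back to full resolution in three steps. A quick shape check with illustrative values:

```python
import paddle
import paddle.nn.functional as F

x = paddle.rand([1, 32, 64, 64])   # NCHW feature map at 1/8 scale
y = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=False)
print(y.shape)                     # [1, 32, 128, 128] -> now at 1/4 scale
```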
@@ -238,56 +233,33 @@ class HRBranch(nn.Layer):
     High Resolution Branch of MODNet
     """
 
-    def __init__(self, hr_channels: int, enc_channels:int):
+    def __init__(self, hr_channels: int, enc_channels: int):
         super().__init__()
 
-        self.tohr_enc2x = Conv2dIBNormRelu(
-            enc_channels[0], hr_channels, 1, stride=1, padding=0)
-        self.conv_enc2x = Conv2dIBNormRelu(
-            hr_channels + 3, hr_channels, 3, stride=2, padding=1)
+        self.tohr_enc2x = Conv2dIBNormRelu(enc_channels[0], hr_channels, 1, stride=1, padding=0)
+        self.conv_enc2x = Conv2dIBNormRelu(hr_channels + 3, hr_channels, 3, stride=2, padding=1)
 
-        self.tohr_enc4x = Conv2dIBNormRelu(
-            enc_channels[1], hr_channels, 1, stride=1, padding=0)
-        self.conv_enc4x = Conv2dIBNormRelu(
-            2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1)
+        self.tohr_enc4x = Conv2dIBNormRelu(enc_channels[1], hr_channels, 1, stride=1, padding=0)
+        self.conv_enc4x = Conv2dIBNormRelu(2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1)
 
         self.conv_hr4x = nn.Sequential(
-            Conv2dIBNormRelu(
-                2 * hr_channels + enc_channels[2] + 3,
-                2 * hr_channels,
-                3,
-                stride=1,
-                padding=1),
-            Conv2dIBNormRelu(
-                2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1),
-            Conv2dIBNormRelu(
-                2 * hr_channels, hr_channels, 3, stride=1, padding=1))
+            Conv2dIBNormRelu(2 * hr_channels + enc_channels[2] + 3, 2 * hr_channels, 3, stride=1, padding=1),
+            Conv2dIBNormRelu(2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1),
+            Conv2dIBNormRelu(2 * hr_channels, hr_channels, 3, stride=1, padding=1))
 
-        self.conv_hr2x = nn.Sequential(
-            Conv2dIBNormRelu(
-                2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1),
-            Conv2dIBNormRelu(
-                2 * hr_channels, hr_channels, 3, stride=1, padding=1),
-            Conv2dIBNormRelu(hr_channels, hr_channels, 3, stride=1, padding=1),
-            Conv2dIBNormRelu(hr_channels, hr_channels, 3, stride=1, padding=1))
+        self.conv_hr2x = nn.Sequential(Conv2dIBNormRelu(2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1),
+                                       Conv2dIBNormRelu(2 * hr_channels, hr_channels, 3, stride=1, padding=1),
+                                       Conv2dIBNormRelu(hr_channels, hr_channels, 3, stride=1, padding=1),
+                                       Conv2dIBNormRelu(hr_channels, hr_channels, 3, stride=1, padding=1))
 
         self.conv_hr = nn.Sequential(
-            Conv2dIBNormRelu(
-                hr_channels + 3, hr_channels, 3, stride=1, padding=1),
-            Conv2dIBNormRelu(
-                hr_channels,
-                1,
-                1,
-                stride=1,
-                padding=0,
-                with_ibn=False,
-                with_relu=False))
+            Conv2dIBNormRelu(hr_channels + 3, hr_channels, 3, stride=1, padding=1),
+            Conv2dIBNormRelu(hr_channels, 1, 1, stride=1, padding=0, with_ibn=False, with_relu=False))
 
-    def forward(self, img: paddle.Tensor, enc2x: paddle.Tensor, enc4x: paddle.Tensor, lr8x: paddle.Tensor) -> paddle.Tensor:
-        img2x = F.interpolate(
-            img, scale_factor=1 / 2, mode='bilinear', align_corners=False)
-        img4x = F.interpolate(
-            img, scale_factor=1 / 4, mode='bilinear', align_corners=False)
+    def forward(self, img: paddle.Tensor, enc2x: paddle.Tensor, enc4x: paddle.Tensor,
+                lr8x: paddle.Tensor) -> paddle.Tensor:
+        img2x = F.interpolate(img, scale_factor=1 / 2, mode='bilinear', align_corners=False)
+        img4x = F.interpolate(img, scale_factor=1 / 4, mode='bilinear', align_corners=False)
 
         enc2x = self.tohr_enc2x(enc2x)
         hr4x = self.conv_enc2x(paddle.concat((img2x, enc2x), axis=1))
@@ -295,12 +267,10 @@ class HRBranch(nn.Layer):
         enc4x = self.tohr_enc4x(enc4x)
         hr4x = self.conv_enc4x(paddle.concat((hr4x, enc4x), axis=1))
 
-        lr4x = F.interpolate(
-            lr8x, scale_factor=2, mode='bilinear', align_corners=False)
+        lr4x = F.interpolate(lr8x, scale_factor=2, mode='bilinear', align_corners=False)
         hr4x = self.conv_hr4x(paddle.concat((hr4x, lr4x, img4x), axis=1))
 
-        hr2x = F.interpolate(
-            hr4x, scale_factor=2, mode='bilinear', align_corners=False)
+        hr2x = F.interpolate(hr4x, scale_factor=2, mode='bilinear', align_corners=False)
         hr2x = self.conv_hr2x(paddle.concat((hr2x, enc2x), axis=1))
 
         pred_detail = None
         return pred_detail, hr2x
@@ -310,15 +280,13 @@ class LRBranch(nn.Layer):
     """
     Low Resolution Branch of MODNet
     """
 
     def __init__(self, backbone_channels: int):
         super().__init__()
         self.se_block = SEBlock(backbone_channels[4], reduction=4)
-        self.conv_lr16x = Conv2dIBNormRelu(
-            backbone_channels[4], backbone_channels[3], 5, stride=1, padding=2)
-        self.conv_lr8x = Conv2dIBNormRelu(
-            backbone_channels[3], backbone_channels[2], 5, stride=1, padding=2)
-        self.conv_lr = Conv2dIBNormRelu(
-            backbone_channels[2],
-            1,
-            3,
-            stride=2,
+        self.conv_lr16x = Conv2dIBNormRelu(backbone_channels[4], backbone_channels[3], 5, stride=1, padding=2)
+        self.conv_lr8x = Conv2dIBNormRelu(backbone_channels[3], backbone_channels[2], 5, stride=1, padding=2)
+        self.conv_lr = Conv2dIBNormRelu(backbone_channels[2],
+                                        1,
+                                        3,
+                                        stride=2,
@@ -330,11 +298,9 @@ class LRBranch(nn.Layer):
         enc2x, enc4x, enc32x = feat_list[0], feat_list[1], feat_list[4]
 
         enc32x = self.se_block(enc32x)
-        lr16x = F.interpolate(
-            enc32x, scale_factor=2, mode='bilinear', align_corners=False)
+        lr16x = F.interpolate(enc32x, scale_factor=2, mode='bilinear', align_corners=False)
         lr16x = self.conv_lr16x(lr16x)
-        lr8x = F.interpolate(
-            lr16x, scale_factor=2, mode='bilinear', align_corners=False)
+        lr8x = F.interpolate(lr16x, scale_factor=2, mode='bilinear', align_corners=False)
         lr8x = self.conv_lr8x(lr8x)
 
         pred_semantic = None
@@ -376,7 +342,7 @@ class Conv2dIBNormRelu(nn.Layer):
                  kernel_size: int,
                  stride: int = 1,
                  padding: int = 0,
-                 dilation:int = 1,
+                 dilation: int = 1,
                  groups: int = 1,
                  bias_attr: paddle.ParamAttr = None,
                  with_ibn: bool = True,
@@ -385,8 +351,7 @@ class Conv2dIBNormRelu(nn.Layer):
         super().__init__()
 
         layers = [
-            nn.Conv2D(
-                in_channels,
-                out_channels,
-                kernel_size,
-                stride=stride,
+            nn.Conv2D(in_channels,
+                      out_channels,
+                      kernel_size,
+                      stride=stride,
@@ -413,20 +378,13 @@ class SEBlock(nn.Layer):
     SE Block Proposed in https://arxiv.org/pdf/1709.01507.pdf
     """
 
-    def __init__(self, num_channels: int, reduction:int = 1):
+    def __init__(self, num_channels: int, reduction: int = 1):
         super().__init__()
         self.pool = nn.AdaptiveAvgPool2D(1)
-        self.conv = nn.Sequential(
-            nn.Conv2D(
-                num_channels,
-                int(num_channels // reduction),
-                1,
-                bias_attr=False), nn.ReLU(),
-            nn.Conv2D(
-                int(num_channels // reduction),
-                num_channels,
-                1,
-                bias_attr=False), nn.Sigmoid())
+        self.conv = nn.Sequential(nn.Conv2D(num_channels, int(num_channels // reduction), 1,
+                                            bias_attr=False), nn.ReLU(),
+                                  nn.Conv2D(int(num_channels // reduction), num_channels, 1, bias_attr=False),
+                                  nn.Sigmoid())
 
     def forward(self, x: paddle.Tensor) -> paddle.Tensor:
         w = self.pool(x)
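SEBlock is a standard squeeze-and-excitation block: global average pooling squeezes each channel to a single value, the two 1x1 convolutions map those values to a per-channel weight in (0, 1), and the input is rescaled channel-wise by that weight. Shape-wise, with illustrative values:

```python
import paddle
import paddle.nn as nn

x = paddle.rand([1, 2048, 16, 16])   # e.g. the enc32x feature map
pool = nn.AdaptiveAvgPool2D(1)
w = pool(x)
print(w.shape)   # [1, 2048, 1, 1] -- one scalar per channel, broadcast back over x
```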
@@ -454,14 +412,7 @@ class GaussianBlurLayer(nn.Layer):
         self.op = nn.Sequential(
             nn.Pad2D(int(self.kernel_size / 2), mode='reflect'),
-            nn.Conv2D(
-                channels,
-                channels,
-                self.kernel_size,
-                stride=1,
-                padding=0,
-                bias_attr=False,
-                groups=channels))
+            nn.Conv2D(channels, channels, self.kernel_size, stride=1, padding=0, bias_attr=False, groups=channels))
 
         self._init_kernel()
         self.op[1].weight.stop_gradient = True
@@ -479,8 +430,7 @@ class GaussianBlurLayer(nn.Layer):
             exit()
         elif not x.shape[1] == self.channels:
             print('In \'GaussianBlurLayer\', the required channel ({0}) is'
-                  'not the same as input ({1})\n'.format(
-                      self.channels, x.shape[1]))
+                  'not the same as input ({1})\n'.format(self.channels, x.shape[1]))
             exit()
 
         return self.op(x)
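The blur itself is a fixed depthwise convolution (`groups=channels`) whose weights `_init_kernel` fills with a normalized Gaussian, so each channel is blurred independently and gradients never flow into the kernel. A sketch of how such a kernel can be built; this reconstructs the idea, not necessarily the exact upstream code:

```python
import cv2
import numpy as np

kernel_size = 3
k1d = cv2.getGaussianKernel(kernel_size, 0)   # sigma=0 lets OpenCV derive it from the size
k2d = (k1d @ k1d.T).astype(np.float32)        # separable outer product -> 2-D Gaussian
print(k2d.sum())                              # ~1.0: blurring preserves total intensity
```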
......
@@ -11,17 +11,17 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import random
 import base64
-from typing import Callable, Union, List, Tuple
+from typing import Callable
+from typing import List
+from typing import Tuple
+from typing import Union
 
 import cv2
 import numpy as np
 import paddle
 import paddle.nn.functional as F
 from paddleseg.transforms import functional
-from PIL import Image
 
 
 class Compose:
@@ -61,6 +61,7 @@ class LoadImages:
     Args:
         to_rgb (bool, optional): If converting image to RGB color space. Default: True.
     """
+
     def __init__(self, to_rgb: bool = True):
         self.to_rgb = to_rgb
@@ -95,7 +96,7 @@ class ResizeByShort:
         short_size (int): The target size of short side.
     """
 
-    def __init__(self, short_size: int =512):
+    def __init__(self, short_size: int = 512):
         self.short_size = short_size
 
     def __call__(self, data: dict) -> dict:
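ResizeByShort scales an image so its shorter side equals `short_size` while preserving the aspect ratio (the transform delegates the actual resize to paddleseg's functional helpers). The arithmetic, with illustrative numbers:

```python
h, w, short_size = 720, 1280, 512
scale = short_size / min(h, w)
new_w, new_h = int(round(w * scale)), int(round(h * scale))
print(new_w, new_h)   # 910 512 -- the short side lands exactly on short_size
```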
@@ -140,14 +141,13 @@ class Normalize:
         ValueError: When mean/std is not list or any value in std is 0.
     """
 
-    def __init__(self, mean: Union[List[float], Tuple[float]] = (0.5, 0.5, 0.5), std: Union[List[float], Tuple[float]] = (0.5, 0.5, 0.5)):
+    def __init__(self,
+                 mean: Union[List[float], Tuple[float]] = (0.5, 0.5, 0.5),
+                 std: Union[List[float], Tuple[float]] = (0.5, 0.5, 0.5)):
         self.mean = mean
         self.std = std
-        if not (isinstance(self.mean, (list, tuple))
-                and isinstance(self.std, (list, tuple))):
-            raise ValueError(
-                "{}: input type is invalid. It should be list or tuple".format(
-                    self))
+        if not (isinstance(self.mean, (list, tuple)) and isinstance(self.std, (list, tuple))):
+            raise ValueError("{}: input type is invalid. It should be list or tuple".format(self))
         from functools import reduce
         if reduce(lambda x, y: x * y, self.std) == 0:
             raise ValueError('{}: std is invalid!'.format(self))
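With the default mean/std of 0.5, normalization maps pixel values scaled to [0, 1] onto [-1, 1]; a zero anywhere in std would divide by zero, which is what the `reduce` product check guards against. Quick arithmetic check:

```python
import numpy as np

px = np.array([0.0, 0.5, 1.0])   # pixel values already scaled to [0, 1]
print((px - 0.5) / 0.5)          # [-1.  0.  1.]
```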
@@ -177,6 +177,7 @@ def reverse_transform(alpha: paddle.Tensor, trans_info: List[str]):
             raise Exception("Unexpected info '{}' in im_info".format(item[0]))
     return alpha
 
+
 def save_alpha_pred(alpha: np.ndarray, trimap: np.ndarray = None):
     """
     The value of alpha is range [0, 1], shape should be [h,w]
......
@@ -11,13 +11,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
 import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
 from paddleseg.models import layers
-from paddleseg.utils import utils
 
 __all__ = ["ResNet50_vd"]
@@ -39,10 +36,8 @@ class ConvBNLayer(nn.Layer):
         super(ConvBNLayer, self).__init__()
 
         self.is_vd_mode = is_vd_mode
-        self._pool2d_avg = nn.AvgPool2D(
-            kernel_size=2, stride=2, padding=0, ceil_mode=True)
-        self._conv = nn.Conv2D(
-            in_channels=in_channels,
-            out_channels=out_channels,
-            kernel_size=kernel_size,
-            stride=stride,
+        self._pool2d_avg = nn.AvgPool2D(kernel_size=2, stride=2, padding=0, ceil_mode=True)
+        self._conv = nn.Conv2D(in_channels=in_channels,
+                               out_channels=out_channels,
+                               kernel_size=kernel_size,
+                               stride=stride,
@@ -76,30 +71,20 @@ class BottleneckBlock(nn.Layer):
                  dilation: int = 1):
         super(BottleneckBlock, self).__init__()
 
-        self.conv0 = ConvBNLayer(
-            in_channels=in_channels,
-            out_channels=out_channels,
-            kernel_size=1,
-            act='relu')
+        self.conv0 = ConvBNLayer(in_channels=in_channels, out_channels=out_channels, kernel_size=1, act='relu')
 
         self.dilation = dilation
 
-        self.conv1 = ConvBNLayer(
-            in_channels=out_channels,
-            out_channels=out_channels,
-            kernel_size=3,
-            stride=stride,
-            act='relu',
-            dilation=dilation)
-        self.conv2 = ConvBNLayer(
-            in_channels=out_channels,
-            out_channels=out_channels * 4,
-            kernel_size=1,
-            act=None)
+        self.conv1 = ConvBNLayer(in_channels=out_channels,
+                                 out_channels=out_channels,
+                                 kernel_size=3,
+                                 stride=stride,
+                                 act='relu',
+                                 dilation=dilation)
+        self.conv2 = ConvBNLayer(in_channels=out_channels, out_channels=out_channels * 4, kernel_size=1, act=None)
 
         if not shortcut:
-            self.short = ConvBNLayer(
-                in_channels=in_channels,
-                out_channels=out_channels * 4,
-                kernel_size=1,
-                stride=1,
+            self.short = ConvBNLayer(in_channels=in_channels,
+                                     out_channels=out_channels * 4,
+                                     kernel_size=1,
+                                     stride=1,
@@ -133,29 +118,19 @@
 class BasicBlock(nn.Layer):
     """Basic residual block"""
 
-    def __init__(self,
-                 in_channels: int,
-                 out_channels: int,
-                 stride: int,
-                 shortcut: bool = True,
-                 if_first: bool = False):
+    def __init__(self, in_channels: int, out_channels: int, stride: int, shortcut: bool = True, if_first: bool = False):
         super(BasicBlock, self).__init__()
         self.stride = stride
-        self.conv0 = ConvBNLayer(
-            in_channels=in_channels,
-            out_channels=out_channels,
-            kernel_size=3,
-            stride=stride,
-            act='relu')
-        self.conv1 = ConvBNLayer(
-            in_channels=out_channels,
-            out_channels=out_channels,
-            kernel_size=3,
-            act=None)
+        self.conv0 = ConvBNLayer(in_channels=in_channels,
+                                 out_channels=out_channels,
+                                 kernel_size=3,
+                                 stride=stride,
+                                 act='relu')
+        self.conv1 = ConvBNLayer(in_channels=out_channels, out_channels=out_channels, kernel_size=3, act=None)
 
         if not shortcut:
-            self.short = ConvBNLayer(
-                in_channels=in_channels,
-                out_channels=out_channels,
-                kernel_size=1,
-                stride=1,
+            self.short = ConvBNLayer(in_channels=in_channels,
+                                     out_channels=out_channels,
+                                     kernel_size=1,
+                                     stride=1,
@@ -212,13 +187,11 @@ class ResNet_vd(nn.Layer):
             depth = [3, 8, 36, 3]
         elif layers == 200:
             depth = [3, 12, 48, 3]
-        num_channels = [64, 256, 512, 1024
-                        ] if layers >= 50 else [64, 64, 128, 256]
+        num_channels = [64, 256, 512, 1024] if layers >= 50 else [64, 64, 128, 256]
         num_filters = [64, 128, 256, 512]
 
         # for channels of four returned stages
-        self.feat_channels = [c * 4 for c in num_filters
-                              ] if layers >= 50 else num_filters
+        self.feat_channels = [c * 4 for c in num_filters] if layers >= 50 else num_filters
         self.feat_channels = [64] + self.feat_channels
 
         dilation_dict = None
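For the 50-layer variant used here, the bottleneck expansion factor of 4 yields the five stage widths that the MODNet head consumes as `backbone_channels`. Concretely:

```python
num_filters = [64, 128, 256, 512]
feat_channels = [64] + [c * 4 for c in num_filters]   # stem + four bottleneck stages
print(feat_channels)   # [64, 256, 512, 1024, 2048]
```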
@@ -227,24 +200,9 @@ class ResNet_vd(nn.Layer):
         elif output_stride == 16:
             dilation_dict = {3: 2}
 
-        self.conv1_1 = ConvBNLayer(
-            in_channels=input_channels,
-            out_channels=32,
-            kernel_size=3,
-            stride=2,
-            act='relu')
-        self.conv1_2 = ConvBNLayer(
-            in_channels=32,
-            out_channels=32,
-            kernel_size=3,
-            stride=1,
-            act='relu')
-        self.conv1_3 = ConvBNLayer(
-            in_channels=32,
-            out_channels=64,
-            kernel_size=3,
-            stride=1,
-            act='relu')
+        self.conv1_1 = ConvBNLayer(in_channels=input_channels, out_channels=32, kernel_size=3, stride=2, act='relu')
+        self.conv1_2 = ConvBNLayer(in_channels=32, out_channels=32, kernel_size=3, stride=1, act='relu')
+        self.conv1_3 = ConvBNLayer(in_channels=32, out_channels=64, kernel_size=3, stride=1, act='relu')
         self.pool2d_max = nn.MaxPool2D(kernel_size=3, stride=2, padding=1)
 
         # self.block_list = []
@@ -264,8 +222,7 @@ class ResNet_vd(nn.Layer):
                     ###############################################################################
                     # Add dilation rate for some segmentation tasks, if dilation_dict is not None.
-                    dilation_rate = dilation_dict[
-                        block] if dilation_dict and block in dilation_dict else 1
+                    dilation_rate = dilation_dict[block] if dilation_dict and block in dilation_dict else 1
 
                     # Actually block here is 'stage', and i is 'block' in 'stage'
                     # At the stage 4, expand the dilation_rate if given multi_grid
@@ -275,12 +232,9 @@ class ResNet_vd(nn.Layer):
                     bottleneck_block = self.add_sublayer(
                         'bb_%d_%d' % (block, i),
-                        BottleneckBlock(
-                            in_channels=num_channels[block]
-                            if i == 0 else num_filters[block] * 4,
-                            out_channels=num_filters[block],
-                            stride=2 if i == 0 and block != 0
-                            and dilation_rate == 1 else 1,
-                            shortcut=shortcut,
-                            if_first=block == i == 0,
-                            dilation=dilation_rate))
+                        BottleneckBlock(in_channels=num_channels[block] if i == 0 else num_filters[block] * 4,
+                                        out_channels=num_filters[block],
+                                        stride=2 if i == 0 and block != 0 and dilation_rate == 1 else 1,
+                                        shortcut=shortcut,
+                                        if_first=block == i == 0,
+                                        dilation=dilation_rate))
@@ -296,9 +250,7 @@ class ResNet_vd(nn.Layer):
                     conv_name = "res" + str(block + 2) + chr(97 + i)
                     basic_block = self.add_sublayer(
                         'bb_%d_%d' % (block, i),
-                        BasicBlock(
-                            in_channels=num_channels[block]
-                            if i == 0 else num_filters[block],
-                            out_channels=num_filters[block],
-                            stride=2 if i == 0 and block != 0 else 1,
-                            shortcut=shortcut,
+                        BasicBlock(in_channels=num_channels[block] if i == 0 else num_filters[block],
+                                   out_channels=num_filters[block],
+                                   stride=2 if i == 0 and block != 0 else 1,
+                                   shortcut=shortcut,
......