Unverified commit f6219dda authored by Nyakku Shigure, committed by GitHub

reuse ConvNormActivation in some vision models (#40431)

* reuse ConvNormActivation in some vision models
Parent 34ac7b74
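The pattern is identical across every file touched below: a locally defined Conv2D + BatchNorm (+ activation) block is replaced by the shared ConvNormActivation layer from paddle.vision.ops. A minimal sketch of the replacement, assuming the ConvNormActivation signature documented at the end of this diff:

import paddle
import paddle.nn as nn
from paddle.vision.ops import ConvNormActivation

# Old style (removed in this commit): Conv2D -> BatchNorm -> ReLU wired up by hand.
# New style: one reusable block; norm_layer and activation_layer default to
# nn.BatchNorm2D and nn.ReLU, written out explicitly here for clarity.
stem_conv = ConvNormActivation(
    in_channels=3,
    out_channels=32,
    kernel_size=3,
    stride=2,
    padding=0,
    norm_layer=nn.BatchNorm2D,
    activation_layer=nn.ReLU)

x = paddle.rand([1, 3, 224, 224])
print(stem_conv(x).shape)  # [1, 32, 111, 111]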
......@@ -19,75 +19,60 @@ from __future__ import print_function
import math
import paddle
import paddle.nn as nn
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
from paddle.nn import Linear, Dropout
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
from paddle.nn.initializer import Uniform
from paddle.fluid.param_attr import ParamAttr
from paddle.utils.download import get_weights_path_from_url
from ..ops import ConvNormActivation
__all__ = []
model_urls = {
"inception_v3":
("https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/InceptionV3_pretrained.pdparams",
"e4d0905a818f6bb7946e881777a8a935")
("https://paddle-hapi.bj.bcebos.com/models/inception_v3.pdparams",
"649a4547c3243e8b59c656f41fe330b8")
}
class ConvBNLayer(nn.Layer):
def __init__(self,
num_channels,
num_filters,
filter_size,
stride=1,
padding=0,
groups=1,
act="relu"):
super().__init__()
self.act = act
self.conv = Conv2D(
in_channels=num_channels,
out_channels=num_filters,
kernel_size=filter_size,
stride=stride,
padding=padding,
groups=groups,
bias_attr=False)
self.bn = BatchNorm(num_filters)
self.relu = nn.ReLU()
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
if self.act:
x = self.relu(x)
return x
class InceptionStem(nn.Layer):
def __init__(self):
super().__init__()
self.conv_1a_3x3 = ConvBNLayer(
num_channels=3, num_filters=32, filter_size=3, stride=2, act="relu")
self.conv_2a_3x3 = ConvBNLayer(
num_channels=32,
num_filters=32,
filter_size=3,
self.conv_1a_3x3 = ConvNormActivation(
in_channels=3,
out_channels=32,
kernel_size=3,
stride=2,
padding=0,
activation_layer=nn.ReLU)
self.conv_2a_3x3 = ConvNormActivation(
in_channels=32,
out_channels=32,
kernel_size=3,
stride=1,
act="relu")
self.conv_2b_3x3 = ConvBNLayer(
num_channels=32,
num_filters=64,
filter_size=3,
padding=0,
activation_layer=nn.ReLU)
self.conv_2b_3x3 = ConvNormActivation(
in_channels=32,
out_channels=64,
kernel_size=3,
padding=1,
act="relu")
activation_layer=nn.ReLU)
self.max_pool = MaxPool2D(kernel_size=3, stride=2, padding=0)
self.conv_3b_1x1 = ConvBNLayer(
num_channels=64, num_filters=80, filter_size=1, act="relu")
self.conv_4a_3x3 = ConvBNLayer(
num_channels=80, num_filters=192, filter_size=3, act="relu")
self.conv_3b_1x1 = ConvNormActivation(
in_channels=64,
out_channels=80,
kernel_size=1,
padding=0,
activation_layer=nn.ReLU)
self.conv_4a_3x3 = ConvNormActivation(
in_channels=80,
out_channels=192,
kernel_size=3,
padding=0,
activation_layer=nn.ReLU)
def forward(self, x):
x = self.conv_1a_3x3(x)
......@@ -103,47 +88,53 @@ class InceptionStem(nn.Layer):
class InceptionA(nn.Layer):
def __init__(self, num_channels, pool_features):
super().__init__()
self.branch1x1 = ConvBNLayer(
num_channels=num_channels,
num_filters=64,
filter_size=1,
act="relu")
self.branch5x5_1 = ConvBNLayer(
num_channels=num_channels,
num_filters=48,
filter_size=1,
act="relu")
self.branch5x5_2 = ConvBNLayer(
num_channels=48,
num_filters=64,
filter_size=5,
self.branch1x1 = ConvNormActivation(
in_channels=num_channels,
out_channels=64,
kernel_size=1,
padding=0,
activation_layer=nn.ReLU)
self.branch5x5_1 = ConvNormActivation(
in_channels=num_channels,
out_channels=48,
kernel_size=1,
padding=0,
activation_layer=nn.ReLU)
self.branch5x5_2 = ConvNormActivation(
in_channels=48,
out_channels=64,
kernel_size=5,
padding=2,
act="relu")
self.branch3x3dbl_1 = ConvBNLayer(
num_channels=num_channels,
num_filters=64,
filter_size=1,
act="relu")
self.branch3x3dbl_2 = ConvBNLayer(
num_channels=64,
num_filters=96,
filter_size=3,
activation_layer=nn.ReLU)
self.branch3x3dbl_1 = ConvNormActivation(
in_channels=num_channels,
out_channels=64,
kernel_size=1,
padding=0,
activation_layer=nn.ReLU)
self.branch3x3dbl_2 = ConvNormActivation(
in_channels=64,
out_channels=96,
kernel_size=3,
padding=1,
act="relu")
self.branch3x3dbl_3 = ConvBNLayer(
num_channels=96,
num_filters=96,
filter_size=3,
activation_layer=nn.ReLU)
self.branch3x3dbl_3 = ConvNormActivation(
in_channels=96,
out_channels=96,
kernel_size=3,
padding=1,
act="relu")
activation_layer=nn.ReLU)
self.branch_pool = AvgPool2D(
kernel_size=3, stride=1, padding=1, exclusive=False)
self.branch_pool_conv = ConvBNLayer(
num_channels=num_channels,
num_filters=pool_features,
filter_size=1,
act="relu")
self.branch_pool_conv = ConvNormActivation(
in_channels=num_channels,
out_channels=pool_features,
kernel_size=1,
padding=0,
activation_layer=nn.ReLU)
def forward(self, x):
branch1x1 = self.branch1x1(x)
......@@ -164,29 +155,34 @@ class InceptionA(nn.Layer):
class InceptionB(nn.Layer):
def __init__(self, num_channels):
super().__init__()
self.branch3x3 = ConvBNLayer(
num_channels=num_channels,
num_filters=384,
filter_size=3,
self.branch3x3 = ConvNormActivation(
in_channels=num_channels,
out_channels=384,
kernel_size=3,
stride=2,
act="relu")
self.branch3x3dbl_1 = ConvBNLayer(
num_channels=num_channels,
num_filters=64,
filter_size=1,
act="relu")
self.branch3x3dbl_2 = ConvBNLayer(
num_channels=64,
num_filters=96,
filter_size=3,
padding=0,
activation_layer=nn.ReLU)
self.branch3x3dbl_1 = ConvNormActivation(
in_channels=num_channels,
out_channels=64,
kernel_size=1,
padding=0,
activation_layer=nn.ReLU)
self.branch3x3dbl_2 = ConvNormActivation(
in_channels=64,
out_channels=96,
kernel_size=3,
padding=1,
act="relu")
self.branch3x3dbl_3 = ConvBNLayer(
num_channels=96,
num_filters=96,
filter_size=3,
activation_layer=nn.ReLU)
self.branch3x3dbl_3 = ConvNormActivation(
in_channels=96,
out_channels=96,
kernel_size=3,
stride=2,
act="relu")
padding=0,
activation_layer=nn.ReLU)
self.branch_pool = MaxPool2D(kernel_size=3, stride=2)
def forward(self, x):
......@@ -206,70 +202,74 @@ class InceptionB(nn.Layer):
class InceptionC(nn.Layer):
def __init__(self, num_channels, channels_7x7):
super().__init__()
self.branch1x1 = ConvBNLayer(
num_channels=num_channels,
num_filters=192,
filter_size=1,
act="relu")
self.branch7x7_1 = ConvBNLayer(
num_channels=num_channels,
num_filters=channels_7x7,
filter_size=1,
self.branch1x1 = ConvNormActivation(
in_channels=num_channels,
out_channels=192,
kernel_size=1,
padding=0,
activation_layer=nn.ReLU)
self.branch7x7_1 = ConvNormActivation(
in_channels=num_channels,
out_channels=channels_7x7,
kernel_size=1,
stride=1,
act="relu")
self.branch7x7_2 = ConvBNLayer(
num_channels=channels_7x7,
num_filters=channels_7x7,
filter_size=(1, 7),
padding=0,
activation_layer=nn.ReLU)
self.branch7x7_2 = ConvNormActivation(
in_channels=channels_7x7,
out_channels=channels_7x7,
kernel_size=(1, 7),
stride=1,
padding=(0, 3),
act="relu")
self.branch7x7_3 = ConvBNLayer(
num_channels=channels_7x7,
num_filters=192,
filter_size=(7, 1),
activation_layer=nn.ReLU)
self.branch7x7_3 = ConvNormActivation(
in_channels=channels_7x7,
out_channels=192,
kernel_size=(7, 1),
stride=1,
padding=(3, 0),
act="relu")
self.branch7x7dbl_1 = ConvBNLayer(
num_channels=num_channels,
num_filters=channels_7x7,
filter_size=1,
act="relu")
self.branch7x7dbl_2 = ConvBNLayer(
num_channels=channels_7x7,
num_filters=channels_7x7,
filter_size=(7, 1),
activation_layer=nn.ReLU)
self.branch7x7dbl_1 = ConvNormActivation(
in_channels=num_channels,
out_channels=channels_7x7,
kernel_size=1,
padding=0,
activation_layer=nn.ReLU)
self.branch7x7dbl_2 = ConvNormActivation(
in_channels=channels_7x7,
out_channels=channels_7x7,
kernel_size=(7, 1),
padding=(3, 0),
act="relu")
self.branch7x7dbl_3 = ConvBNLayer(
num_channels=channels_7x7,
num_filters=channels_7x7,
filter_size=(1, 7),
activation_layer=nn.ReLU)
self.branch7x7dbl_3 = ConvNormActivation(
in_channels=channels_7x7,
out_channels=channels_7x7,
kernel_size=(1, 7),
padding=(0, 3),
act="relu")
self.branch7x7dbl_4 = ConvBNLayer(
num_channels=channels_7x7,
num_filters=channels_7x7,
filter_size=(7, 1),
activation_layer=nn.ReLU)
self.branch7x7dbl_4 = ConvNormActivation(
in_channels=channels_7x7,
out_channels=channels_7x7,
kernel_size=(7, 1),
padding=(3, 0),
act="relu")
self.branch7x7dbl_5 = ConvBNLayer(
num_channels=channels_7x7,
num_filters=192,
filter_size=(1, 7),
activation_layer=nn.ReLU)
self.branch7x7dbl_5 = ConvNormActivation(
in_channels=channels_7x7,
out_channels=192,
kernel_size=(1, 7),
padding=(0, 3),
act="relu")
activation_layer=nn.ReLU)
self.branch_pool = AvgPool2D(
kernel_size=3, stride=1, padding=1, exclusive=False)
self.branch_pool_conv = ConvBNLayer(
num_channels=num_channels,
num_filters=192,
filter_size=1,
act="relu")
self.branch_pool_conv = ConvNormActivation(
in_channels=num_channels,
out_channels=192,
kernel_size=1,
padding=0,
activation_layer=nn.ReLU)
def forward(self, x):
branch1x1 = self.branch1x1(x)
......@@ -296,40 +296,46 @@ class InceptionC(nn.Layer):
class InceptionD(nn.Layer):
def __init__(self, num_channels):
super().__init__()
self.branch3x3_1 = ConvBNLayer(
num_channels=num_channels,
num_filters=192,
filter_size=1,
act="relu")
self.branch3x3_2 = ConvBNLayer(
num_channels=192,
num_filters=320,
filter_size=3,
self.branch3x3_1 = ConvNormActivation(
in_channels=num_channels,
out_channels=192,
kernel_size=1,
padding=0,
activation_layer=nn.ReLU)
self.branch3x3_2 = ConvNormActivation(
in_channels=192,
out_channels=320,
kernel_size=3,
stride=2,
act="relu")
self.branch7x7x3_1 = ConvBNLayer(
num_channels=num_channels,
num_filters=192,
filter_size=1,
act="relu")
self.branch7x7x3_2 = ConvBNLayer(
num_channels=192,
num_filters=192,
filter_size=(1, 7),
padding=0,
activation_layer=nn.ReLU)
self.branch7x7x3_1 = ConvNormActivation(
in_channels=num_channels,
out_channels=192,
kernel_size=1,
padding=0,
activation_layer=nn.ReLU)
self.branch7x7x3_2 = ConvNormActivation(
in_channels=192,
out_channels=192,
kernel_size=(1, 7),
padding=(0, 3),
act="relu")
self.branch7x7x3_3 = ConvBNLayer(
num_channels=192,
num_filters=192,
filter_size=(7, 1),
activation_layer=nn.ReLU)
self.branch7x7x3_3 = ConvNormActivation(
in_channels=192,
out_channels=192,
kernel_size=(7, 1),
padding=(3, 0),
act="relu")
self.branch7x7x3_4 = ConvBNLayer(
num_channels=192,
num_filters=192,
filter_size=3,
activation_layer=nn.ReLU)
self.branch7x7x3_4 = ConvNormActivation(
in_channels=192,
out_channels=192,
kernel_size=3,
stride=2,
act="relu")
padding=0,
activation_layer=nn.ReLU)
self.branch_pool = MaxPool2D(kernel_size=3, stride=2)
def forward(self, x):
......@@ -350,59 +356,64 @@ class InceptionD(nn.Layer):
class InceptionE(nn.Layer):
def __init__(self, num_channels):
super().__init__()
self.branch1x1 = ConvBNLayer(
num_channels=num_channels,
num_filters=320,
filter_size=1,
act="relu")
self.branch3x3_1 = ConvBNLayer(
num_channels=num_channels,
num_filters=384,
filter_size=1,
act="relu")
self.branch3x3_2a = ConvBNLayer(
num_channels=384,
num_filters=384,
filter_size=(1, 3),
self.branch1x1 = ConvNormActivation(
in_channels=num_channels,
out_channels=320,
kernel_size=1,
padding=0,
activation_layer=nn.ReLU)
self.branch3x3_1 = ConvNormActivation(
in_channels=num_channels,
out_channels=384,
kernel_size=1,
padding=0,
activation_layer=nn.ReLU)
self.branch3x3_2a = ConvNormActivation(
in_channels=384,
out_channels=384,
kernel_size=(1, 3),
padding=(0, 1),
act="relu")
self.branch3x3_2b = ConvBNLayer(
num_channels=384,
num_filters=384,
filter_size=(3, 1),
activation_layer=nn.ReLU)
self.branch3x3_2b = ConvNormActivation(
in_channels=384,
out_channels=384,
kernel_size=(3, 1),
padding=(1, 0),
act="relu")
self.branch3x3dbl_1 = ConvBNLayer(
num_channels=num_channels,
num_filters=448,
filter_size=1,
act="relu")
self.branch3x3dbl_2 = ConvBNLayer(
num_channels=448,
num_filters=384,
filter_size=3,
activation_layer=nn.ReLU)
self.branch3x3dbl_1 = ConvNormActivation(
in_channels=num_channels,
out_channels=448,
kernel_size=1,
padding=0,
activation_layer=nn.ReLU)
self.branch3x3dbl_2 = ConvNormActivation(
in_channels=448,
out_channels=384,
kernel_size=3,
padding=1,
act="relu")
self.branch3x3dbl_3a = ConvBNLayer(
num_channels=384,
num_filters=384,
filter_size=(1, 3),
activation_layer=nn.ReLU)
self.branch3x3dbl_3a = ConvNormActivation(
in_channels=384,
out_channels=384,
kernel_size=(1, 3),
padding=(0, 1),
act="relu")
self.branch3x3dbl_3b = ConvBNLayer(
num_channels=384,
num_filters=384,
filter_size=(3, 1),
activation_layer=nn.ReLU)
self.branch3x3dbl_3b = ConvNormActivation(
in_channels=384,
out_channels=384,
kernel_size=(3, 1),
padding=(1, 0),
act="relu")
activation_layer=nn.ReLU)
self.branch_pool = AvgPool2D(
kernel_size=3, stride=1, padding=1, exclusive=False)
self.branch_pool_conv = ConvBNLayer(
num_channels=num_channels,
num_filters=192,
filter_size=1,
act="relu")
self.branch_pool_conv = ConvNormActivation(
in_channels=num_channels,
out_channels=192,
kernel_size=1,
padding=0,
activation_layer=nn.ReLU)
def forward(self, x):
branch1x1 = self.branch1x1(x)
......
......@@ -16,59 +16,31 @@ import paddle
import paddle.nn as nn
from paddle.utils.download import get_weights_path_from_url
from ..ops import ConvNormActivation
__all__ = []
model_urls = {
'mobilenetv1_1.0':
('https://paddle-hapi.bj.bcebos.com/models/mobilenet_v1_x1.0.pdparams',
'42a154c2f26f86e7457d6daded114e8c')
('https://paddle-hapi.bj.bcebos.com/models/mobilenetv1_1.0.pdparams',
'3033ab1975b1670bef51545feb65fc45')
}
class ConvBNLayer(nn.Layer):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride,
padding,
num_groups=1):
super(ConvBNLayer, self).__init__()
self._conv = nn.Conv2D(
in_channels,
out_channels,
kernel_size,
stride=stride,
padding=padding,
groups=num_groups,
bias_attr=False)
self._norm_layer = nn.BatchNorm2D(out_channels)
self._act = nn.ReLU()
def forward(self, x):
x = self._conv(x)
x = self._norm_layer(x)
x = self._act(x)
return x
class DepthwiseSeparable(nn.Layer):
def __init__(self, in_channels, out_channels1, out_channels2, num_groups,
stride, scale):
super(DepthwiseSeparable, self).__init__()
self._depthwise_conv = ConvBNLayer(
self._depthwise_conv = ConvNormActivation(
in_channels,
int(out_channels1 * scale),
kernel_size=3,
stride=stride,
padding=1,
num_groups=int(num_groups * scale))
groups=int(num_groups * scale))
self._pointwise_conv = ConvBNLayer(
self._pointwise_conv = ConvNormActivation(
int(out_channels1 * scale),
int(out_channels2 * scale),
kernel_size=1,
......@@ -94,9 +66,15 @@ class MobileNetV1(nn.Layer):
Examples:
.. code-block:: python
import paddle
from paddle.vision.models import MobileNetV1
model = MobileNetV1()
x = paddle.rand([1, 3, 224, 224])
out = model(x)
print(out.shape)
"""
def __init__(self, scale=1.0, num_classes=1000, with_pool=True):
......@@ -106,7 +84,7 @@ class MobileNetV1(nn.Layer):
self.num_classes = num_classes
self.with_pool = with_pool
self.conv1 = ConvBNLayer(
self.conv1 = ConvNormActivation(
in_channels=3,
out_channels=int(32 * scale),
kernel_size=3,
......@@ -257,6 +235,7 @@ def mobilenet_v1(pretrained=False, scale=1.0, **kwargs):
Examples:
.. code-block:: python
import paddle
from paddle.vision.models import mobilenet_v1
# build model
......@@ -266,7 +245,12 @@ def mobilenet_v1(pretrained=False, scale=1.0, **kwargs):
# model = mobilenet_v1(pretrained=True)
# build mobilenet v1 with scale=0.5
model = mobilenet_v1(scale=0.5)
model_scale = mobilenet_v1(scale=0.5)
x = paddle.rand([1, 3, 224, 224])
out = model(x)
print(out.shape)
"""
model = _mobilenet(
'mobilenetv1_' + str(scale), pretrained, scale=scale, **kwargs)
......
......@@ -17,6 +17,7 @@ import paddle.nn as nn
from paddle.utils.download import get_weights_path_from_url
from .utils import _make_divisible
from ..ops import ConvNormActivation
__all__ = []
......@@ -27,29 +28,6 @@ model_urls = {
}
class ConvBNReLU(nn.Sequential):
def __init__(self,
in_planes,
out_planes,
kernel_size=3,
stride=1,
groups=1,
norm_layer=nn.BatchNorm2D):
padding = (kernel_size - 1) // 2
super(ConvBNReLU, self).__init__(
nn.Conv2D(
in_planes,
out_planes,
kernel_size,
stride,
padding,
groups=groups,
bias_attr=False),
norm_layer(out_planes),
nn.ReLU6())
class InvertedResidual(nn.Layer):
def __init__(self,
inp,
......@@ -67,15 +45,20 @@ class InvertedResidual(nn.Layer):
layers = []
if expand_ratio != 1:
layers.append(
ConvBNReLU(
inp, hidden_dim, kernel_size=1, norm_layer=norm_layer))
ConvNormActivation(
inp,
hidden_dim,
kernel_size=1,
norm_layer=norm_layer,
activation_layer=nn.ReLU6))
layers.extend([
ConvBNReLU(
ConvNormActivation(
hidden_dim,
hidden_dim,
stride=stride,
groups=hidden_dim,
norm_layer=norm_layer),
norm_layer=norm_layer,
activation_layer=nn.ReLU6),
nn.Conv2D(
hidden_dim, oup, 1, 1, 0, bias_attr=False),
norm_layer(oup),
......@@ -90,23 +73,30 @@ class InvertedResidual(nn.Layer):
class MobileNetV2(nn.Layer):
def __init__(self, scale=1.0, num_classes=1000, with_pool=True):
"""MobileNetV2 model from
`"MobileNetV2: Inverted Residuals and Linear Bottlenecks" <https://arxiv.org/abs/1801.04381>`_.
"""MobileNetV2 model from
`"MobileNetV2: Inverted Residuals and Linear Bottlenecks" <https://arxiv.org/abs/1801.04381>`_.
Args:
scale (float): scale of channels in each layer. Default: 1.0.
num_classes (int): output dim of last fc layer. If num_classes <=0, last fc layer
will not be defined. Default: 1000.
with_pool (bool): use pool before the last fc layer or not. Default: True.
Args:
scale (float): scale of channels in each layer. Default: 1.0.
num_classes (int): output dim of last fc layer. If num_classes <=0, last fc layer
will not be defined. Default: 1000.
with_pool (bool): use pool before the last fc layer or not. Default: True.
Examples:
.. code-block:: python
import paddle
from paddle.vision.models import MobileNetV2
Examples:
.. code-block:: python
model = MobileNetV2()
from paddle.vision.models import MobileNetV2
x = paddle.rand([1, 3, 224, 224])
out = model(x)
print(out.shape)
"""
model = MobileNetV2()
"""
def __init__(self, scale=1.0, num_classes=1000, with_pool=True):
super(MobileNetV2, self).__init__()
self.num_classes = num_classes
self.with_pool = with_pool
......@@ -130,8 +120,12 @@ class MobileNetV2(nn.Layer):
self.last_channel = _make_divisible(last_channel * max(1.0, scale),
round_nearest)
features = [
ConvBNReLU(
3, input_channel, stride=2, norm_layer=norm_layer)
ConvNormActivation(
3,
input_channel,
stride=2,
norm_layer=norm_layer,
activation_layer=nn.ReLU6)
]
for t, c, n, s in inverted_residual_setting:
......@@ -148,11 +142,12 @@ class MobileNetV2(nn.Layer):
input_channel = output_channel
features.append(
ConvBNReLU(
ConvNormActivation(
input_channel,
self.last_channel,
kernel_size=1,
norm_layer=norm_layer))
norm_layer=norm_layer,
activation_layer=nn.ReLU6))
self.features = nn.Sequential(*features)
......@@ -199,6 +194,7 @@ def mobilenet_v2(pretrained=False, scale=1.0, **kwargs):
Examples:
.. code-block:: python
import paddle
from paddle.vision.models import mobilenet_v2
# build model
......@@ -209,6 +205,11 @@ def mobilenet_v2(pretrained=False, scale=1.0, **kwargs):
# build mobilenet v2 with scale=0.5
model = mobilenet_v2(scale=0.5)
x = paddle.rand([1, 3, 224, 224])
out = model(x)
print(out.shape)
"""
model = _mobilenet(
'mobilenetv2_' + str(scale), pretrained, scale=scale, **kwargs)
......
......@@ -18,37 +18,50 @@ from __future__ import print_function
import paddle
import paddle.nn as nn
from paddle.fluid.param_attr import ParamAttr
from paddle.nn import AdaptiveAvgPool2D, BatchNorm, Conv2D, Linear, MaxPool2D
from paddle.nn import AdaptiveAvgPool2D, Linear, MaxPool2D
from paddle.utils.download import get_weights_path_from_url
from ..ops import ConvNormActivation
__all__ = []
model_urls = {
"shufflenet_v2_x0_25": (
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x0_25_pretrained.pdparams",
"e753404cbd95027759c5f56ecd6c9c4b", ),
"https://paddle-hapi.bj.bcebos.com/models/shufflenet_v2_x0_25.pdparams",
"1e509b4c140eeb096bb16e214796d03b", ),
"shufflenet_v2_x0_33": (
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x0_33_pretrained.pdparams",
"776e3cf9a4923abdfce789c45b8fe1f2", ),
"https://paddle-hapi.bj.bcebos.com/models/shufflenet_v2_x0_33.pdparams",
"3d7b3ab0eaa5c0927ff1026d31b729bd", ),
"shufflenet_v2_x0_5": (
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x0_5_pretrained.pdparams",
"e3649cf531566917e2969487d2bc6b60", ),
"https://paddle-hapi.bj.bcebos.com/models/shufflenet_v2_x0_5.pdparams",
"5e5cee182a7793c4e4c73949b1a71bd4", ),
"shufflenet_v2_x1_0": (
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x1_0_pretrained.pdparams",
"7821c348ea34e58847c43a08a4ac0bdf", ),
"https://paddle-hapi.bj.bcebos.com/models/shufflenet_v2_x1_0.pdparams",
"122d42478b9e81eb49f8a9ede327b1a4", ),
"shufflenet_v2_x1_5": (
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x1_5_pretrained.pdparams",
"93a07fa557ab2d8803550f39e5b6c391", ),
"https://paddle-hapi.bj.bcebos.com/models/shufflenet_v2_x1_5.pdparams",
"faced5827380d73531d0ee027c67826d", ),
"shufflenet_v2_x2_0": (
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x2_0_pretrained.pdparams",
"4ab1f622fd0d341e0f84b4e057797563", ),
"https://paddle-hapi.bj.bcebos.com/models/shufflenet_v2_x2_0.pdparams",
"cd3dddcd8305e7bcd8ad14d1c69a5784", ),
"shufflenet_v2_swish": (
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_swish_pretrained.pdparams",
"daff38b3df1b3748fccbb13cfdf02519", ),
"https://paddle-hapi.bj.bcebos.com/models/shufflenet_v2_swish.pdparams",
"adde0aa3b023e5b0c94a68be1c394b84", ),
}
def create_activation_layer(act):
if act == "swish":
return nn.Swish
elif act == "relu":
return nn.ReLU
elif act is None:
return None
else:
raise RuntimeError(
"The activation function is not supported: {}".format(act))
def channel_shuffle(x, groups):
batch_size, num_channels, height, width = x.shape[0:4]
channels_per_group = num_channels // groups
......@@ -65,61 +78,37 @@ def channel_shuffle(x, groups):
return x
class ConvBNLayer(nn.Layer):
class InvertedResidual(nn.Layer):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride,
padding,
groups=1,
act=None):
super(ConvBNLayer, self).__init__()
self._conv = Conv2D(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
groups=groups,
weight_attr=ParamAttr(initializer=nn.initializer.KaimingNormal()),
bias_attr=False, )
self._batch_norm = BatchNorm(out_channels, act=act)
def forward(self, inputs):
x = self._conv(inputs)
x = self._batch_norm(x)
return x
class InvertedResidual(nn.Layer):
def __init__(self, in_channels, out_channels, stride, act="relu"):
activation_layer=nn.ReLU):
super(InvertedResidual, self).__init__()
self._conv_pw = ConvBNLayer(
self._conv_pw = ConvNormActivation(
in_channels=in_channels // 2,
out_channels=out_channels // 2,
kernel_size=1,
stride=1,
padding=0,
groups=1,
act=act)
self._conv_dw = ConvBNLayer(
activation_layer=activation_layer)
self._conv_dw = ConvNormActivation(
in_channels=out_channels // 2,
out_channels=out_channels // 2,
kernel_size=3,
stride=stride,
padding=1,
groups=out_channels // 2,
act=None)
self._conv_linear = ConvBNLayer(
activation_layer=None)
self._conv_linear = ConvNormActivation(
in_channels=out_channels // 2,
out_channels=out_channels // 2,
kernel_size=1,
stride=1,
padding=0,
groups=1,
act=act)
activation_layer=activation_layer)
def forward(self, inputs):
x1, x2 = paddle.split(
......@@ -134,51 +123,55 @@ class InvertedResidual(nn.Layer):
class InvertedResidualDS(nn.Layer):
def __init__(self, in_channels, out_channels, stride, act="relu"):
def __init__(self,
in_channels,
out_channels,
stride,
activation_layer=nn.ReLU):
super(InvertedResidualDS, self).__init__()
# branch1
self._conv_dw_1 = ConvBNLayer(
self._conv_dw_1 = ConvNormActivation(
in_channels=in_channels,
out_channels=in_channels,
kernel_size=3,
stride=stride,
padding=1,
groups=in_channels,
act=None)
self._conv_linear_1 = ConvBNLayer(
activation_layer=None)
self._conv_linear_1 = ConvNormActivation(
in_channels=in_channels,
out_channels=out_channels // 2,
kernel_size=1,
stride=1,
padding=0,
groups=1,
act=act)
activation_layer=activation_layer)
# branch2
self._conv_pw_2 = ConvBNLayer(
self._conv_pw_2 = ConvNormActivation(
in_channels=in_channels,
out_channels=out_channels // 2,
kernel_size=1,
stride=1,
padding=0,
groups=1,
act=act)
self._conv_dw_2 = ConvBNLayer(
activation_layer=activation_layer)
self._conv_dw_2 = ConvNormActivation(
in_channels=out_channels // 2,
out_channels=out_channels // 2,
kernel_size=3,
stride=stride,
padding=1,
groups=out_channels // 2,
act=None)
self._conv_linear_2 = ConvBNLayer(
activation_layer=None)
self._conv_linear_2 = ConvNormActivation(
in_channels=out_channels // 2,
out_channels=out_channels // 2,
kernel_size=1,
stride=1,
padding=0,
groups=1,
act=act)
activation_layer=activation_layer)
def forward(self, inputs):
x1 = self._conv_dw_1(inputs)
......@@ -221,6 +214,7 @@ class ShuffleNetV2(nn.Layer):
self.num_classes = num_classes
self.with_pool = with_pool
stage_repeats = [4, 8, 4]
activation_layer = create_activation_layer(act)
if scale == 0.25:
stage_out_channels = [-1, 24, 24, 48, 96, 512]
......@@ -238,13 +232,13 @@ class ShuffleNetV2(nn.Layer):
raise NotImplementedError("This scale size:[" + str(scale) +
"] is not implemented!")
# 1. conv1
self._conv1 = ConvBNLayer(
self._conv1 = ConvNormActivation(
in_channels=3,
out_channels=stage_out_channels[1],
kernel_size=3,
stride=2,
padding=1,
act=act)
activation_layer=activation_layer)
self._max_pool = MaxPool2D(kernel_size=3, stride=2, padding=1)
# 2. bottleneck sequences
......@@ -257,7 +251,7 @@ class ShuffleNetV2(nn.Layer):
in_channels=stage_out_channels[stage_id + 1],
out_channels=stage_out_channels[stage_id + 2],
stride=2,
act=act),
activation_layer=activation_layer),
name=str(stage_id + 2) + "_" + str(i + 1))
else:
block = self.add_sublayer(
......@@ -265,17 +259,17 @@ class ShuffleNetV2(nn.Layer):
in_channels=stage_out_channels[stage_id + 2],
out_channels=stage_out_channels[stage_id + 2],
stride=1,
act=act),
activation_layer=activation_layer),
name=str(stage_id + 2) + "_" + str(i + 1))
self._block_list.append(block)
# 3. last_conv
self._last_conv = ConvBNLayer(
self._last_conv = ConvNormActivation(
in_channels=stage_out_channels[-2],
out_channels=stage_out_channels[-1],
kernel_size=1,
stride=1,
padding=0,
act=act)
activation_layer=activation_layer)
# 4. pool
if with_pool:
self._pool2d_avg = AdaptiveAvgPool2D(1)
......
......@@ -1335,13 +1335,13 @@ class ConvNormActivation(Sequential):
Args:
in_channels (int): Number of channels in the input image
out_channels (int): Number of channels produced by the Convolution-Normalization-Activation block
kernel_size: (int, optional): Size of the convolving kernel. Default: 3
stride (int, optional): Stride of the convolution. Default: 1
padding (int, tuple or str, optional): Padding added to all four sides of the input. Default: None,
kernel_size: (int|list|tuple, optional): Size of the convolving kernel. Default: 3
stride (int|list|tuple, optional): Stride of the convolution. Default: 1
padding (int|str|tuple|list, optional): Padding added to all four sides of the input. Default: None,
in which case it will be calculated as ``padding = (kernel_size - 1) // 2 * dilation``
groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1
norm_layer (Callable[..., paddle.nn.Layer], optional): Norm layer that will be stacked on top of the convolution layer.
If ``None`` this layer won't be used. Default: ``paddle.nn.BatchNorm2d``
If ``None`` this layer won't be used. Default: ``paddle.nn.BatchNorm2D``
activation_layer (Callable[..., paddle.nn.Layer], optional): Activation function which will be stacked on top of the normalization
layer (if not ``None``), otherwise on top of the conv layer. If ``None`` this layer won't be used. Default: ``paddle.nn.ReLU``
dilation (int): Spacing between kernel elements. Default: 1
......
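Worth noting for the padding default documented above: when ``padding`` is left as None, it is computed as ``(kernel_size - 1) // 2 * dilation``, which preserves spatial size for odd kernel sizes at stride 1. A small sketch of the rule (the helper name is illustrative, not part of the API):

# Illustrative helper mirroring the documented default:
# padding = (kernel_size - 1) // 2 * dilation
def default_padding(kernel_size, dilation=1):
    return (kernel_size - 1) // 2 * dilation

print(default_padding(3))              # 1 -> same-size output at stride 1
print(default_padding(5))              # 2
print(default_padding(3, dilation=2))  # 2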