diff --git a/python/paddle/tests/test_pretrained_model.py b/python/paddle/tests/test_pretrained_model.py
index dbd5920f499580e572e593d9bb1efb341c1aeb60..6112d08fe356700cd762e6ddc4d78312b2b4529e 100644
--- a/python/paddle/tests/test_pretrained_model.py
+++ b/python/paddle/tests/test_pretrained_model.py
@@ -54,7 +54,8 @@ class TestPretrainedModel(unittest.TestCase):
     def test_models(self):
         arches = [
             'mobilenet_v1', 'mobilenet_v2', 'resnet18', 'vgg16', 'alexnet',
-            'resnext50_32x4d', 'inception_v3', 'densenet121', 'googlenet'
+            'resnext50_32x4d', 'inception_v3', 'densenet121', 'googlenet',
+            'shufflenet_v2_x0_25', 'shufflenet_v2_swish'
         ]
         for arch in arches:
             self.infer(arch)
diff --git a/python/paddle/tests/test_vision_models.py b/python/paddle/tests/test_vision_models.py
index bc9799da888854e9d121cde1622bff1b6467277d..29e00e73e29b28a08c19b3c98b41fc2102039149 100644
--- a/python/paddle/tests/test_vision_models.py
+++ b/python/paddle/tests/test_vision_models.py
@@ -88,6 +88,9 @@ class TestVisonModels(unittest.TestCase):
     def test_alexnet(self):
         self.models_infer('alexnet')
 
+    def test_shufflenetv2_swish(self):
+        self.models_infer('shufflenet_v2_swish')
+
     def test_resnext50_32x4d(self):
         self.models_infer('resnext50_32x4d')
 
@@ -112,6 +115,24 @@ class TestVisonModels(unittest.TestCase):
     def test_googlenet(self):
         self.models_infer('googlenet')
 
+    def test_shufflenetv2_x0_25(self):
+        self.models_infer('shufflenet_v2_x0_25')
+
+    def test_shufflenetv2_x0_33(self):
+        self.models_infer('shufflenet_v2_x0_33')
+
+    def test_shufflenetv2_x0_5(self):
+        self.models_infer('shufflenet_v2_x0_5')
+
+    def test_shufflenetv2_x1_0(self):
+        self.models_infer('shufflenet_v2_x1_0')
+
+    def test_shufflenetv2_x1_5(self):
+        self.models_infer('shufflenet_v2_x1_5')
+
+    def test_shufflenetv2_x2_0(self):
+        self.models_infer('shufflenet_v2_x2_0')
+
     def test_vgg16_num_classes(self):
         vgg16 = models.__dict__['vgg16'](pretrained=False, num_classes=10)
 
diff --git a/python/paddle/vision/__init__.py b/python/paddle/vision/__init__.py
index 5695ddc93ed044c62fc3b954ece83b67fa913cb9..22a42b6d312770fa2af2b1ac2ae3ae5af89bec4c 100644
--- a/python/paddle/vision/__init__.py
+++ b/python/paddle/vision/__init__.py
@@ -63,6 +63,14 @@ from .models import InceptionV3  # noqa: F401
 from .models import inception_v3  # noqa: F401
 from .models import GoogLeNet  # noqa: F401
 from .models import googlenet  # noqa: F401
+from .models import ShuffleNetV2  # noqa: F401
+from .models import shufflenet_v2_x0_25  # noqa: F401
+from .models import shufflenet_v2_x0_33  # noqa: F401
+from .models import shufflenet_v2_x0_5  # noqa: F401
+from .models import shufflenet_v2_x1_0  # noqa: F401
+from .models import shufflenet_v2_x1_5  # noqa: F401
+from .models import shufflenet_v2_x2_0  # noqa: F401
+from .models import shufflenet_v2_swish  # noqa: F401
 from .transforms import BaseTransform  # noqa: F401
 from .transforms import Compose  # noqa: F401
 from .transforms import Resize  # noqa: F401
diff --git a/python/paddle/vision/models/__init__.py b/python/paddle/vision/models/__init__.py
index 3c8a3da69273b3f0bb10b1439f10ee49c6f65c6e..a66d77fc888681151dbf9aa360cbb9766cebd846 100644
--- a/python/paddle/vision/models/__init__.py
+++ b/python/paddle/vision/models/__init__.py
@@ -47,6 +47,14 @@ from .inceptionv3 import InceptionV3  # noqa: F401
 from .inceptionv3 import inception_v3  # noqa: F401
 from .googlenet import GoogLeNet  # noqa: F401
 from .googlenet import googlenet  # noqa: F401
+from .shufflenetv2 import ShuffleNetV2  # noqa: F401
+from .shufflenetv2 import shufflenet_v2_x0_25  # noqa: F401
+from .shufflenetv2 import shufflenet_v2_x0_33  # noqa: F401
+from .shufflenetv2 import shufflenet_v2_x0_5  # noqa: F401
+from .shufflenetv2 import shufflenet_v2_x1_0  # noqa: F401
+from .shufflenetv2 import shufflenet_v2_x1_5  # noqa: F401
+from .shufflenetv2 import shufflenet_v2_x2_0  # noqa: F401
+from .shufflenetv2 import shufflenet_v2_swish  # noqa: F401
 
 __all__ = [ #noqa
     'ResNet',
@@ -84,4 +92,12 @@ __all__ = [ #noqa
     'inception_v3',
     'GoogLeNet',
     'googlenet',
+    'ShuffleNetV2',
+    'shufflenet_v2_x0_25',
+    'shufflenet_v2_x0_33',
+    'shufflenet_v2_x0_5',
+    'shufflenet_v2_x1_0',
+    'shufflenet_v2_x1_5',
+    'shufflenet_v2_x2_0',
+    'shufflenet_v2_swish'
 ]
diff --git a/python/paddle/vision/models/shufflenetv2.py b/python/paddle/vision/models/shufflenetv2.py
new file mode 100644
index 0000000000000000000000000000000000000000..041f3fc749b6ccd9950ee33505c88bd5d01c9227
--- /dev/null
+++ b/python/paddle/vision/models/shufflenetv2.py
@@ -0,0 +1,523 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle
+import paddle.nn as nn
+from paddle.fluid.param_attr import ParamAttr
+from paddle.nn import AdaptiveAvgPool2D, BatchNorm, Conv2D, Linear, MaxPool2D
+from paddle.utils.download import get_weights_path_from_url
+
+__all__ = []
+
+model_urls = {
+    "shufflenet_v2_x0_25": (
+        "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x0_25_pretrained.pdparams",
+        "e753404cbd95027759c5f56ecd6c9c4b", ),
+    "shufflenet_v2_x0_33": (
+        "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x0_33_pretrained.pdparams",
+        "776e3cf9a4923abdfce789c45b8fe1f2", ),
+    "shufflenet_v2_x0_5": (
+        "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x0_5_pretrained.pdparams",
+        "e3649cf531566917e2969487d2bc6b60", ),
+    "shufflenet_v2_x1_0": (
+        "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x1_0_pretrained.pdparams",
+        "7821c348ea34e58847c43a08a4ac0bdf", ),
+    "shufflenet_v2_x1_5": (
+        "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x1_5_pretrained.pdparams",
+        "93a07fa557ab2d8803550f39e5b6c391", ),
+    "shufflenet_v2_x2_0": (
+        "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x2_0_pretrained.pdparams",
+        "4ab1f622fd0d341e0f84b4e057797563", ),
+    "shufflenet_v2_swish": (
+        "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_swish_pretrained.pdparams",
+        "daff38b3df1b3748fccbb13cfdf02519", ),
+}
+
+
+def channel_shuffle(x, groups):
+    batch_size, num_channels, height, width = x.shape[0:4]
+    channels_per_group = num_channels // groups
+
+    # reshape
+    x = paddle.reshape(
+        x, shape=[batch_size, groups, channels_per_group, height, width])
+
+    # transpose
+    x = paddle.transpose(x, perm=[0, 2, 1, 3, 4])
+
+    # flatten
+    x = paddle.reshape(x, shape=[batch_size, num_channels, height, width])
+    return x
+
+
+class ConvBNLayer(nn.Layer):
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride,
+                 padding,
+                 groups=1,
+                 act=None):
+        super(ConvBNLayer, self).__init__()
+        self._conv = Conv2D(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=padding,
+            groups=groups,
+            weight_attr=ParamAttr(initializer=nn.initializer.KaimingNormal()),
+            bias_attr=False, )
+
+        self._batch_norm = BatchNorm(out_channels, act=act)
+
+    def forward(self, inputs):
+        x = self._conv(inputs)
+        x = self._batch_norm(x)
+        return x
+
+
+class InvertedResidual(nn.Layer):
+    def __init__(self, in_channels, out_channels, stride, act="relu"):
+        super(InvertedResidual, self).__init__()
+        self._conv_pw = ConvBNLayer(
+            in_channels=in_channels // 2,
+            out_channels=out_channels // 2,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            groups=1,
+            act=act)
+        self._conv_dw = ConvBNLayer(
+            in_channels=out_channels // 2,
+            out_channels=out_channels // 2,
+            kernel_size=3,
+            stride=stride,
+            padding=1,
+            groups=out_channels // 2,
+            act=None)
+        self._conv_linear = ConvBNLayer(
+            in_channels=out_channels // 2,
+            out_channels=out_channels // 2,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            groups=1,
+            act=act)
+
+    def forward(self, inputs):
+        x1, x2 = paddle.split(
+            inputs,
+            num_or_sections=[inputs.shape[1] // 2, inputs.shape[1] // 2],
+            axis=1)
+        x2 = self._conv_pw(x2)
+        x2 = self._conv_dw(x2)
+        x2 = self._conv_linear(x2)
+        out = paddle.concat([x1, x2], axis=1)
+        return channel_shuffle(out, 2)
+
+
+class InvertedResidualDS(nn.Layer):
+    def __init__(self, in_channels, out_channels, stride, act="relu"):
+        super(InvertedResidualDS, self).__init__()
+
+        # branch1
+        self._conv_dw_1 = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=in_channels,
+            kernel_size=3,
+            stride=stride,
+            padding=1,
+            groups=in_channels,
+            act=None)
+        self._conv_linear_1 = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=out_channels // 2,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            groups=1,
+            act=act)
+        # branch2
+        self._conv_pw_2 = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=out_channels // 2,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            groups=1,
+            act=act)
+        self._conv_dw_2 = ConvBNLayer(
+            in_channels=out_channels // 2,
+            out_channels=out_channels // 2,
+            kernel_size=3,
+            stride=stride,
+            padding=1,
+            groups=out_channels // 2,
+            act=None)
+        self._conv_linear_2 = ConvBNLayer(
+            in_channels=out_channels // 2,
+            out_channels=out_channels // 2,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            groups=1,
+            act=act)
+
+    def forward(self, inputs):
+        x1 = self._conv_dw_1(inputs)
+        x1 = self._conv_linear_1(x1)
+        x2 = self._conv_pw_2(inputs)
+        x2 = self._conv_dw_2(x2)
+        x2 = self._conv_linear_2(x2)
+        out = paddle.concat([x1, x2], axis=1)
+
+        return channel_shuffle(out, 2)
+
+
+class ShuffleNetV2(nn.Layer):
+    """ShuffleNetV2 model from
+    `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" <https://arxiv.org/pdf/1807.11164.pdf>`_
+
+    Args:
+        scale (float, optional) - scale of output channels. Default: True.
+        act (str, optional) - activation function of neural network. Default: "relu".
+        num_classes (int, optional): output dim of last fc layer. If num_classes <=0, last fc layer
+                            will not be defined. Default: 1000.
+        with_pool (bool, optional): use pool before the last fc layer or not. Default: True.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+            from paddle.vision.models import ShuffleNetV2
+
+            shufflenet_v2_swish = ShuffleNetV2(scale=1.0, act="swish")
+            x = paddle.rand([1, 3, 224, 224])
+            out = shufflenet_v2_swish(x)
+            print(out.shape)
+
+    """
+
+    def __init__(self, scale=1.0, act="relu", num_classes=1000, with_pool=True):
+        super(ShuffleNetV2, self).__init__()
+        self.scale = scale
+        self.num_classes = num_classes
+        self.with_pool = with_pool
+        stage_repeats = [4, 8, 4]
+
+        if scale == 0.25:
+            stage_out_channels = [-1, 24, 24, 48, 96, 512]
+        elif scale == 0.33:
+            stage_out_channels = [-1, 24, 32, 64, 128, 512]
+        elif scale == 0.5:
+            stage_out_channels = [-1, 24, 48, 96, 192, 1024]
+        elif scale == 1.0:
+            stage_out_channels = [-1, 24, 116, 232, 464, 1024]
+        elif scale == 1.5:
+            stage_out_channels = [-1, 24, 176, 352, 704, 1024]
+        elif scale == 2.0:
+            stage_out_channels = [-1, 24, 224, 488, 976, 2048]
+        else:
+            raise NotImplementedError("This scale size:[" + str(scale) +
+                                      "] is not implemented!")
+        # 1. conv1
+        self._conv1 = ConvBNLayer(
+            in_channels=3,
+            out_channels=stage_out_channels[1],
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            act=act)
+        self._max_pool = MaxPool2D(kernel_size=3, stride=2, padding=1)
+
+        # 2. bottleneck sequences
+        self._block_list = []
+        for stage_id, num_repeat in enumerate(stage_repeats):
+            for i in range(num_repeat):
+                if i == 0:
+                    block = self.add_sublayer(
+                        sublayer=InvertedResidualDS(
+                            in_channels=stage_out_channels[stage_id + 1],
+                            out_channels=stage_out_channels[stage_id + 2],
+                            stride=2,
+                            act=act),
+                        name=str(stage_id + 2) + "_" + str(i + 1))
+                else:
+                    block = self.add_sublayer(
+                        sublayer=InvertedResidual(
+                            in_channels=stage_out_channels[stage_id + 2],
+                            out_channels=stage_out_channels[stage_id + 2],
+                            stride=1,
+                            act=act),
+                        name=str(stage_id + 2) + "_" + str(i + 1))
+                self._block_list.append(block)
+        # 3. last_conv
+        self._last_conv = ConvBNLayer(
+            in_channels=stage_out_channels[-2],
+            out_channels=stage_out_channels[-1],
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            act=act)
+        # 4. pool
+        if with_pool:
+            self._pool2d_avg = AdaptiveAvgPool2D(1)
+
+        # 5. fc
+        if num_classes > 0:
+            self._out_c = stage_out_channels[-1]
+            self._fc = Linear(stage_out_channels[-1], num_classes)
+
+    def forward(self, inputs):
+        x = self._conv1(inputs)
+        x = self._max_pool(x)
+        for inv in self._block_list:
+            x = inv(x)
+        x = self._last_conv(x)
+
+        if self.with_pool:
+            x = self._pool2d_avg(x)
+
+        if self.num_classes > 0:
+            x = paddle.flatten(x, start_axis=1, stop_axis=-1)
+            x = self._fc(x)
+        return x
+
+
+def _shufflenet_v2(arch, pretrained=False, **kwargs):
+    model = ShuffleNetV2(**kwargs)
+    if pretrained:
+        assert (
+            arch in model_urls
+        ), "{} model do not have a pretrained model now, you should set pretrained=False".format(
+            arch)
+        weight_path = get_weights_path_from_url(model_urls[arch][0],
+                                                model_urls[arch][1])
+
+        param = paddle.load(weight_path)
+        model.set_dict(param)
+    return model
+
+
+def shufflenet_v2_x0_25(pretrained=False, **kwargs):
+    """ShuffleNetV2 with 0.25x output channels, as described in
+    `"ShuffleNet V2: Practical Guidelines for Ecient CNN Architecture Design" <https://arxiv.org/pdf/1807.11164.pdf>`_ 。
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet. Default: False.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+            from paddle.vision.models import shufflenet_v2_x0_25
+
+            # build model
+            model = shufflenet_v2_x0_25()
+
+            # build model and load imagenet pretrained weight
+            # model = shufflenet_v2_x0_25(pretrained=True)
+
+            x = paddle.rand([1, 3, 224, 224])
+            out = model(x)
+
+            print(out.shape)
+
+    """
+    return _shufflenet_v2(
+        "shufflenet_v2_x0_25", scale=0.25, pretrained=pretrained, **kwargs)
+
+
+def shufflenet_v2_x0_33(pretrained=False, **kwargs):
+    """ShuffleNetV2 with 0.33x output channels, as described in
+    `"ShuffleNet V2: Practical Guidelines for Ecient CNN Architecture Design" <https://arxiv.org/pdf/1807.11164.pdf>`_ 。
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet. Default: False.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+            from paddle.vision.models import shufflenet_v2_x0_33
+
+            # build model
+            model = shufflenet_v2_x0_33()
+
+            # build model and load imagenet pretrained weight
+            # model = shufflenet_v2_x0_33(pretrained=True)
+
+            x = paddle.rand([1, 3, 224, 224])
+            out = model(x)
+
+            print(out.shape)
+
+    """
+    return _shufflenet_v2(
+        "shufflenet_v2_x0_33", scale=0.33, pretrained=pretrained, **kwargs)
+
+
+def shufflenet_v2_x0_5(pretrained=False, **kwargs):
+    """ShuffleNetV2 with 0.5x output channels, as described in
+    `"ShuffleNet V2: Practical Guidelines for Ecient CNN Architecture Design" <https://arxiv.org/pdf/1807.11164.pdf>`_ 。
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet. Default: False.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+            from paddle.vision.models import shufflenet_v2_x0_5
+
+            # build model
+            model = shufflenet_v2_x0_5()
+
+            # build model and load imagenet pretrained weight
+            # model = shufflenet_v2_x0_5(pretrained=True)
+
+            x = paddle.rand([1, 3, 224, 224])
+            out = model(x)
+
+            print(out.shape)
+
+    """
+    return _shufflenet_v2(
+        "shufflenet_v2_x0_5", scale=0.5, pretrained=pretrained, **kwargs)
+
+
+def shufflenet_v2_x1_0(pretrained=False, **kwargs):
+    """ShuffleNetV2 with 1.0x output channels, as described in
+    `"ShuffleNet V2: Practical Guidelines for Ecient CNN Architecture Design" <https://arxiv.org/pdf/1807.11164.pdf>`_ 。
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet. Default: False.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+            from paddle.vision.models import shufflenet_v2_x1_0
+
+            # build model
+            model = shufflenet_v2_x1_0()
+
+            # build model and load imagenet pretrained weight
+            # model = shufflenet_v2_x1_0(pretrained=True)
+
+            x = paddle.rand([1, 3, 224, 224])
+            out = model(x)
+
+            print(out.shape)
+
+    """
+    return _shufflenet_v2(
+        "shufflenet_v2_x1_0", scale=1.0, pretrained=pretrained, **kwargs)
+
+
+def shufflenet_v2_x1_5(pretrained=False, **kwargs):
+    """ShuffleNetV2 with 1.5x output channels, as described in
+    `"ShuffleNet V2: Practical Guidelines for Ecient CNN Architecture Design" <https://arxiv.org/pdf/1807.11164.pdf>`_ 。
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet. Default: False.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+            from paddle.vision.models import shufflenet_v2_x1_5
+
+            # build model
+            model = shufflenet_v2_x1_5()
+
+            # build model and load imagenet pretrained weight
+            # model = shufflenet_v2_x1_5(pretrained=True)
+
+            x = paddle.rand([1, 3, 224, 224])
+            out = model(x)
+
+            print(out.shape)
+
+    """
+    return _shufflenet_v2(
+        "shufflenet_v2_x1_5", scale=1.5, pretrained=pretrained, **kwargs)
+
+
+def shufflenet_v2_x2_0(pretrained=False, **kwargs):
+    """ShuffleNetV2 with 2.0x output channels, as described in
+    `"ShuffleNet V2: Practical Guidelines for Ecient CNN Architecture Design" <https://arxiv.org/pdf/1807.11164.pdf>`_ 。
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet. Default: False.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+            from paddle.vision.models import shufflenet_v2_x2_0
+
+            # build model
+            model = shufflenet_v2_x2_0()
+
+            # build model and load imagenet pretrained weight
+            # model = shufflenet_v2_x2_0(pretrained=True)
+
+            x = paddle.rand([1, 3, 224, 224])
+            out = model(x)
+
+            print(out.shape)
+
+    """
+    return _shufflenet_v2(
+        "shufflenet_v2_x2_0", scale=2.0, pretrained=pretrained, **kwargs)
+
+
+def shufflenet_v2_swish(pretrained=False, **kwargs):
+    """ShuffleNetV2 with 1.0x output channels and swish activation function, as described in
+    `"ShuffleNet V2: Practical Guidelines for Ecient CNN Architecture Design" <https://arxiv.org/pdf/1807.11164.pdf>`_ 。
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet. Default: False.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+            from paddle.vision.models import shufflenet_v2_swish
+
+            # build model
+            model = shufflenet_v2_swish()
+
+            # build model and load imagenet pretrained weight
+            # model = shufflenet_v2_swish(pretrained=True)
+
+            x = paddle.rand([1, 3, 224, 224])
+            out = model(x)
+
+            print(out.shape)
+
+    """
+    return _shufflenet_v2(
+        "shufflenet_v2_swish",
+        scale=1.0,
+        act="swish",
+        pretrained=pretrained,
+        **kwargs)