diff --git a/dygraph/infer.py b/dygraph/infer.py
index 2e6aa3f58a5adbe608ce02f31a80286e6e7c717f..a9c0d380f553e5c9f57c46a8aff91b28c3a1888e 100644
--- a/dygraph/infer.py
+++ b/dygraph/infer.py
@@ -17,11 +17,11 @@ import argparse
 
 import paddle.fluid as fluid
 from paddle.fluid.dygraph.parallel import ParallelEnv
 
-from dygraph.datasets import DATASETS
-import dygraph.transforms as T
-from dygraph.cvlibs import manager
-from dygraph.utils import get_environ_info
-from dygraph.core import infer
+from paddleseg.datasets import DATASETS
+import paddleseg.transforms as T
+from paddleseg.cvlibs import manager
+from paddleseg.utils import get_environ_info
+from paddleseg.core import infer
 
 
 def parse_args():
diff --git a/dygraph/models/fast_scnn.py b/dygraph/models/fast_scnn.py
new file mode 100644
index 0000000000000000000000000000000000000000..6bd9b4d6e656bcb3530c50be120293b4f3fb05c6
--- /dev/null
+++ b/dygraph/models/fast_scnn.py
@@ -0,0 +1,302 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from paddle import fluid, nn
+
+from dygraph.cvlibs import manager
+from dygraph.models import model_utils, pspnet
+from dygraph.models.architectures import layer_utils
+
+
+@manager.MODELS.add_component
+class FastSCNN(fluid.dygraph.Layer):
+    """
+    The FastSCNN implementation.
+
+    As mentioned in the original paper, FastSCNN is a real-time segmentation algorithm (123.5 fps),
+    even for high-resolution images (1024x2048).
+
+    The original article refers to
+    Poudel, Rudra PK, et al. "Fast-SCNN: Fast semantic segmentation network."
+    (https://arxiv.org/pdf/1902.04502.pdf)
+
+    Args:
+
+        num_classes (int): the number of target classes. Default to 2.
+
+        enable_auxiliary_loss (bool): a bool value that indicates whether to add an auxiliary loss.
+            If true, an auxiliary loss with weight 0.4 is added after the LearningToDownsample module. Default to False.
+
+        ignore_index (int): the value in the ground-truth mask that is ignored during evaluation. Default to 255.
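+
+    A minimal usage sketch (`images` and `labels` below are illustrative
+    placeholders for a [N, 3, H, W] float32 image batch and a [N, 1, H, W]
+    int64 label batch; they are not defined in this module):
+
+        import paddle.fluid as fluid
+        with fluid.dygraph.guard():
+            model = FastSCNN(num_classes=19, enable_auxiliary_loss=True)
+            loss = model(images, labels)       # training mode: returns the loss
+            model.eval()
+            pred, score_map = model(images)    # eval mode: returns prediction and score map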
+ """ + + def __init__(self, + num_classes=2, + enable_auxiliary_loss=False, + ignore_index=255): + + super(FastSCNN, self).__init__() + + self.learning_to_downsample = LearningToDownsample(32, 48, 64) + self.global_feature_extractor = GlobalFeatureExtractor(64, [64, 96, 128], 128, 6, [3, 3, 3]) + self.feature_fusion = FeatureFusionModule(64, 128, 128) + self.classifier = Classifier(128, num_classes) + + if enable_auxiliary_loss: + self.auxlayer = model_utils.AuxLayer(64, 32, num_classes) + + self.enable_auxiliary_loss = enable_auxiliary_loss + self.ignore_index = ignore_index + + def forward(self, input, label=None): + + higher_res_features = self.learning_to_downsample(input) + x = self.global_feature_extractor(higher_res_features) + x = self.feature_fusion(higher_res_features, x) + logit = self.classifier(x) + logit = fluid.layers.resize_bilinear(logit, input.shape[2:]) + + if self.enable_auxiliary_loss: + auxiliary_logit = self.auxlayer(higher_res_features) + auxiliary_logit = fluid.layers.resize_bilinear(auxiliary_logit, input.shape[2:]) + + if self.training: + loss = model_utils.get_loss(logit, label) + if self.enable_auxiliary_loss: + auxiliary_loss = model_utils.get_loss(auxiliary_logit, label) + loss += (0.4 * auxiliary_loss) + return loss + else: + pred, score_map = model_utils.get_pred_score_map(logit) + return pred, score_map + + +class LearningToDownsample(fluid.dygraph.Layer): + """ + Learning to downsample module. + + This module consists of three downsampling blocks (one Conv and two separable Conv) + + Args: + dw_channels1 (int): the input channels of the first sep conv. Default to 32. + + dw_channels2 (int): the input channels of the second sep conv. Default to 48. + + out_channels (int): the output channels of LearningToDownsample module. Default to 64. + """ + + def __init__(self, dw_channels1=32, dw_channels2=48, out_channels=64): + super(LearningToDownsample, self).__init__() + + self.conv_bn_relu = layer_utils.ConvBnRelu(num_channels=3, + num_filters=dw_channels1, + filter_size=3, + stride=2) + self.dsconv_bn_relu1 = layer_utils.ConvBnRelu(num_channels=dw_channels1, + num_filters=dw_channels2, + filter_size=3, + using_sep_conv=True, # using sep conv + stride=2, + padding=1) + self.dsconv_bn_relu2 = layer_utils.ConvBnRelu(num_channels=dw_channels2, + num_filters=out_channels, + filter_size=3, + using_sep_conv=True, # using sep conv + stride=2, + padding=1) + + def forward(self, x): + x = self.conv_bn_relu(x) + x = self.dsconv_bn_relu1(x) + x = self.dsconv_bn_relu2(x) + return x + + +class GlobalFeatureExtractor(fluid.dygraph.Layer): + """ + Global feature extractor module + + This module consists of three LinearBottleneck blocks (like inverted residual introduced by MobileNetV2) and + a PPModule (introduced by PSPNet). + + Args: + in_channels (int): the number of input channels to the module. Default to 64. + block_channels (tuple): a tuple represents output channels of each bottleneck block. Default to (64, 96, 128). + out_channels (int): the number of output channels of the module. Default to 128. + expansion (int): the expansion factor in bottleneck. Default to 6. + num_blocks (tuple): it indicates the repeat time of each bottleneck. Default to (3, 3, 3). 
+ """ + + def __init__(self, in_channels=64, block_channels=(64, 96, 128), + out_channels=128, expansion=6, num_blocks=(3, 3, 3)): + super(GlobalFeatureExtractor, self).__init__() + + self.bottleneck1 = self._make_layer(LinearBottleneck, in_channels, block_channels[0], num_blocks[0], expansion, + 2) + self.bottleneck2 = self._make_layer(LinearBottleneck, block_channels[0], block_channels[1], num_blocks[1], + expansion, 2) + self.bottleneck3 = self._make_layer(LinearBottleneck, block_channels[1], block_channels[2], num_blocks[2], + expansion, 1) + + self.ppm = pspnet.PPModule(block_channels[2], out_channels, dim_reduction=True) + + def _make_layer(self, block, in_channels, out_channels, blocks, expansion=6, stride=1): + layers = [] + layers.append(block(in_channels, out_channels, expansion, stride)) + for i in range(1, blocks): + layers.append(block(out_channels, out_channels, expansion, 1)) + return nn.Sequential(*layers) + + def forward(self, x): + x = self.bottleneck1(x) + x = self.bottleneck2(x) + x = self.bottleneck3(x) + x = self.ppm(x) + return x + + +class LinearBottleneck(fluid.dygraph.Layer): + """ + Single bottleneck implementation. + + Args: + in_channels (int): the number of input channels to bottleneck block. + + out_channels (int): the number of output channels of bottleneck block. + + expansion (int). the expansion factor in bottleneck. Default to 6. + + stride (int). the stride used in depth-wise conv. + """ + + def __init__(self, in_channels, out_channels, expansion=6, stride=2, **kwargs): + super(LinearBottleneck, self).__init__() + + self.use_shortcut = stride == 1 and in_channels == out_channels + + expand_channels = in_channels * expansion + self.block = nn.Sequential( + # pw + layer_utils.ConvBnRelu(num_channels=in_channels, + num_filters=expand_channels, + filter_size=1, + bias_attr=False), + # dw + layer_utils.ConvBnRelu(num_channels=expand_channels, + num_filters=expand_channels, + filter_size=3, + stride=stride, + padding=1, + groups=expand_channels, + bias_attr=False), + # pw-linear + nn.Conv2D(num_channels=expand_channels, + num_filters=out_channels, + filter_size=1, + bias_attr=False), + + nn.BatchNorm(out_channels) + ) + + def forward(self, x): + out = self.block(x) + if self.use_shortcut: + out = x + out + return out + + +class FeatureFusionModule(fluid.dygraph.Layer): + """ + Feature Fusion Module Implememtation. + + This module fuses high-resolution feature and low-resolution feature. + + Args: + high_in_channels (int): the channels of high-resolution feature (output of LearningToDownsample). + + low_in_channels (int). the channels of low-resolution feature (output of GlobalFeatureExtractor). + + out_channels (int). the output channels of this module. 
+ """ + + def __init__(self, high_in_channels, low_in_channels, out_channels): + super(FeatureFusionModule, self).__init__() + + # There only depth-wise conv is used WITHOUT point-sied conv + self.dwconv = layer_utils.ConvBnRelu(num_channels=low_in_channels, + num_filters=out_channels, + filter_size=3, + padding=1, + groups=128) + + self.conv_low_res = nn.Sequential( + nn.Conv2D(num_channels=out_channels, num_filters=out_channels, filter_size=1), + nn.BatchNorm(out_channels)) + + self.conv_high_res = nn.Sequential( + nn.Conv2D(num_channels=high_in_channels, num_filters=out_channels, filter_size=1), + nn.BatchNorm(out_channels)) + + self.relu = nn.ReLU(True) + + def forward(self, high_res_input, low_res_input): + low_res_input = fluid.layers.resize_bilinear(input=low_res_input, scale=4) + low_res_input = self.dwconv(low_res_input) + low_res_input = self.conv_low_res(low_res_input) + + high_res_input = self.conv_high_res(high_res_input) + + x = high_res_input + low_res_input + + return self.relu(x) + + +class Classifier(fluid.dygraph.Layer): + """ + The Classifier module implemetation. + + This module consists of two depth-wsie conv and one conv. + + Args: + input_channels (int): the input channels to this module. + + num_classes (int). the unique number of target classes. + + """ + + def __init__(self, input_channels, num_classes): + super(Classifier, self).__init__() + + self.dsconv1 = layer_utils.ConvBnRelu(num_channels=input_channels, + num_filters=input_channels, + filter_size=3, + using_sep_conv=True # using sep conv + ) + + self.dsconv2 = layer_utils.ConvBnRelu(num_channels=input_channels, + num_filters=input_channels, + filter_size=3, + using_sep_conv=True # using sep conv + ) + + self.conv = nn.Conv2D(num_channels=input_channels, + num_filters=num_classes, + filter_size=1) + + def forward(self, x): + x = self.dsconv1(x) + x = self.dsconv2(x) + x = fluid.layers.dropout(x, dropout_prob=0.1) + x = self.conv(x) + return x diff --git a/dygraph/models/model_utils.py b/dygraph/models/model_utils.py index e0a88c355a78d98ff312aaa75cf175a2369ffa5d..7f52919915faf3fa2cca6b567e0c6b8a105e7e0b 100644 --- a/dygraph/models/model_utils.py +++ b/dygraph/models/model_utils.py @@ -18,7 +18,8 @@ import paddle.nn.functional as F from paddle import fluid from paddle.fluid import dygraph from paddle.fluid.dygraph import Conv2D -from paddle.nn import SyncBatchNorm as BatchNorm +#from paddle.nn import SyncBatchNorm as BatchNorm +from paddle.fluid.dygraph import SyncBatchNorm as BatchNorm from dygraph.models.architectures import layer_utils @@ -47,10 +48,37 @@ class FCNHead(fluid.dygraph.Layer): def forward(self, x): x = self.conv_bn_relu(x) - x = F.dropout(x, p=0.1) + x = F.dropout(x, dropout_prob=0.1) x = self.conv(x) return x +class AuxLayer(fluid.dygraph.Layer): + """ + The auxilary layer implementation for auxilary loss + + Args: + in_channels (int): the number of input channels. + inter_channels (int): intermediate channels. + out_channels (int): the number of output channels, which is usually num_classes. 
+ """ + + def __init__(self, in_channels, inter_channels, out_channels): + super(AuxLayer, self).__init__() + + self.conv_bn_relu = layer_utils.ConvBnRelu(num_channels=in_channels, + num_filters=inter_channels, + filter_size=3, + padding=1) + + self.conv = Conv2D(num_channels=inter_channels, + num_filters=out_channels, + filter_size=1) + + def forward(self, x): + x = self.conv_bn_relu(x) + x = F.dropout(x, dropout_prob=0.1) + x = self.conv(x) + return x def get_loss(logit, label, ignore_index=255, EPS=1e-5): """ diff --git a/dygraph/models/pspnet.py b/dygraph/models/pspnet.py index d4457ed53435aa75257b68c476b55c15ab701c68..0e376e21ca7d6c57b2d0b121e82a3ca0f5a57c10 100644 --- a/dygraph/models/pspnet.py +++ b/dygraph/models/pspnet.py @@ -148,23 +148,27 @@ class PPModule(fluid.dygraph.Layer): out_channels (int): the number of output channels after pyramid pooling module. bin_sizes (tuple): the out size of pooled feature maps. Default to (1,2,3,6). + + dim_reduction (bool): a bool value represent if reduing dimention after pooling. Default to True. """ - def __init__(self, in_channels, out_channels, bin_sizes=(1, 2, 3, 6)): + def __init__(self, in_channels, out_channels, bin_sizes=(1, 2, 3, 6), dim_reduction=True): super(PPModule, self).__init__() self.bin_sizes = bin_sizes + inter_channels = in_channels + if dim_reduction: + inter_channels = in_channels // len(bin_sizes) + # we use dimension reduction after pooling mentioned in original implementation. - self.stages = fluid.dygraph.LayerList( - [self._make_stage(in_channels, size) for size in bin_sizes]) + self.stages = fluid.dygraph.LayerList([self._make_stage(in_channels, inter_channels, size) for size in bin_sizes]) - self.conv_bn_relu2 = layer_utils.ConvBnRelu( - num_channels=in_channels * 2, - num_filters=out_channels, - filter_size=3, - padding=1) + self.conv_bn_relu2 = layer_utils.ConvBnRelu(num_channels=in_channels + inter_channels * len(bin_sizes), + num_filters=out_channels, + filter_size=3, + padding=1) - def _make_stage(self, in_channels, size): + def _make_stage(self, in_channels, out_channels, size): """ Create one pooling layer. @@ -186,10 +190,9 @@ class PPModule(fluid.dygraph.Layer): # this paddle version does not support AdaptiveAvgPool2d, so skip it here. # prior = nn.AdaptiveAvgPool2d(output_size=(size, size)) - conv = layer_utils.ConvBnRelu( - num_channels=in_channels, - num_filters=in_channels // len(self.bin_sizes), - filter_size=1) + conv = layer_utils.ConvBnRelu(num_channels=in_channels, + num_filters=out_channels, + filter_size=1) return conv diff --git a/dygraph/paddleseg/__init__.py b/dygraph/paddleseg/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1e2950fcdf71fe14a60b485bc786ef655be907a0 --- /dev/null +++ b/dygraph/paddleseg/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import models +from . import datasets +from . 
import transforms \ No newline at end of file diff --git a/dygraph/paddleseg/core/__init__.py b/dygraph/paddleseg/core/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..202629f542f40a2741cb12022adb10d7a56861b5 --- /dev/null +++ b/dygraph/paddleseg/core/__init__.py @@ -0,0 +1,19 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .train import train +from .val import evaluate +from .infer import infer + +__all__ = ['train', 'evaluate', 'infer'] diff --git a/dygraph/paddleseg/core/infer.py b/dygraph/paddleseg/core/infer.py new file mode 100644 index 0000000000000000000000000000000000000000..a9e671e05b873ace440b48102959423851b3aa80 --- /dev/null +++ b/dygraph/paddleseg/core/infer.py @@ -0,0 +1,72 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os + +from paddle.fluid.dygraph.base import to_variable +import numpy as np +import paddle.fluid as fluid +import cv2 +import tqdm + +from paddleseg import utils +import paddleseg.utils.logger as logger + + +def mkdir(path): + sub_dir = os.path.dirname(path) + if not os.path.exists(sub_dir): + os.makedirs(sub_dir) + + +def infer(model, test_dataset=None, model_dir=None, save_dir='output'): + ckpt_path = os.path.join(model_dir, 'model') + para_state_dict, opti_state_dict = fluid.load_dygraph(ckpt_path) + model.set_dict(para_state_dict) + model.eval() + + added_saved_dir = os.path.join(save_dir, 'added') + pred_saved_dir = os.path.join(save_dir, 'prediction') + + logger.info("Start to predict...") + for im, im_info, im_path in tqdm.tqdm(test_dataset): + im = to_variable(im) + pred, _ = model(im) + pred = pred.numpy() + pred = np.squeeze(pred).astype('uint8') + for info in im_info[::-1]: + if info[0] == 'resize': + h, w = info[1][0], info[1][1] + pred = cv2.resize(pred, (w, h), cv2.INTER_NEAREST) + elif info[0] == 'padding': + h, w = info[1][0], info[1][1] + pred = pred[0:h, 0:w] + else: + raise Exception("Unexpected info '{}' in im_info".format( + info[0])) + + im_file = im_path.replace(test_dataset.dataset_root, '') + if im_file[0] == '/': + im_file = im_file[1:] + # save added image + added_image = utils.visualize(im_path, pred, weight=0.6) + added_image_path = os.path.join(added_saved_dir, im_file) + mkdir(added_image_path) + cv2.imwrite(added_image_path, added_image) + + # save prediction + pred_im = utils.visualize(im_path, pred, weight=0.0) + pred_saved_path = os.path.join(pred_saved_dir, im_file) + mkdir(pred_saved_path) + cv2.imwrite(pred_saved_path, pred_im) diff --git a/dygraph/paddleseg/core/train.py b/dygraph/paddleseg/core/train.py new file mode 100644 index 0000000000000000000000000000000000000000..a2621a622ac5bb5a995a260c4cdb67f16f5a1203 --- /dev/null +++ b/dygraph/paddleseg/core/train.py @@ -0,0 +1,193 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import paddle +import paddle.fluid as fluid +from paddle.fluid.dygraph.parallel import ParallelEnv +from paddle.fluid.io import DataLoader +# from paddle.incubate.hapi.distributed import DistributedBatchSampler +from paddle.io import DistributedBatchSampler +import paddle.nn.functional as F + +import paddleseg.utils.logger as logger +from paddleseg.utils import load_pretrained_model +from paddleseg.utils import resume +from paddleseg.utils import Timer, calculate_eta +from .val import evaluate + + +def check_logits_losses(logits, losses): + len_logits = len(logits) + len_losses = len(losses['types']) + if len_logits != len_losses: + raise RuntimeError( + 'The length of logits should equal to the types of loss config: {} != {}.' 
+ .format(len_logits, len_losses)) + + +def loss_computation(logits, label, losses): + check_logits_losses(logits, losses) + loss = 0 + for i in range(len(logits)): + logit = logits[i] + if logit.shape[-2:] != label.shape[-2:]: + logit = F.resize_bilinear(logit, label.shape[-2:]) + loss_i = losses['types'][i](logit, label) + loss += losses['coef'][i] * loss_i + return loss + + +def train(model, + train_dataset, + places=None, + eval_dataset=None, + optimizer=None, + save_dir='output', + iters=10000, + batch_size=2, + resume_model=None, + save_interval_iters=1000, + log_iters=10, + num_classes=None, + num_workers=8, + use_vdl=False, + losses=None, + ignore_index=255): + + nranks = ParallelEnv().nranks + + start_iter = 0 + if resume_model is not None: + start_iter = resume(model, optimizer, resume_model) + + if not os.path.isdir(save_dir): + if os.path.exists(save_dir): + os.remove(save_dir) + os.makedirs(save_dir) + + if nranks > 1: + strategy = fluid.dygraph.prepare_context() + ddp_model = fluid.dygraph.DataParallel(model, strategy) + + batch_sampler = DistributedBatchSampler( + train_dataset, batch_size=batch_size, shuffle=True, drop_last=True) + loader = DataLoader( + train_dataset, + batch_sampler=batch_sampler, + places=places, + num_workers=num_workers, + return_list=True, + ) + + if use_vdl: + from visualdl import LogWriter + log_writer = LogWriter(save_dir) + + timer = Timer() + avg_loss = 0.0 + iters_per_epoch = len(batch_sampler) + best_mean_iou = -1.0 + best_model_iter = -1 + train_reader_cost = 0.0 + train_batch_cost = 0.0 + timer.start() + + iter = start_iter + while iter < iters: + for data in loader: + iter += 1 + if iter > iters: + break + train_reader_cost += timer.elapsed_time() + images = data[0] + labels = data[1].astype('int64') + if nranks > 1: + logits = ddp_model(images) + loss = loss_computation(logits, labels, losses) + # loss = ddp_model(images, labels) + # apply_collective_grads sum grads over multiple gpus. 
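+                # scale_loss divides the loss by the trainer count so that the
+                # subsequent all-reduce sum in apply_collective_grads yields
+                # gradients averaged across GPUs.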
+ loss = ddp_model.scale_loss(loss) + loss.backward() + ddp_model.apply_collective_grads() + else: + logits = model(images) + loss = loss_computation(logits, labels, losses) + # loss = model(images, labels) + loss.backward() + optimizer.minimize(loss) + model.clear_gradients() + avg_loss += loss.numpy()[0] + lr = optimizer.current_step_lr() + train_batch_cost += timer.elapsed_time() + if (iter) % log_iters == 0 and ParallelEnv().local_rank == 0: + avg_loss /= log_iters + avg_train_reader_cost = train_reader_cost / log_iters + avg_train_batch_cost = train_batch_cost / log_iters + train_reader_cost = 0.0 + train_batch_cost = 0.0 + remain_iters = iters - iter + eta = calculate_eta(remain_iters, avg_train_batch_cost) + logger.info( + "[TRAIN] epoch={}, iter={}/{}, loss={:.4f}, lr={:.6f}, batch_cost={:.4f}, reader_cost={:.4f} | ETA {}" + .format((iter - 1) // iters_per_epoch + 1, iter, iters, + avg_loss * nranks, lr, avg_train_batch_cost, + avg_train_reader_cost, eta)) + if use_vdl: + log_writer.add_scalar('Train/loss', avg_loss * nranks, iter) + log_writer.add_scalar('Train/lr', lr, iter) + log_writer.add_scalar('Train/batch_cost', + avg_train_batch_cost, iter) + log_writer.add_scalar('Train/reader_cost', + avg_train_reader_cost, iter) + avg_loss = 0.0 + + if (iter % save_interval_iters == 0 + or iter == iters) and ParallelEnv().local_rank == 0: + current_save_dir = os.path.join(save_dir, + "iter_{}".format(iter)) + if not os.path.isdir(current_save_dir): + os.makedirs(current_save_dir) + fluid.save_dygraph(model.state_dict(), + os.path.join(current_save_dir, 'model')) + fluid.save_dygraph(optimizer.state_dict(), + os.path.join(current_save_dir, 'model')) + + if eval_dataset is not None: + mean_iou, avg_acc = evaluate( + model, + eval_dataset, + model_dir=current_save_dir, + num_classes=num_classes, + ignore_index=ignore_index, + iter_id=iter) + if mean_iou > best_mean_iou: + best_mean_iou = mean_iou + best_model_iter = iter + best_model_dir = os.path.join(save_dir, "best_model") + fluid.save_dygraph( + model.state_dict(), + os.path.join(best_model_dir, 'model')) + logger.info( + 'Current evaluated best model in eval_dataset is iter_{}, miou={:4f}' + .format(best_model_iter, best_mean_iou)) + + if use_vdl: + log_writer.add_scalar('Evaluate/mIoU', mean_iou, iter) + log_writer.add_scalar('Evaluate/aAcc', avg_acc, iter) + model.train() + timer.restart() + if use_vdl: + log_writer.close() diff --git a/dygraph/paddleseg/core/val.py b/dygraph/paddleseg/core/val.py new file mode 100644 index 0000000000000000000000000000000000000000..c104b2d8bf67419c58f15ba75989720662b0a2d8 --- /dev/null +++ b/dygraph/paddleseg/core/val.py @@ -0,0 +1,89 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import os
+
+import numpy as np
+import tqdm
+import cv2
+from paddle.fluid.dygraph.base import to_variable
+import paddle.fluid as fluid
+import paddle.nn.functional as F
+import paddle
+
+import paddleseg.utils.logger as logger
+from paddleseg.utils import ConfusionMatrix
+from paddleseg.utils import Timer, calculate_eta
+
+
+def evaluate(model,
+             eval_dataset=None,
+             model_dir=None,
+             num_classes=None,
+             ignore_index=255,
+             iter_id=None):
+    ckpt_path = os.path.join(model_dir, 'model')
+    para_state_dict, opti_state_dict = fluid.load_dygraph(ckpt_path)
+    model.set_dict(para_state_dict)
+    model.eval()
+
+    total_iters = len(eval_dataset)
+    conf_mat = ConfusionMatrix(num_classes, streaming=True)
+
+    logger.info(
+        "Start evaluating (total_samples={}, total_iters={})...".format(
+            len(eval_dataset), total_iters))
+    timer = Timer()
+    timer.start()
+    for iter, (im, im_info, label) in tqdm.tqdm(
+            enumerate(eval_dataset), total=total_iters):
+        im = to_variable(im)
+        # pred, _ = model(im)
+        logits = model(im)
+        pred = paddle.argmax(logits[0], axis=1)
+        pred = pred.numpy().astype('float32')
+        pred = np.squeeze(pred)
+        for info in im_info[::-1]:
+            if info[0] == 'resize':
+                h, w = info[1][0], info[1][1]
+                pred = cv2.resize(pred, (w, h), cv2.INTER_NEAREST)
+            elif info[0] == 'padding':
+                h, w = info[1][0], info[1][1]
+                pred = pred[0:h, 0:w]
+            else:
+                raise Exception("Unexpected info '{}' in im_info".format(
+                    info[0]))
+        pred = pred[np.newaxis, :, :, np.newaxis]
+        pred = pred.astype('int64')
+        mask = label != ignore_index
+
+        conf_mat.calculate(pred=pred, label=label, ignore=mask)
+        _, iou = conf_mat.mean_iou()
+
+        time_iter = timer.elapsed_time()
+        remain_iter = total_iters - iter - 1
+        logger.debug(
+            "[EVAL] iter_id={}, iter={}/{}, iou={:.4f}, sec/iter={:.4f} | ETA {}"
+            .format(iter_id, iter + 1, total_iters, iou, time_iter,
+                    calculate_eta(remain_iter, time_iter)))
+        timer.restart()
+
+    category_iou, miou = conf_mat.mean_iou()
+    category_acc, macc = conf_mat.accuracy()
+    logger.info("[EVAL] #Images={} mAcc={:.4f} mIoU={:.4f}".format(
+        len(eval_dataset), macc, miou))
+    logger.info("[EVAL] Category IoU: " + str(category_iou))
+    logger.info("[EVAL] Category Acc: " + str(category_acc))
+    logger.info("[EVAL] Kappa: {:.4f}".format(conf_mat.kappa()))
+    return miou, macc
diff --git a/dygraph/paddleseg/cvlibs/__init__.py b/dygraph/paddleseg/cvlibs/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..18812001388cbfd1ecf7dc4d38398ddd91711af4
--- /dev/null
+++ b/dygraph/paddleseg/cvlibs/__init__.py
@@ -0,0 +1,16 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from . import manager
+from . import param_init
diff --git a/dygraph/paddleseg/cvlibs/manager.py b/dygraph/paddleseg/cvlibs/manager.py
new file mode 100644
index 0000000000000000000000000000000000000000..339070069c7e39532ec7fe2c826851a8d0f53df6
--- /dev/null
+++ b/dygraph/paddleseg/cvlibs/manager.py
@@ -0,0 +1,118 @@
+# -*- encoding: utf-8 -*-
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from collections.abc import Sequence
+import inspect
+
+
+class ComponentManager:
+    """
+    Implement a manager class to add new components properly.
+    The component can be added as either class or function type.
+
+    For example:
+        >>> model_manager = ComponentManager()
+        >>> class AlexNet: ...
+        >>> class ResNet: ...
+        >>> model_manager.add_component(AlexNet)
+        >>> model_manager.add_component(ResNet)
+
+    or, alternatively, pass a sequence of them:
+        >>> model_manager.add_component([AlexNet, ResNet])
+        >>> print(model_manager.components_dict)
+        output: {'AlexNet': <class 'AlexNet'>, 'ResNet': <class 'ResNet'>}
+
+    Or, an easier way, use it as a Python decorator by just adding it above the class declaration:
+        >>> model_manager = ComponentManager()
+        >>> @model_manager.add_component
+        >>> class AlexNet: ...
+        >>> @model_manager.add_component
+        >>> class ResNet: ...
+        >>> print(model_manager.components_dict)
+        output: {'AlexNet': <class 'AlexNet'>, 'ResNet': <class 'ResNet'>}
+    """
+
+    def __init__(self):
+        self._components_dict = dict()
+
+    def __len__(self):
+        return len(self._components_dict)
+
+    def __repr__(self):
+        return "{}:{}".format(self.__class__.__name__,
+                              list(self._components_dict.keys()))
+
+    def __getitem__(self, item):
+        if item not in self._components_dict.keys():
+            raise KeyError("{} does not exist in the current {}".format(
+                item, self))
+        return self._components_dict[item]
+
+    @property
+    def components_dict(self):
+        return self._components_dict
+
+    def _add_single_component(self, component):
+        """
+        Add a single component into the corresponding manager.
+
+        Args:
+            component (function | class): a new component.
+
+        Returns:
+            None
+        """
+
+        # Currently only support class or function type
+        if not (inspect.isclass(component) or inspect.isfunction(component)):
+            raise TypeError(
+                "Expect class/function type, but received {}".format(
+                    type(component)))
+
+        # Obtain the internal name of the component
+        component_name = component.__name__
+
+        # Check whether the component was added already
+        if component_name in self._components_dict.keys():
+            raise KeyError("{} exists already!".format(component_name))
+        else:
+            # Take the internal name of the component as its key
+            self._components_dict[component_name] = component
+
+    def add_component(self, components):
+        """
+        Add component(s) into the corresponding manager.
+
+        Args:
+            components (function | class | list | tuple): support three types of components.
+
+        Returns:
+            None
+        """
+
+        # Check whether the type is a sequence
+        if isinstance(components, Sequence):
+            for component in components:
+                self._add_single_component(component)
+        else:
+            component = components
self._add_single_component(component) + + return components + + +MODELS = ComponentManager() +BACKBONES = ComponentManager() +DATASETS = ComponentManager() +TRANSFORMS = ComponentManager() +LOSSES = ComponentManager() diff --git a/dygraph/paddleseg/cvlibs/param_init.py b/dygraph/paddleseg/cvlibs/param_init.py new file mode 100644 index 0000000000000000000000000000000000000000..567399c0a0c7d2310931b1c0ccae13cd0d5422b1 --- /dev/null +++ b/dygraph/paddleseg/cvlibs/param_init.py @@ -0,0 +1,25 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle.fluid as fluid + + +def constant_init(param, **kwargs): + initializer = fluid.initializer.Constant(**kwargs) + initializer(param, param.block) + + +def normal_init(param, **kwargs): + initializer = fluid.initializer.Normal(**kwargs) + initializer(param, param.block) diff --git a/dygraph/paddleseg/datasets/__init__.py b/dygraph/paddleseg/datasets/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..37d8da36997b3ec2a74b92199242eba126a0cefc --- /dev/null +++ b/dygraph/paddleseg/datasets/__init__.py @@ -0,0 +1,26 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .dataset import Dataset +from .optic_disc_seg import OpticDiscSeg +from .cityscapes import Cityscapes +from .voc import PascalVOC +from .ade import ADE20K + +DATASETS = { + "OpticDiscSeg": OpticDiscSeg, + "Cityscapes": Cityscapes, + "PascalVOC": PascalVOC, + "ADE20K": ADE20K +} diff --git a/dygraph/paddleseg/datasets/ade.py b/dygraph/paddleseg/datasets/ade.py new file mode 100644 index 0000000000000000000000000000000000000000..6614739899789e8fd8b13db4b7cb9ee798acaeae --- /dev/null +++ b/dygraph/paddleseg/datasets/ade.py @@ -0,0 +1,100 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import os
+
+import numpy as np
+from PIL import Image
+
+from .dataset import Dataset
+from paddleseg.utils.download import download_file_and_uncompress
+from paddleseg.cvlibs import manager
+from paddleseg.transforms import Compose
+
+DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset')
+URL = "http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip"
+
+
+@manager.DATASETS.add_component
+class ADE20K(Dataset):
+    """ADE20K dataset `http://sceneparsing.csail.mit.edu/`.
+
+    Args:
+        dataset_root: The dataset directory.
+        mode: Which part of the dataset to use; it is one of ('train', 'val'). Default: 'train'.
+        transforms: Transforms for image.
+        download: Whether to download the dataset if `dataset_root` is None.
+    """
+
+    def __init__(self,
+                 dataset_root=None,
+                 mode='train',
+                 transforms=None,
+                 download=True):
+        self.dataset_root = dataset_root
+        self.transforms = Compose(transforms)
+        self.mode = mode
+        self.file_list = list()
+        self.num_classes = 150
+
+        if mode.lower() not in ['train', 'val']:
+            raise Exception(
+                "`mode` should be one of ('train', 'val') in ADE20K dataset, but got {}."
+                .format(mode))
+
+        if self.transforms is None:
+            raise Exception("`transforms` is necessary, but it is None.")
+
+        if self.dataset_root is None:
+            if not download:
+                raise Exception(
+                    "`dataset_root` not set and auto download disabled.")
+            self.dataset_root = download_file_and_uncompress(
+                url=URL,
+                savepath=DATA_HOME,
+                extrapath=DATA_HOME,
+                extraname='ADEChallengeData2016')
+        elif not os.path.exists(self.dataset_root):
+            raise Exception('there is no `dataset_root`: {}.'.format(
+                self.dataset_root))
+
+        if mode == 'train':
+            img_dir = os.path.join(self.dataset_root, 'images/training')
+            grt_dir = os.path.join(self.dataset_root, 'annotations/training')
+        elif mode == 'val':
+            img_dir = os.path.join(self.dataset_root, 'images/validation')
+            grt_dir = os.path.join(self.dataset_root, 'annotations/validation')
+        img_files = os.listdir(img_dir)
+        grt_files = [i.replace('.jpg', '.png') for i in img_files]
+        for i in range(len(img_files)):
+            img_path = os.path.join(img_dir, img_files[i])
+            grt_path = os.path.join(grt_dir, grt_files[i])
+            self.file_list.append([img_path, grt_path])
+
+    def __getitem__(self, idx):
+        image_path, grt_path = self.file_list[idx]
+        if self.mode == 'test':
+            im, im_info, _ = self.transforms(im=image_path)
+            im = im[np.newaxis, ...]
+            return im, im_info, image_path
+        elif self.mode == 'val':
+            im, im_info, _ = self.transforms(im=image_path)
+            im = im[np.newaxis, ...]
+            label = np.asarray(Image.open(grt_path))
+            label = label - 1
+            label = label[np.newaxis, np.newaxis, :, :]
+            return im, im_info, label
+        else:
+            im, im_info, label = self.transforms(im=image_path, label=grt_path)
+            label = label - 1
+            return im, label
diff --git a/dygraph/paddleseg/datasets/cityscapes.py b/dygraph/paddleseg/datasets/cityscapes.py
new file mode 100644
index 0000000000000000000000000000000000000000..f3045d74fe621d165047bbba02a5a1908a7ebd23
--- /dev/null
+++ b/dygraph/paddleseg/datasets/cityscapes.py
@@ -0,0 +1,77 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import glob
+
+from .dataset import Dataset
+from paddleseg.cvlibs import manager
+from paddleseg.transforms import Compose
+
+
+@manager.DATASETS.add_component
+class Cityscapes(Dataset):
+    """Cityscapes dataset `https://www.cityscapes-dataset.com/`.
+
+    The folder structure is as follows:
+        cityscapes
+        |
+        |--leftImg8bit
+        |  |--train
+        |  |--val
+        |  |--test
+        |
+        |--gtFine
+        |  |--train
+        |  |--val
+        |  |--test
+
+    Make sure there are *_labelTrainIds.png files in the gtFine directory. If not, please run
+    convert_cityscapes.py in tools.
+
+    Args:
+        dataset_root: Cityscapes dataset directory.
+        mode: Which part of the dataset to use; it is one of ('train', 'val', 'test'). Default: 'train'.
+        transforms: Transforms for image.
+    """
+
+    def __init__(self, dataset_root, transforms=None, mode='train'):
+        self.dataset_root = dataset_root
+        self.transforms = Compose(transforms)
+        self.file_list = list()
+        self.mode = mode
+        self.num_classes = 19
+
+        if mode.lower() not in ['train', 'val', 'test']:
+            raise Exception(
+                "mode should be 'train', 'val' or 'test', but got {}.".format(
+                    mode))
+
+        if self.transforms is None:
+            raise Exception("`transforms` is necessary, but it is None.")
+
+        img_dir = os.path.join(self.dataset_root, 'leftImg8bit')
+        grt_dir = os.path.join(self.dataset_root, 'gtFine')
+        if self.dataset_root is None or not os.path.isdir(
+                self.dataset_root) or not os.path.isdir(
+                    img_dir) or not os.path.isdir(grt_dir):
+            raise Exception(
+                "The dataset is not found or the folder structure is nonconformant."
+            )
+
+        grt_files = sorted(
+            glob.glob(
+                os.path.join(grt_dir, mode, '*', '*_gtFine_labelTrainIds.png')))
+        img_files = sorted(
+            glob.glob(os.path.join(img_dir, mode, '*', '*_leftImg8bit.png')))
+
+        self.file_list = [[img_path, grt_path]
+                          for img_path, grt_path in zip(img_files, grt_files)]
diff --git a/dygraph/paddleseg/datasets/dataset.py b/dygraph/paddleseg/datasets/dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..30af28c3d03c43194e4d58e267aa8ed6c46c8156
--- /dev/null
+++ b/dygraph/paddleseg/datasets/dataset.py
@@ -0,0 +1,139 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+import paddle.fluid as fluid
+import numpy as np
+from PIL import Image
+
+from paddleseg.cvlibs import manager
+from paddleseg.transforms import Compose
+
+
+@manager.DATASETS.add_component
+class Dataset(fluid.io.Dataset):
+    """Pass in a custom dataset that conforms to the format.
+
+    Args:
+        dataset_root: The dataset directory.
+        num_classes: Number of classes.
+        mode: Which part of the dataset to use; it is one of ('train', 'val', 'test'). Default: 'train'.
+        train_list: The train dataset file. When mode is 'train', train_list is necessary.
+            The contents of the train_list file are as follows:
+                image1.jpg ground_truth1.png
+                image2.jpg ground_truth2.png
+        val_list: The evaluation dataset file. When mode is 'val', val_list is necessary.
+            The contents are the same as for train_list.
+        test_list: The test dataset file. When mode is 'test', test_list is necessary.
+            The annotation file is not necessary in the test_list file.
+        separator: The separator of the dataset list. Default: ' '.
+        transforms: Transforms for image.
+
+    Examples:
+        todo
+
+    """
+
+    def __init__(self,
+                 dataset_root,
+                 num_classes,
+                 mode='train',
+                 train_list=None,
+                 val_list=None,
+                 test_list=None,
+                 separator=' ',
+                 transforms=None):
+        self.dataset_root = dataset_root
+        self.transforms = Compose(transforms)
+        self.file_list = list()
+        self.mode = mode
+        self.num_classes = num_classes
+
+        if mode.lower() not in ['train', 'val', 'test']:
+            raise Exception(
+                "mode should be 'train', 'val' or 'test', but got {}.".format(
+                    mode))
+
+        if self.transforms is None:
+            raise Exception("`transforms` is necessary, but it is None.")
+
+        self.dataset_root = dataset_root
+        if not os.path.exists(self.dataset_root):
+            raise Exception('there is no `dataset_root`: {}.'.format(
+                self.dataset_root))
+
+        if mode == 'train':
+            if train_list is None:
+                raise Exception(
+                    'When `mode` is "train", `train_list` is necessary, but it is None.'
+                )
+            elif not os.path.exists(train_list):
+                raise Exception(
+                    '`train_list` is not found: {}'.format(train_list))
+            else:
+                file_list = train_list
+        elif mode == 'val':
+            if val_list is None:
+                raise Exception(
+                    'When `mode` is "val", `val_list` is necessary, but it is None.'
+                )
+            elif not os.path.exists(val_list):
+                raise Exception('`val_list` is not found: {}'.format(val_list))
+            else:
+                file_list = val_list
+        else:
+            if test_list is None:
+                raise Exception(
+                    'When `mode` is "test", `test_list` is necessary, but it is None.'
+                )
+            elif not os.path.exists(test_list):
+                raise Exception(
+                    '`test_list` is not found: {}'.format(test_list))
+            else:
+                file_list = test_list
+
+        with open(file_list, 'r') as f:
+            for line in f:
+                items = line.strip().split(separator)
+                if len(items) != 2:
+                    if mode == 'train' or mode == 'val':
+                        raise Exception(
+                            "File list format incorrect! In training or evaluation task it should be"
+                            " image_name{}label_name\\n".format(separator))
+                    image_path = os.path.join(self.dataset_root, items[0])
+                    grt_path = None
+                else:
+                    image_path = os.path.join(self.dataset_root, items[0])
+                    grt_path = os.path.join(self.dataset_root, items[1])
+                self.file_list.append([image_path, grt_path])
+
+    def __getitem__(self, idx):
+        image_path, grt_path = self.file_list[idx]
+        if self.mode == 'test':
+            im, im_info, _ = self.transforms(im=image_path)
+            im = im[np.newaxis, ...]
+            return im, im_info, image_path
+        elif self.mode == 'val':
+            im, im_info, _ = self.transforms(im=image_path)
+            im = im[np.newaxis, ...]
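+            # For 'val', the ground truth is loaded at its original size
+            # (only the image goes through the transforms) and gets batch and
+            # channel axes, giving a [1, 1, H, W] array for the evaluator.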
+ label = np.asarray(Image.open(grt_path)) + label = label[np.newaxis, np.newaxis, :, :] + return im, im_info, label + else: + im, im_info, label = self.transforms(im=image_path, label=grt_path) + return im, label + + def __len__(self): + return len(self.file_list) diff --git a/dygraph/paddleseg/datasets/optic_disc_seg.py b/dygraph/paddleseg/datasets/optic_disc_seg.py new file mode 100644 index 0000000000000000000000000000000000000000..6c1dedde24f0dd4a9bf9d922912da3c57bd37569 --- /dev/null +++ b/dygraph/paddleseg/datasets/optic_disc_seg.py @@ -0,0 +1,77 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +from .dataset import Dataset +from paddleseg.utils.download import download_file_and_uncompress +from paddleseg.cvlibs import manager +from paddleseg.transforms import Compose + +DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset') +URL = "https://paddleseg.bj.bcebos.com/dataset/optic_disc_seg.zip" + + +@manager.DATASETS.add_component +class OpticDiscSeg(Dataset): + def __init__(self, + dataset_root=None, + transforms=None, + mode='train', + download=True): + self.dataset_root = dataset_root + self.transforms = Compose(transforms) + self.file_list = list() + self.mode = mode + self.num_classes = 2 + + if mode.lower() not in ['train', 'val', 'test']: + raise Exception( + "`mode` should be 'train', 'val' or 'test', but got {}.".format( + mode)) + + if self.transforms is None: + raise Exception("`transforms` is necessary, but it is None.") + + if self.dataset_root is None: + if not download: + raise Exception( + "`data_root` not set and auto download disabled.") + self.dataset_root = download_file_and_uncompress( + url=URL, savepath=DATA_HOME, extrapath=DATA_HOME) + elif not os.path.exists(self.dataset_root): + raise Exception('there is not `dataset_root`: {}.'.format( + self.dataset_root)) + + if mode == 'train': + file_list = os.path.join(self.dataset_root, 'train_list.txt') + elif mode == 'val': + file_list = os.path.join(self.dataset_root, 'val_list.txt') + else: + file_list = os.path.join(self.dataset_root, 'test_list.txt') + + with open(file_list, 'r') as f: + for line in f: + items = line.strip().split() + if len(items) != 2: + if mode == 'train' or mode == 'val': + raise Exception( + "File list format incorrect! It should be" + " image_name label_name\\n") + image_path = os.path.join(self.dataset_root, items[0]) + grt_path = None + else: + image_path = os.path.join(self.dataset_root, items[0]) + grt_path = os.path.join(self.dataset_root, items[1]) + self.file_list.append([image_path, grt_path]) diff --git a/dygraph/paddleseg/datasets/rice.py b/dygraph/paddleseg/datasets/rice.py new file mode 100644 index 0000000000000000000000000000000000000000..f8041526fa2e265e0eac70709e9c295e860df9ad --- /dev/null +++ b/dygraph/paddleseg/datasets/rice.py @@ -0,0 +1,56 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+from .dataset import Dataset
+
+
+class Rice(Dataset):
+    def __init__(self, transforms=None, mode='train', download=True):
+        self.data_dir = "/mnt/liuyi22/PaddlePaddle/POC/rice_dataset"
+        self.transforms = transforms
+        self.file_list = list()
+        self.mode = mode
+        self.num_classes = 2
+
+        if mode.lower() not in ['train', 'eval', 'test']:
+            raise Exception(
+                "mode should be 'train', 'eval' or 'test', but got {}.".format(
+                    mode))
+
+        if self.transforms is None:
+            raise Exception("`transforms` is necessary, but it is None.")
+
+        if mode == 'train':
+            file_list = os.path.join(self.data_dir, 'train_list.txt')
+        elif mode == 'eval':
+            file_list = os.path.join(self.data_dir, 'val_list.txt')
+        else:
+            file_list = os.path.join(self.data_dir, 'test_list.txt')
+
+        with open(file_list, 'r') as f:
+            for line in f:
+                items = line.strip().split()
+                if len(items) != 2:
+                    if mode == 'train' or mode == 'eval':
+                        raise Exception(
+                            "File list format incorrect! It should be"
+                            " image_name label_name\\n")
+                    image_path = os.path.join(self.data_dir, items[0])
+                    grt_path = None
+                else:
+                    image_path = os.path.join(self.data_dir, items[0])
+                    grt_path = os.path.join(self.data_dir, items[1])
+                self.file_list.append([image_path, grt_path])
diff --git a/dygraph/paddleseg/datasets/voc.py b/dygraph/paddleseg/datasets/voc.py
new file mode 100644
index 0000000000000000000000000000000000000000..c6ac4b6a3e3540ae1b89fe0d1bac580acb0333e9
--- /dev/null
+++ b/dygraph/paddleseg/datasets/voc.py
@@ -0,0 +1,106 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+from .dataset import Dataset
+from paddleseg.utils.download import download_file_and_uncompress
+from paddleseg.cvlibs import manager
+from paddleseg.transforms import Compose
+
+DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset')
+URL = "http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar"
+
+
+@manager.DATASETS.add_component
+class PascalVOC(Dataset):
+    """Pascal VOC dataset `http://host.robots.ox.ac.uk/pascal/VOC/`. If you want to augment the dataset,
+    please run voc_augment.py in tools.
+
+    Args:
+        dataset_root: The dataset directory.
+        mode: Which part of the dataset to use; it is one of ('train', 'trainval', 'trainaug', 'val'). Default: 'train'.
+        transforms: Transforms for image.
+        download: Whether to download the dataset if `dataset_root` is None.
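+
+    A minimal construction sketch (the transform list is illustrative; any
+    transforms from paddleseg.transforms can be used):
+
+        import paddleseg.transforms as T
+        train_transforms = [T.RandomHorizontalFlip(), T.Normalize()]
+        train_dataset = PascalVOC(mode='train', transforms=train_transforms, download=True)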
+ """ + + def __init__(self, + dataset_root=None, + mode='train', + transforms=None, + download=True): + self.dataset_root = dataset_root + self.transforms = Compose(transforms) + self.mode = mode + self.file_list = list() + self.num_classes = 21 + + if mode.lower() not in ['train', 'trainval', 'trainaug', 'val']: + raise Exception( + "`mode` should be one of ('train', 'trainval', 'trainaug', 'val') in PascalVOC dataset, but got {}." + .format(mode)) + + if self.transforms is None: + raise Exception("`transforms` is necessary, but it is None.") + + if self.dataset_root is None: + if not download: + raise Exception( + "`dataset_root` not set and auto download disabled.") + self.dataset_root = download_file_and_uncompress( + url=URL, + savepath=DATA_HOME, + extrapath=DATA_HOME, + extraname='VOCdevkit') + elif not os.path.exists(self.dataset_root): + raise Exception('there is not `dataset_root`: {}.'.format( + self.dataset_root)) + + image_set_dir = os.path.join(self.dataset_root, 'VOC2012', 'ImageSets', + 'Segmentation') + if mode == 'train': + file_list = os.path.join(image_set_dir, 'train.txt') + elif mode == 'val': + file_list = os.path.join(image_set_dir, 'val.txt') + elif mode == 'trainval': + file_list = os.path.join(image_set_dir, 'trainval.txt') + elif mode == 'trainaug': + file_list = os.path.join(image_set_dir, 'train.txt') + file_list_aug = os.path.join(image_set_dir, 'aug.txt') + + if not os.path.exists(file_list_aug): + raise Exception( + "When `mode` is 'trainaug', Pascal Voc dataset should be augmented, " + "Please make sure voc_augment.py has been properly run when using this mode." + ) + + img_dir = os.path.join(self.dataset_root, 'VOC2012', 'JPEGImages') + grt_dir = os.path.join(self.dataset_root, 'VOC2012', + 'SegmentationClass') + grt_dir_aug = os.path.join(self.dataset_root, 'VOC2012', + 'SegmentationClassAug') + + with open(file_list, 'r') as f: + for line in f: + line = line.strip() + image_path = os.path.join(img_dir, ''.join([line, '.jpg'])) + grt_path = os.path.join(grt_dir, ''.join([line, '.png'])) + self.file_list.append([image_path, grt_path]) + if mode == 'trainaug': + with open(file_list_aug, 'r') as f: + for line in f: + line = line.strip() + image_path = os.path.join(img_dir, ''.join([line, '.jpg'])) + grt_path = os.path.join(grt_dir_aug, ''.join([line, + '.png'])) + self.file_list.append([image_path, grt_path]) diff --git a/dygraph/paddleseg/models/__init__.py b/dygraph/paddleseg/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..939b855d5aababdf06216fbd29d3cd7334db7823 --- /dev/null +++ b/dygraph/paddleseg/models/__init__.py @@ -0,0 +1,24 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+from .backbones import *
+from .losses import *
+from .unet import UNet
+from .deeplab import *
+from .fcn import *
+from .pspnet import *
+from .ocrnet import *
+from .fast_scnn import *
+from .gcnet import *
+from .ann import *
diff --git a/dygraph/paddleseg/models/ann.py b/dygraph/paddleseg/models/ann.py
new file mode 100644
index 0000000000000000000000000000000000000000..48c381d26308ac6c6632abcd202b84409e22e7f7
--- /dev/null
+++ b/dygraph/paddleseg/models/ann.py
@@ -0,0 +1,439 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+import paddle
+import paddle.nn.functional as F
+from paddle import nn
+from paddleseg.cvlibs import manager
+from paddleseg.models.common import layer_utils, model_utils
+from paddleseg.utils import utils
+
+
+@manager.MODELS.add_component
+class ANN(nn.Layer):
+    """
+    The ANN implementation based on PaddlePaddle.
+
+    The original article refers to
+    Zhu, Zhen, et al. "Asymmetric Non-local Neural Networks for Semantic Segmentation."
+    (https://arxiv.org/pdf/1908.07678.pdf)
+
+    It mainly consists of AFNB and APNB modules.
+
+    Args:
+
+        num_classes (int): the number of target classes.
+
+        backbone (Paddle.nn.Layer): the backbone network; currently ResNet50/101 are supported.
+
+        model_pretrained (str): the path of the pretrained model. Default to None.
+
+        backbone_indices (tuple): two values indicating the indices of backbone outputs.
+            The first index is taken as the low-level features and the second one as the
+            high-level features in the AFNB module. Usually the backbone consists of four
+            downsampling stages and returns an output of each stage, so we set the default
+            to (2, 3), i.e. the feature maps of the third and the fourth stage.
+
+        backbone_channels (tuple): the same length as "backbone_indices"; it indicates the channels of the corresponding indices.
+
+        key_value_channels (int): the key and value channels of the self-attention map in both AFNB and APNB modules.
+            Default to 256.
+
+        inter_channels (int): both input and output channels of the APNB modules.
+
+        psp_size (tuple): the out size of pooled feature maps. Default to (1, 3, 6, 8).
+
+        enable_auxiliary_loss (bool): a bool value that indicates whether to add an auxiliary loss. Default to True.
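+
+    A minimal construction sketch (`backbone` here is assumed to be any
+    network registered in this package that returns per-stage feature maps,
+    e.g. a ResNet101 variant; it is not defined in this file):
+
+        model = ANN(num_classes=19,
+                    backbone=backbone,
+                    backbone_indices=(2, 3),
+                    backbone_channels=(1024, 2048))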
+ + """ + + def __init__(self, + num_classes, + backbone, + model_pretrained=None, + backbone_indices=(2, 3), + backbone_channels=(1024, 2048), + key_value_channels=256, + inter_channels=512, + psp_size=(1, 3, 6, 8), + enable_auxiliary_loss=True): + super(ANN, self).__init__() + + self.backbone = backbone + + low_in_channels = backbone_channels[0] + high_in_channels = backbone_channels[1] + + self.fusion = AFNB( + low_in_channels=low_in_channels, + high_in_channels=high_in_channels, + out_channels=high_in_channels, + key_channels=key_value_channels, + value_channels=key_value_channels, + dropout_prob=0.05, + sizes=([1]), + psp_size=psp_size) + + self.context = nn.Sequential( + layer_utils.ConvBnRelu( + in_channels=high_in_channels, + out_channels=inter_channels, + kernel_size=3, + padding=1), + APNB( + in_channels=inter_channels, + out_channels=inter_channels, + key_channels=key_value_channels, + value_channels=key_value_channels, + dropout_prob=0.05, + sizes=([1]), + psp_size=psp_size)) + + self.cls = nn.Conv2d( + in_channels=inter_channels, + out_channels=num_classes, + kernel_size=1) + self.auxlayer = model_utils.AuxLayer( + in_channels=low_in_channels, + inter_channels=low_in_channels // 2, + out_channels=num_classes, + dropout_prob=0.05) + + self.backbone_indices = backbone_indices + self.enable_auxiliary_loss = enable_auxiliary_loss + + self.init_weight(model_pretrained) + + def forward(self, input, label=None): + + logit_list = [] + _, feat_list = self.backbone(input) + low_level_x = feat_list[self.backbone_indices[0]] + high_level_x = feat_list[self.backbone_indices[1]] + x = self.fusion(low_level_x, high_level_x) + x = self.context(x) + logit = self.cls(x) + logit = F.resize_bilinear(logit, input.shape[2:]) + logit_list.append(logit) + + if self.enable_auxiliary_loss: + auxiliary_logit = self.auxlayer(low_level_x) + auxiliary_logit = F.resize_bilinear(auxiliary_logit, input.shape[2:]) + logit_list.append(auxiliary_logit) + + return logit_list + + def init_weight(self, pretrained_model=None): + """ + Initialize the parameters of model parts. + + Args: + pretrained_model ([str], optional): the pretrained_model path of backbone. Defaults to None. + """ + + if pretrained_model is not None: + if os.path.exists(pretrained_model): + utils.load_pretrained_model(self.backbone, pretrained_model) + + +class AFNB(nn.Layer): + """ + Asymmetric Fusion Non-local Block + + Args: + low_in_channels (int): low-level-feature channels. + + high_in_channels (int): high-level-feature channels. + + out_channels (int): out channels of AFNB module. + + key_channels (int): the key channels in self-attention block. + + value_channels (int): the value channels in self-attention block. + + dropout_prob (float): the dropout rate of output. + + sizes (tuple): the number of AFNB modules. Default to ([1]). + + psp_size (tuple): the out size of pooled feature maps. Default to (1, 3, 6, 8). 
+ + """ + + def __init__(self, + low_in_channels, + high_in_channels, + out_channels, + key_channels, + value_channels, + dropout_prob, + sizes=([1]), + psp_size=(1, 3, 6, 8)): + super(AFNB, self).__init__() + + self.psp_size = psp_size + self.stages = nn.LayerList([ + SelfAttentionBlock_AFNB(low_in_channels, high_in_channels, + key_channels, value_channels, out_channels, + size) for size in sizes + ]) + self.conv_bn = layer_utils.ConvBn( + in_channels=out_channels + high_in_channels, + out_channels=out_channels, + kernel_size=1) + self.dropout_prob = dropout_prob + + def forward(self, low_feats, high_feats): + priors = [stage(low_feats, high_feats) for stage in self.stages] + context = priors[0] + for i in range(1, len(priors)): + context += priors[i] + + output = self.conv_bn(paddle.concat([context, high_feats], axis=1)) + output = F.dropout(output, p=self.dropout_prob) # dropout_prob + + return output + + +class APNB(nn.Layer): + """ + Asymmetric Pyramid Non-local Block + + Args: + in_channels (int): the input channels of APNB module. + + out_channels (int): out channels of APNB module. + + key_channels (int): the key channels in self-attention block. + + value_channels (int): the value channels in self-attention block. + + dropout_prob (float): the dropout rate of output. + + sizes (tuple): the number of AFNB modules. Default to ([1]). + + psp_size (tuple): the out size of pooled feature maps. Default to (1, 3, 6, 8). + + """ + + def __init__(self, + in_channels, + out_channels, + key_channels, + value_channels, + dropout_prob, + sizes=([1]), + psp_size=(1, 3, 6, 8)): + super(APNB, self).__init__() + + self.psp_size = psp_size + self.stages = nn.LayerList([ + SelfAttentionBlock_APNB(in_channels, out_channels, key_channels, + value_channels, size) for size in sizes + ]) + self.conv_bn = layer_utils.ConvBnRelu( + in_channels=in_channels * 2, + out_channels=out_channels, + kernel_size=1) + self.dropout_prob = dropout_prob + + def forward(self, feats): + priors = [stage(feats) for stage in self.stages] + context = priors[0] + for i in range(1, len(priors)): + context += priors[i] + + output = self.conv_bn(paddle.concat([context, feats], axis=1)) + output = F.dropout(output, p=self.dropout_prob) # dropout_prob + + return output + + +def _pp_module(x, psp_size): + n, c, h, w = x.shape + priors = [] + for size in psp_size: + feat = F.adaptive_pool2d(x, pool_size=size, pool_type="avg") + feat = paddle.reshape(feat, shape=(n, c, -1)) + priors.append(feat) + center = paddle.concat(priors, axis=-1) + return center + + +class SelfAttentionBlock_AFNB(nn.Layer): + """ + Self-Attention Block for AFNB module. + + Args: + low_in_channels (int): low-level-feature channels. + + high_in_channels (int): high-level-feature channels. + + key_channels (int): the key channels in self-attention block. + + value_channels (int): the value channels in self-attention block. + + out_channels (int): out channels of AFNB module. + + scale (int): pooling size. Defaut to 1. + + psp_size (tuple): the out size of pooled feature maps. Default to (1, 3, 6, 8). 
+ """ + + def __init__(self, + low_in_channels, + high_in_channels, + key_channels, + value_channels, + out_channels=None, + scale=1, + psp_size=(1, 3, 6, 8)): + super(SelfAttentionBlock_AFNB, self).__init__() + + self.scale = scale + self.in_channels = low_in_channels + self.out_channels = out_channels + self.key_channels = key_channels + self.value_channels = value_channels + if out_channels == None: + self.out_channels = high_in_channels + self.pool = nn.Pool2D(pool_size=(scale, scale), pool_type="max") + self.f_key = layer_utils.ConvBnRelu( + in_channels=low_in_channels, + out_channels=key_channels, + kernel_size=1) + self.f_query = layer_utils.ConvBnRelu( + in_channels=high_in_channels, + out_channels=key_channels, + kernel_size=1) + self.f_value = nn.Conv2d( + in_channels=low_in_channels, + out_channels=value_channels, + kernel_size=1) + + self.W = nn.Conv2d( + in_channels=value_channels, + out_channels=out_channels, + kernel_size=1) + + self.psp_size = psp_size + + def forward(self, low_feats, high_feats): + batch_size, _, h, w = high_feats.shape + + value = self.f_value(low_feats) + value = _pp_module(value, self.psp_size) + value = paddle.transpose(value, (0, 2, 1)) + + query = self.f_query(high_feats) + query = paddle.reshape(query, shape=(batch_size, self.key_channels, -1)) + query = paddle.transpose(query, perm=(0, 2, 1)) + + key = self.f_key(low_feats) + key = _pp_module(key, self.psp_size) + + sim_map = paddle.matmul(query, key) + sim_map = (self.key_channels ** -.5) * sim_map + sim_map = F.softmax(sim_map, axis=-1) + + context = paddle.matmul(sim_map, value) + context = paddle.transpose(context, perm=(0, 2, 1)) + context = paddle.reshape( + context, + shape=[batch_size, self.value_channels, *high_feats.shape[2:]]) + + context = self.W(context) + + return context + + +class SelfAttentionBlock_APNB(nn.Layer): + """ + Self-Attention Block for APNB module. + + Args: + in_channels (int): the input channels of APNB module. + + out_channels (int): out channels of APNB module. + + key_channels (int): the key channels in self-attention block. + + value_channels (int): the value channels in self-attention block. + + scale (int): pooling size. Defaut to 1. + + psp_size (tuple): the out size of pooled feature maps. Default to (1, 3, 6, 8). 
+ """ + + def __init__(self, + in_channels, + out_channels, + key_channels, + value_channels, + scale=1, + psp_size=(1, 3, 6, 8)): + super(SelfAttentionBlock_APNB, self).__init__() + + self.scale = scale + self.in_channels = in_channels + self.out_channels = out_channels + self.key_channels = key_channels + self.value_channels = value_channels + + self.pool = nn.Pool2D(pool_size=(scale, scale), pool_type="max") + self.f_key = layer_utils.ConvBnRelu( + in_channels=self.in_channels, + out_channels=self.key_channels, + kernel_size=1) + self.f_query = self.f_key + self.f_value = nn.Conv2d( + in_channels=self.in_channels, + out_channels=self.value_channels, + kernel_size=1) + self.W = nn.Conv2d( + in_channels=self.value_channels, + out_channels=self.out_channels, + kernel_size=1) + + self.psp_size = psp_size + + def forward(self, x): + batch_size, _, h, w = x.shape + if self.scale > 1: + x = self.pool(x) + + value = self.f_value(x) + value = _pp_module(value, self.psp_size) + value = paddle.transpose(value, perm=(0, 2, 1)) + + query = self.f_query(x) + query = paddle.reshape( + query, shape=(batch_size, self.key_channels, -1)) + query = paddle.transpose(query, perm=(0, 2, 1)) + + key = self.f_key(x) + key = _pp_module(key, self.psp_size) + + sim_map = paddle.matmul(query, key) + sim_map = (self.key_channels ** -.5) * sim_map + sim_map = F.softmax(sim_map, axis=-1) + + context = paddle.matmul(sim_map, value) + context = paddle.transpose(context, perm=(0, 2, 1)) + context = paddle.reshape( + context, shape=[batch_size, self.value_channels, *x.shape[2:]]) + context = self.W(context) + + return context diff --git a/dygraph/paddleseg/models/backbones/__init__.py b/dygraph/paddleseg/models/backbones/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8bc32c14b408c2048a394eb1fbf525c5fe91ffa7 --- /dev/null +++ b/dygraph/paddleseg/models/backbones/__init__.py @@ -0,0 +1,18 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .hrnet import * +from .resnet_vd import * +from .xception_deeplab import * +from .mobilenetv3 import * diff --git a/dygraph/paddleseg/models/backbones/hrnet.py b/dygraph/paddleseg/models/backbones/hrnet.py new file mode 100644 index 0000000000000000000000000000000000000000..d66f1c6de5efc96adf9f919583e4ccafda986222 --- /dev/null +++ b/dygraph/paddleseg/models/backbones/hrnet.py @@ -0,0 +1,850 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+import os
+
+import paddle
+import paddle.fluid as fluid
+from paddle.fluid.param_attr import ParamAttr
+from paddle.fluid.layer_helper import LayerHelper
+from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
+from paddle.fluid.initializer import Normal
+from paddle.nn import SyncBatchNorm as BatchNorm
+
+from paddleseg.cvlibs import manager
+from paddleseg.utils import utils
+from paddleseg.cvlibs import param_init
+
+__all__ = [
+    "HRNet_W18_Small_V1", "HRNet_W18_Small_V2", "HRNet_W18", "HRNet_W30",
+    "HRNet_W32", "HRNet_W40", "HRNet_W44", "HRNet_W48", "HRNet_W60", "HRNet_W64"
+]
+
+
+class HRNet(fluid.dygraph.Layer):
+    """
+    HRNet: Deep High-Resolution Representation Learning for Visual Recognition
+    https://arxiv.org/pdf/1908.07919.pdf.
+
+    Args:
+        stage1_num_modules (int): number of modules for stage1. Default 1.
+        stage1_num_blocks (list): number of blocks per module for stage1. Default [4].
+        stage1_num_channels (list): number of channels per branch for stage1. Default [64].
+        stage2_num_modules (int): number of modules for stage2. Default 1.
+        stage2_num_blocks (list): number of blocks per module for stage2. Default [4, 4].
+        stage2_num_channels (list): number of channels per branch for stage2. Default [18, 36].
+        stage3_num_modules (int): number of modules for stage3. Default 4.
+        stage3_num_blocks (list): number of blocks per module for stage3. Default [4, 4, 4].
+        stage3_num_channels (list): number of channels per branch for stage3. Default [18, 36, 72].
+        stage4_num_modules (int): number of modules for stage4. Default 3.
+        stage4_num_blocks (list): number of blocks per module for stage4. Default [4, 4, 4, 4].
+        stage4_num_channels (list): number of channels per branch for stage4. Default [18, 36, 72, 144].
+        has_se (bool): whether to use Squeeze-and-Excitation module. Default False.
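+
+    Note: the forward pass bilinearly upsamples the three lower-resolution
+        stage-4 branch outputs to the highest-resolution branch and
+        concatenates all four along the channel axis, so a single fused
+        feature map is returned (as a one-element list).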
+ """ + + def __init__(self, + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[18, 36], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[18, 36, 72], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[18, 36, 72, 144], + has_se=False): + super(HRNet, self).__init__() + + self.stage1_num_modules = stage1_num_modules + self.stage1_num_blocks = stage1_num_blocks + self.stage1_num_channels = stage1_num_channels + self.stage2_num_modules = stage2_num_modules + self.stage2_num_blocks = stage2_num_blocks + self.stage2_num_channels = stage2_num_channels + self.stage3_num_modules = stage3_num_modules + self.stage3_num_blocks = stage3_num_blocks + self.stage3_num_channels = stage3_num_channels + self.stage4_num_modules = stage4_num_modules + self.stage4_num_blocks = stage4_num_blocks + self.stage4_num_channels = stage4_num_channels + self.has_se = has_se + + self.conv_layer1_1 = ConvBNLayer( + num_channels=3, + num_filters=64, + filter_size=3, + stride=2, + act='relu', + name="layer1_1") + + self.conv_layer1_2 = ConvBNLayer( + num_channels=64, + num_filters=64, + filter_size=3, + stride=2, + act='relu', + name="layer1_2") + + self.la1 = Layer1( + num_channels=64, + num_blocks=self.stage1_num_blocks[0], + num_filters=self.stage1_num_channels[0], + has_se=has_se, + name="layer2") + + self.tr1 = TransitionLayer( + in_channels=[self.stage1_num_channels[0] * 4], + out_channels=self.stage2_num_channels, + name="tr1") + + self.st2 = Stage( + num_channels=self.stage2_num_channels, + num_modules=self.stage2_num_modules, + num_blocks=self.stage2_num_blocks, + num_filters=self.stage2_num_channels, + has_se=self.has_se, + name="st2") + + self.tr2 = TransitionLayer( + in_channels=self.stage2_num_channels, + out_channels=self.stage3_num_channels, + name="tr2") + self.st3 = Stage( + num_channels=self.stage3_num_channels, + num_modules=self.stage3_num_modules, + num_blocks=self.stage3_num_blocks, + num_filters=self.stage3_num_channels, + has_se=self.has_se, + name="st3") + + self.tr3 = TransitionLayer( + in_channels=self.stage3_num_channels, + out_channels=self.stage4_num_channels, + name="tr3") + self.st4 = Stage( + num_channels=self.stage4_num_channels, + num_modules=self.stage4_num_modules, + num_blocks=self.stage4_num_blocks, + num_filters=self.stage4_num_channels, + has_se=self.has_se, + name="st4") + + def forward(self, x, label=None, mode='train'): + input_shape = x.shape[2:] + conv1 = self.conv_layer1_1(x) + conv2 = self.conv_layer1_2(conv1) + + la1 = self.la1(conv2) + + tr1 = self.tr1([la1]) + st2 = self.st2(tr1) + + tr2 = self.tr2(st2) + st3 = self.st3(tr2) + + tr3 = self.tr3(st3) + st4 = self.st4(tr3) + + x0_h, x0_w = st4[0].shape[2:] + x1 = fluid.layers.resize_bilinear(st4[1], out_shape=(x0_h, x0_w)) + x2 = fluid.layers.resize_bilinear(st4[2], out_shape=(x0_h, x0_w)) + x3 = fluid.layers.resize_bilinear(st4[3], out_shape=(x0_h, x0_w)) + x = fluid.layers.concat([st4[0], x1, x2, x3], axis=1) + + return [x] + + +class ConvBNLayer(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_filters, + filter_size, + stride=1, + groups=1, + act="relu", + name=None): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2D( + num_channels=num_channels, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + bias_attr=False) + self._batch_norm = BatchNorm(num_filters) + 
self.act = act + + def forward(self, input): + y = self._conv(input) + y = self._batch_norm(y) + if self.act == 'relu': + y = fluid.layers.relu(y) + return y + + +class Layer1(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_filters, + num_blocks, + has_se=False, + name=None): + super(Layer1, self).__init__() + + self.bottleneck_block_list = [] + + for i in range(num_blocks): + bottleneck_block = self.add_sublayer( + "bb_{}_{}".format(name, i + 1), + BottleneckBlock( + num_channels=num_channels if i == 0 else num_filters * 4, + num_filters=num_filters, + has_se=has_se, + stride=1, + downsample=True if i == 0 else False, + name=name + '_' + str(i + 1))) + self.bottleneck_block_list.append(bottleneck_block) + + def forward(self, input): + conv = input + for block_func in self.bottleneck_block_list: + conv = block_func(conv) + return conv + + +class TransitionLayer(fluid.dygraph.Layer): + def __init__(self, in_channels, out_channels, name=None): + super(TransitionLayer, self).__init__() + + num_in = len(in_channels) + num_out = len(out_channels) + self.conv_bn_func_list = [] + for i in range(num_out): + residual = None + if i < num_in: + if in_channels[i] != out_channels[i]: + residual = self.add_sublayer( + "transition_{}_layer_{}".format(name, i + 1), + ConvBNLayer( + num_channels=in_channels[i], + num_filters=out_channels[i], + filter_size=3, + name=name + '_layer_' + str(i + 1))) + else: + residual = self.add_sublayer( + "transition_{}_layer_{}".format(name, i + 1), + ConvBNLayer( + num_channels=in_channels[-1], + num_filters=out_channels[i], + filter_size=3, + stride=2, + name=name + '_layer_' + str(i + 1))) + self.conv_bn_func_list.append(residual) + + def forward(self, input): + outs = [] + for idx, conv_bn_func in enumerate(self.conv_bn_func_list): + if conv_bn_func is None: + outs.append(input[idx]) + else: + if idx < len(input): + outs.append(conv_bn_func(input[idx])) + else: + outs.append(conv_bn_func(input[-1])) + return outs + + +class Branches(fluid.dygraph.Layer): + def __init__(self, + num_blocks, + in_channels, + out_channels, + has_se=False, + name=None): + super(Branches, self).__init__() + + self.basic_block_list = [] + + for i in range(len(out_channels)): + self.basic_block_list.append([]) + for j in range(num_blocks[i]): + in_ch = in_channels[i] if j == 0 else out_channels[i] + basic_block_func = self.add_sublayer( + "bb_{}_branch_layer_{}_{}".format(name, i + 1, j + 1), + BasicBlock( + num_channels=in_ch, + num_filters=out_channels[i], + has_se=has_se, + name=name + '_branch_layer_' + str(i + 1) + '_' + + str(j + 1))) + self.basic_block_list[i].append(basic_block_func) + + def forward(self, inputs): + outs = [] + for idx, input in enumerate(inputs): + conv = input + for basic_block_func in self.basic_block_list[idx]: + conv = basic_block_func(conv) + outs.append(conv) + return outs + + +class BottleneckBlock(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_filters, + has_se, + stride=1, + downsample=False, + name=None): + super(BottleneckBlock, self).__init__() + + self.has_se = has_se + self.downsample = downsample + + self.conv1 = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + act="relu", + name=name + "_conv1", + ) + self.conv2 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + stride=stride, + act="relu", + name=name + "_conv2") + self.conv3 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters * 4, + filter_size=1, + act=None, + name=name + 
"_conv3") + + if self.downsample: + self.conv_down = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 4, + filter_size=1, + act=None, + name=name + "_downsample") + + if self.has_se: + self.se = SELayer( + num_channels=num_filters * 4, + num_filters=num_filters * 4, + reduction_ratio=16, + name=name + '_fc') + + def forward(self, input): + residual = input + conv1 = self.conv1(input) + conv2 = self.conv2(conv1) + conv3 = self.conv3(conv2) + + if self.downsample: + residual = self.conv_down(input) + + if self.has_se: + conv3 = self.se(conv3) + + y = fluid.layers.elementwise_add(x=conv3, y=residual, act="relu") + return y + + +class BasicBlock(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_filters, + stride=1, + has_se=False, + downsample=False, + name=None): + super(BasicBlock, self).__init__() + + self.has_se = has_se + self.downsample = downsample + + self.conv1 = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=3, + stride=stride, + act="relu", + name=name + "_conv1") + self.conv2 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + stride=1, + act=None, + name=name + "_conv2") + + if self.downsample: + self.conv_down = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 4, + filter_size=1, + act="relu", + name=name + "_downsample") + + if self.has_se: + self.se = SELayer( + num_channels=num_filters, + num_filters=num_filters, + reduction_ratio=16, + name=name + '_fc') + + def forward(self, input): + residual = input + conv1 = self.conv1(input) + conv2 = self.conv2(conv1) + + if self.downsample: + residual = self.conv_down(input) + + if self.has_se: + conv2 = self.se(conv2) + + y = fluid.layers.elementwise_add(x=conv2, y=residual, act="relu") + return y + + +class SELayer(fluid.dygraph.Layer): + def __init__(self, num_channels, num_filters, reduction_ratio, name=None): + super(SELayer, self).__init__() + + self.pool2d_gap = Pool2D(pool_type='avg', global_pooling=True) + + self._num_channels = num_channels + + med_ch = int(num_channels / reduction_ratio) + stdv = 1.0 / math.sqrt(num_channels * 1.0) + self.squeeze = Linear( + num_channels, + med_ch, + act="relu", + param_attr=ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=name + "_sqz_weights"), + bias_attr=ParamAttr(name=name + '_sqz_offset')) + + stdv = 1.0 / math.sqrt(med_ch * 1.0) + self.excitation = Linear( + med_ch, + num_filters, + act="sigmoid", + param_attr=ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=name + "_exc_weights"), + bias_attr=ParamAttr(name=name + '_exc_offset')) + + def forward(self, input): + pool = self.pool2d_gap(input) + pool = fluid.layers.reshape(pool, shape=[-1, self._num_channels]) + squeeze = self.squeeze(pool) + excitation = self.excitation(squeeze) + excitation = fluid.layers.reshape( + excitation, shape=[-1, self._num_channels, 1, 1]) + out = input * excitation + return out + + +class Stage(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_modules, + num_blocks, + num_filters, + has_se=False, + multi_scale_output=True, + name=None): + super(Stage, self).__init__() + + self._num_modules = num_modules + + self.stage_func_list = [] + for i in range(num_modules): + if i == num_modules - 1 and not multi_scale_output: + stage_func = self.add_sublayer( + "stage_{}_{}".format(name, i + 1), + HighResolutionModule( + num_channels=num_channels, + num_blocks=num_blocks, + num_filters=num_filters, + has_se=has_se, + 
multi_scale_output=False, + name=name + '_' + str(i + 1))) + else: + stage_func = self.add_sublayer( + "stage_{}_{}".format(name, i + 1), + HighResolutionModule( + num_channels=num_channels, + num_blocks=num_blocks, + num_filters=num_filters, + has_se=has_se, + name=name + '_' + str(i + 1))) + + self.stage_func_list.append(stage_func) + + def forward(self, input): + out = input + for idx in range(self._num_modules): + out = self.stage_func_list[idx](out) + return out + + +class HighResolutionModule(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_blocks, + num_filters, + has_se=False, + multi_scale_output=True, + name=None): + super(HighResolutionModule, self).__init__() + + self.branches_func = Branches( + num_blocks=num_blocks, + in_channels=num_channels, + out_channels=num_filters, + has_se=has_se, + name=name) + + self.fuse_func = FuseLayers( + in_channels=num_filters, + out_channels=num_filters, + multi_scale_output=multi_scale_output, + name=name) + + def forward(self, input): + out = self.branches_func(input) + out = self.fuse_func(out) + return out + + +class FuseLayers(fluid.dygraph.Layer): + def __init__(self, + in_channels, + out_channels, + multi_scale_output=True, + name=None): + super(FuseLayers, self).__init__() + + self._actual_ch = len(in_channels) if multi_scale_output else 1 + self._in_channels = in_channels + + self.residual_func_list = [] + for i in range(self._actual_ch): + for j in range(len(in_channels)): + residual_func = None + if j > i: + residual_func = self.add_sublayer( + "residual_{}_layer_{}_{}".format(name, i + 1, j + 1), + ConvBNLayer( + num_channels=in_channels[j], + num_filters=out_channels[i], + filter_size=1, + stride=1, + act=None, + name=name + '_layer_' + str(i + 1) + '_' + + str(j + 1))) + self.residual_func_list.append(residual_func) + elif j < i: + pre_num_filters = in_channels[j] + for k in range(i - j): + if k == i - j - 1: + residual_func = self.add_sublayer( + "residual_{}_layer_{}_{}_{}".format( + name, i + 1, j + 1, k + 1), + ConvBNLayer( + num_channels=pre_num_filters, + num_filters=out_channels[i], + filter_size=3, + stride=2, + act=None, + name=name + '_layer_' + str(i + 1) + '_' + + str(j + 1) + '_' + str(k + 1))) + pre_num_filters = out_channels[i] + else: + residual_func = self.add_sublayer( + "residual_{}_layer_{}_{}_{}".format( + name, i + 1, j + 1, k + 1), + ConvBNLayer( + num_channels=pre_num_filters, + num_filters=out_channels[j], + filter_size=3, + stride=2, + act="relu", + name=name + '_layer_' + str(i + 1) + '_' + + str(j + 1) + '_' + str(k + 1))) + pre_num_filters = out_channels[j] + self.residual_func_list.append(residual_func) + + def forward(self, input): + outs = [] + residual_func_idx = 0 + for i in range(self._actual_ch): + residual = input[i] + residual_shape = residual.shape[-2:] + for j in range(len(self._in_channels)): + if j > i: + y = self.residual_func_list[residual_func_idx](input[j]) + residual_func_idx += 1 + + y = fluid.layers.resize_bilinear( + input=y, out_shape=residual_shape) + residual = fluid.layers.elementwise_add( + x=residual, y=y, act=None) + elif j < i: + y = input[j] + for k in range(i - j): + y = self.residual_func_list[residual_func_idx](y) + residual_func_idx += 1 + + residual = fluid.layers.elementwise_add( + x=residual, y=y, act=None) + + layer_helper = LayerHelper(self.full_name(), act='relu') + residual = layer_helper.append_activation(residual) + outs.append(residual) + + return outs + + +class LastClsOut(fluid.dygraph.Layer): + def __init__(self, + num_channel_list, + 
has_se, + num_filters_list=[32, 64, 128, 256], + name=None): + super(LastClsOut, self).__init__() + + self.func_list = [] + for idx in range(len(num_channel_list)): + func = self.add_sublayer( + "conv_{}_conv_{}".format(name, idx + 1), + BottleneckBlock( + num_channels=num_channel_list[idx], + num_filters=num_filters_list[idx], + has_se=has_se, + downsample=True, + name=name + 'conv_' + str(idx + 1))) + self.func_list.append(func) + + def forward(self, inputs): + outs = [] + for idx, input in enumerate(inputs): + out = self.func_list[idx](input) + outs.append(out) + return outs + + +@manager.BACKBONES.add_component +def HRNet_W18_Small_V1(**kwargs): + model = HRNet( + stage1_num_modules=1, + stage1_num_blocks=[1], + stage1_num_channels=[32], + stage2_num_modules=1, + stage2_num_blocks=[2, 2], + stage2_num_channels=[16, 32], + stage3_num_modules=1, + stage3_num_blocks=[2, 2, 2], + stage3_num_channels=[16, 32, 64], + stage4_num_modules=1, + stage4_num_blocks=[2, 2, 2, 2], + stage4_num_channels=[16, 32, 64, 128], + **kwargs) + return model + + +@manager.BACKBONES.add_component +def HRNet_W18_Small_V2(**kwargs): + model = HRNet( + stage1_num_modules=1, + stage1_num_blocks=[2], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[2, 2], + stage2_num_channels=[18, 36], + stage3_num_modules=1, + stage3_num_blocks=[2, 2, 2], + stage3_num_channels=[18, 36, 72], + stage4_num_modules=1, + stage4_num_blocks=[2, 2, 2, 2], + stage4_num_channels=[18, 36, 72, 144], + **kwargs) + return model + + +@manager.BACKBONES.add_component +def HRNet_W18(**kwargs): + model = HRNet( + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[18, 36], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[18, 36, 72], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[18, 36, 72, 144], + **kwargs) + return model + + +@manager.BACKBONES.add_component +def HRNet_W30(**kwargs): + model = HRNet( + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[30, 60], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[30, 60, 120], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[30, 60, 120, 240], + **kwargs) + return model + + +@manager.BACKBONES.add_component +def HRNet_W32(**kwargs): + model = HRNet( + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[32, 64], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[32, 64, 128], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[32, 64, 128, 256], + **kwargs) + return model + + +@manager.BACKBONES.add_component +def HRNet_W40(**kwargs): + model = HRNet( + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[40, 80], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[40, 80, 160], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[40, 80, 160, 320], + **kwargs) + return model + + +@manager.BACKBONES.add_component +def HRNet_W44(**kwargs): + model = HRNet( + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + 
stage2_num_blocks=[4, 4], + stage2_num_channels=[44, 88], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[44, 88, 176], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[44, 88, 176, 352], + **kwargs) + return model + + +@manager.BACKBONES.add_component +def HRNet_W48(**kwargs): + model = HRNet( + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[48, 96], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[48, 96, 192], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[48, 96, 192, 384], + **kwargs) + return model + + +@manager.BACKBONES.add_component +def HRNet_W60(**kwargs): + model = HRNet( + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[60, 120], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[60, 120, 240], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[60, 120, 240, 480], + **kwargs) + return model + + +@manager.BACKBONES.add_component +def HRNet_W64(**kwargs): + model = HRNet( + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[64, 128], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[64, 128, 256], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[64, 128, 256, 512], + **kwargs) + return model diff --git a/dygraph/paddleseg/models/backbones/mobilenetv3.py b/dygraph/paddleseg/models/backbones/mobilenetv3.py new file mode 100644 index 0000000000000000000000000000000000000000..6204d7733a45326a70b7cbc423820b987b046708 --- /dev/null +++ b/dygraph/paddleseg/models/backbones/mobilenetv3.py @@ -0,0 +1,451 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
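The HRNet_W* factories above differ only in their per-stage channel widths, and the decorated ones are registered with manager.BACKBONES so they can be retrieved by name. A minimal construction sketch, assuming the paddleseg package introduced by this diff is importable; the channel arithmetic follows from concatenating the four stage-4 branches in HRNet.forward:

from paddleseg.models.backbones.hrnet import HRNet_W18, HRNet_W48

# W18's stage-4 branches carry (18, 36, 72, 144) channels, so the fused
# output map has 18 + 36 + 72 + 144 = 270 channels; W48 gives 720.
backbone_w18 = HRNet_W18()
backbone_w48 = HRNet_W48(has_se=True)  # every factory forwards **kwargs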
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math +import os + +import numpy as np +import paddle +import paddle.fluid as fluid +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.layer_helper import LayerHelper +from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear, Dropout +from paddle.nn import SyncBatchNorm as BatchNorm + +from paddleseg.models.common import layer_utils +from paddleseg.cvlibs import manager +from paddleseg.utils import utils + +__all__ = [ + "MobileNetV3_small_x0_35", "MobileNetV3_small_x0_5", + "MobileNetV3_small_x0_75", "MobileNetV3_small_x1_0", + "MobileNetV3_small_x1_25", "MobileNetV3_large_x0_35", + "MobileNetV3_large_x0_5", "MobileNetV3_large_x0_75", + "MobileNetV3_large_x1_0", "MobileNetV3_large_x1_25" +] + + +def make_divisible(v, divisor=8, min_value=None): + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + if new_v < 0.9 * v: + new_v += divisor + return new_v + + +def get_padding_same(kernel_size, dilation_rate): + """ + SAME padding implementation given kernel_size and dilation_rate. + The calculation formula as following: + (F-(k+(k -1)*(r-1))+2*p)/s + 1 = F_new + where F: a feature map + k: kernel size, r: dilation rate, p: padding value, s: stride + F_new: new feature map + Args: + kernel_size (int) + dilation_rate (int) + + Returns: + padding_same (int): padding value + """ + k = kernel_size + r = dilation_rate + padding_same = (k + (k - 1) * (r - 1) - 1) // 2 + + return padding_same + + +class MobileNetV3(fluid.dygraph.Layer): + def __init__(self, + backbone_pretrained=None, + scale=1.0, + model_name="small", + class_dim=1000, + output_stride=None): + super(MobileNetV3, self).__init__() + + inplanes = 16 + if model_name == "large": + self.cfg = [ + # k, exp, c, se, nl, s, + [3, 16, 16, False, "relu", 1], + [3, 64, 24, False, "relu", 2], + [3, 72, 24, False, "relu", 1], # output 1 -> out_index=2 + [5, 72, 40, True, "relu", 2], + [5, 120, 40, True, "relu", 1], + [5, 120, 40, True, "relu", 1], # output 2 -> out_index=5 + [3, 240, 80, False, "hard_swish", 2], + [3, 200, 80, False, "hard_swish", 1], + [3, 184, 80, False, "hard_swish", 1], + [3, 184, 80, False, "hard_swish", 1], + [3, 480, 112, True, "hard_swish", 1], + [3, 672, 112, True, "hard_swish", + 1], # output 3 -> out_index=11 + [5, 672, 160, True, "hard_swish", 2], + [5, 960, 160, True, "hard_swish", 1], + [5, 960, 160, True, "hard_swish", + 1], # output 3 -> out_index=14 + ] + self.out_indices = [2, 5, 11, 14] + + self.cls_ch_squeeze = 960 + self.cls_ch_expand = 1280 + elif model_name == "small": + self.cfg = [ + # k, exp, c, se, nl, s, + [3, 16, 16, True, "relu", 2], # output 1 -> out_index=0 + [3, 72, 24, False, "relu", 2], + [3, 88, 24, False, "relu", 1], # output 2 -> out_index=3 + [5, 96, 40, True, "hard_swish", 2], + [5, 240, 40, True, "hard_swish", 1], + [5, 240, 40, True, "hard_swish", 1], + [5, 120, 48, True, "hard_swish", 1], + [5, 144, 48, True, "hard_swish", 1], # output 3 -> out_index=7 + [5, 288, 96, True, "hard_swish", 2], + [5, 576, 96, True, "hard_swish", 1], + [5, 576, 96, True, "hard_swish", 1], # output 4 -> out_index=10 + ] + self.out_indices = [0, 3, 7, 10] + + self.cls_ch_squeeze = 576 + self.cls_ch_expand = 1280 + else: + raise NotImplementedError( + "mode[{}_model] is not implemented!".format(model_name)) + + ################################################### + # modify stride and dilation based on output_stride 
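+        # When output_stride is set (e.g. 8 or 16), modify_bottle_params below
+        # forces stride-2 blocks past that point back to stride 1 and
+        # compensates with an accumulated dilation rate in dilation_cfg, so
+        # the backbone keeps a denser feature map for segmentation.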
+ self.dilation_cfg = [1] * len(self.cfg) + self.modify_bottle_params(output_stride=output_stride) + ################################################### + + self.conv1 = ConvBNLayer( + in_c=3, + out_c=make_divisible(inplanes * scale), + filter_size=3, + stride=2, + padding=1, + num_groups=1, + if_act=True, + act="hard_swish", + name="conv1") + + self.block_list = [] + + inplanes = make_divisible(inplanes * scale) + for i, (k, exp, c, se, nl, s) in enumerate(self.cfg): + ###################################### + # add dilation rate + dilation_rate = self.dilation_cfg[i] + ###################################### + self.block_list.append( + ResidualUnit( + in_c=inplanes, + mid_c=make_divisible(scale * exp), + out_c=make_divisible(scale * c), + filter_size=k, + stride=s, + dilation=dilation_rate, + use_se=se, + act=nl, + name="conv" + str(i + 2))) + self.add_sublayer( + sublayer=self.block_list[-1], name="conv" + str(i + 2)) + inplanes = make_divisible(scale * c) + + self.last_second_conv = ConvBNLayer( + in_c=inplanes, + out_c=make_divisible(scale * self.cls_ch_squeeze), + filter_size=1, + stride=1, + padding=0, + num_groups=1, + if_act=True, + act="hard_swish", + name="conv_last") + + self.pool = Pool2D( + pool_type="avg", global_pooling=True, use_cudnn=False) + + self.last_conv = Conv2D( + num_channels=make_divisible(scale * self.cls_ch_squeeze), + num_filters=self.cls_ch_expand, + filter_size=1, + stride=1, + padding=0, + act=None, + param_attr=ParamAttr(name="last_1x1_conv_weights"), + bias_attr=False) + + self.out = Linear( + input_dim=self.cls_ch_expand, + output_dim=class_dim, + param_attr=ParamAttr("fc_weights"), + bias_attr=ParamAttr(name="fc_offset")) + + self.init_weight(backbone_pretrained) + + def modify_bottle_params(self, output_stride=None): + + if output_stride is not None and output_stride % 2 != 0: + raise Exception("output stride must to be even number") + if output_stride is not None: + stride = 2 + rate = 1 + for i, _cfg in enumerate(self.cfg): + stride = stride * _cfg[-1] + if stride > output_stride: + rate = rate * _cfg[-1] + self.cfg[i][-1] = 1 + + self.dilation_cfg[i] = rate + + def forward(self, inputs, label=None, dropout_prob=0.2): + x = self.conv1(inputs) + # A feature list saves each downsampling feature. + feat_list = [] + for i, block in enumerate(self.block_list): + x = block(x) + if i in self.out_indices: + feat_list.append(x) + #print("block {}:".format(i),x.shape, self.dilation_cfg[i]) + x = self.last_second_conv(x) + x = self.pool(x) + x = self.last_conv(x) + x = fluid.layers.hard_swish(x) + x = fluid.layers.dropout(x=x, dropout_prob=dropout_prob) + x = fluid.layers.reshape(x, shape=[x.shape[0], x.shape[1]]) + x = self.out(x) + + return x, feat_list + + def init_weight(self, pretrained_model=None): + """ + Initialize the parameters of model parts. + Args: + pretrained_model ([str], optional): the path of pretrained model. Defaults to None. 
+ """ + if pretrained_model is not None: + if os.path.exists(pretrained_model): + utils.load_pretrained_model(self, pretrained_model) + else: + raise Exception('Pretrained model is not found: {}'.format( + pretrained_model)) + + +class ConvBNLayer(fluid.dygraph.Layer): + def __init__(self, + in_c, + out_c, + filter_size, + stride, + padding, + dilation=1, + num_groups=1, + if_act=True, + act=None, + use_cudnn=True, + name=""): + super(ConvBNLayer, self).__init__() + self.if_act = if_act + self.act = act + + self.conv = fluid.dygraph.Conv2D( + num_channels=in_c, + num_filters=out_c, + filter_size=filter_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=num_groups, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False, + use_cudnn=use_cudnn, + act=None) + self.bn = BatchNorm( + num_features=out_c, + weight_attr=ParamAttr( + name=name + "_bn_scale", + regularizer=fluid.regularizer.L2DecayRegularizer( + regularization_coeff=0.0)), + bias_attr=ParamAttr( + name=name + "_bn_offset", + regularizer=fluid.regularizer.L2DecayRegularizer( + regularization_coeff=0.0))) + + self._act_op = layer_utils.Activation(act=None) + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + if self.if_act: + if self.act == "relu": + x = fluid.layers.relu(x) + elif self.act == "hard_swish": + x = fluid.layers.hard_swish(x) + else: + print("The activation function is selected incorrectly.") + exit() + return x + + +class ResidualUnit(fluid.dygraph.Layer): + def __init__(self, + in_c, + mid_c, + out_c, + filter_size, + stride, + use_se, + dilation=1, + act=None, + name=''): + super(ResidualUnit, self).__init__() + self.if_shortcut = stride == 1 and in_c == out_c + self.if_se = use_se + + self.expand_conv = ConvBNLayer( + in_c=in_c, + out_c=mid_c, + filter_size=1, + stride=1, + padding=0, + if_act=True, + act=act, + name=name + "_expand") + + self.bottleneck_conv = ConvBNLayer( + in_c=mid_c, + out_c=mid_c, + filter_size=filter_size, + stride=stride, + padding=get_padding_same( + filter_size, + dilation), #int((filter_size - 1) // 2) + (dilation - 1), + dilation=dilation, + num_groups=mid_c, + if_act=True, + act=act, + name=name + "_depthwise") + if self.if_se: + self.mid_se = SEModule(mid_c, name=name + "_se") + self.linear_conv = ConvBNLayer( + in_c=mid_c, + out_c=out_c, + filter_size=1, + stride=1, + padding=0, + if_act=False, + act=None, + name=name + "_linear") + self.dilation = dilation + + def forward(self, inputs): + x = self.expand_conv(inputs) + x = self.bottleneck_conv(x) + if self.if_se: + x = self.mid_se(x) + x = self.linear_conv(x) + if self.if_shortcut: + x = fluid.layers.elementwise_add(inputs, x) + return x + + +class SEModule(fluid.dygraph.Layer): + def __init__(self, channel, reduction=4, name=""): + super(SEModule, self).__init__() + self.avg_pool = fluid.dygraph.Pool2D( + pool_type="avg", global_pooling=True, use_cudnn=False) + self.conv1 = fluid.dygraph.Conv2D( + num_channels=channel, + num_filters=channel // reduction, + filter_size=1, + stride=1, + padding=0, + act="relu", + param_attr=ParamAttr(name=name + "_1_weights"), + bias_attr=ParamAttr(name=name + "_1_offset")) + self.conv2 = fluid.dygraph.Conv2D( + num_channels=channel // reduction, + num_filters=channel, + filter_size=1, + stride=1, + padding=0, + act=None, + param_attr=ParamAttr(name + "_2_weights"), + bias_attr=ParamAttr(name=name + "_2_offset")) + + def forward(self, inputs): + outputs = self.avg_pool(inputs) + outputs = self.conv1(outputs) + outputs = self.conv2(outputs) + outputs = 
fluid.layers.hard_sigmoid(outputs) + return fluid.layers.elementwise_mul(x=inputs, y=outputs, axis=0) + + +def MobileNetV3_small_x0_35(**kwargs): + model = MobileNetV3(model_name="small", scale=0.35, **kwargs) + return model + + +def MobileNetV3_small_x0_5(**kwargs): + model = MobileNetV3(model_name="small", scale=0.5, **kwargs) + return model + + +def MobileNetV3_small_x0_75(**kwargs): + model = MobileNetV3(model_name="small", scale=0.75, **kwargs) + return model + + +@manager.BACKBONES.add_component +def MobileNetV3_small_x1_0(**kwargs): + model = MobileNetV3(model_name="small", scale=1.0, **kwargs) + return model + + +def MobileNetV3_small_x1_25(**kwargs): + model = MobileNetV3(model_name="small", scale=1.25, **kwargs) + return model + + +def MobileNetV3_large_x0_35(**kwargs): + model = MobileNetV3(model_name="large", scale=0.35, **kwargs) + return model + + +def MobileNetV3_large_x0_5(**kwargs): + model = MobileNetV3(model_name="large", scale=0.5, **kwargs) + return model + + +def MobileNetV3_large_x0_75(**kwargs): + model = MobileNetV3(model_name="large", scale=0.75, **kwargs) + return model + + +@manager.BACKBONES.add_component +def MobileNetV3_large_x1_0(**kwargs): + model = MobileNetV3(model_name="large", scale=1.0, **kwargs) + return model + + +def MobileNetV3_large_x1_25(**kwargs): + model = MobileNetV3(model_name="large", scale=1.25, **kwargs) + return model diff --git a/dygraph/paddleseg/models/backbones/resnet_vd.py b/dygraph/paddleseg/models/backbones/resnet_vd.py new file mode 100644 index 0000000000000000000000000000000000000000..d7dfc66fd5dc44a6a27c04eea73dc692f857c61c --- /dev/null +++ b/dygraph/paddleseg/models/backbones/resnet_vd.py @@ -0,0 +1,417 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
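Every MobileNetV3 factory above only changes the width multiplier `scale`; each channel count in the config table is then rounded by make_divisible to a multiple of 8, dropping at most roughly 10% of the original value. A small worked sketch of that rounding, with the function body copied from mobilenetv3.py above:

def make_divisible(v, divisor=8, min_value=None):
    # Same rounding rule as in mobilenetv3.py above.
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    # Guarantee that rounding never removes more than ~10% of v.
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v

print(make_divisible(16 * 0.35))  # 5.6 rounds up to the floor of 8
print(make_divisible(96 * 1.25))  # 120 is already a multiple of 8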
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import math + +import numpy as np +import paddle +import paddle.fluid as fluid +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.layer_helper import LayerHelper +from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear, Dropout +from paddle.nn import SyncBatchNorm as BatchNorm + +from paddleseg.utils import utils +from paddleseg.models.common import layer_utils +from paddleseg.cvlibs import manager + +__all__ = [ + "ResNet18_vd", "ResNet34_vd", "ResNet50_vd", "ResNet101_vd", "ResNet152_vd" +] + + +class ConvBNLayer(fluid.dygraph.Layer): + def __init__( + self, + num_channels, + num_filters, + filter_size, + stride=1, + dilation=1, + groups=1, + is_vd_mode=False, + act=None, + name=None, + ): + super(ConvBNLayer, self).__init__() + + self.is_vd_mode = is_vd_mode + self._pool2d_avg = Pool2D( + pool_size=2, + pool_stride=2, + pool_padding=0, + pool_type='avg', + ceil_mode=True) + self._conv = Conv2D( + num_channels=num_channels, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2 if dilation == 1 else 0, + dilation=dilation, + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + self._batch_norm = BatchNorm( + num_filters, + weight_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset')) + self._act_op = layer_utils.Activation(act=act) + + def forward(self, inputs): + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + y = self._act_op(y) + + return y + + +class BottleneckBlock(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_filters, + stride, + shortcut=True, + if_first=False, + dilation=1, + name=None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + act='relu', + name=name + "_branch2a") + + self.dilation = dilation + + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + stride=stride, + act='relu', + dilation=dilation, + name=name + "_branch2b") + self.conv2 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters * 4, + filter_size=1, + act=None, + name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 4, + filter_size=1, + stride=1, + is_vd_mode=False if if_first or stride == 1 else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs): + y = self.conv0(inputs) + + #################################################################### + # If given dilation rate > 1, using corresponding padding + if self.dilation > 1: + padding = self.dilation + y = fluid.layers.pad( + y, [0, 0, 0, 0, padding, padding, padding, padding]) + ##################################################################### + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = fluid.layers.elementwise_add(x=short, y=conv2) + layer_helper = LayerHelper(self.full_name(), act='relu') + return layer_helper.append_activation(y) + + +class BasicBlock(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_filters, + stride, + shortcut=True, + 
if_first=False, + name=None): + super(BasicBlock, self).__init__() + self.stride = stride + self.conv0 = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=3, + stride=stride, + act='relu', + name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + act=None, + name=name + "_branch2b") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + stride=1, + is_vd_mode=False if if_first else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs): + y = self.conv0(inputs) + conv1 = self.conv1(y) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = fluid.layers.elementwise_add(x=short, y=conv1) + + layer_helper = LayerHelper(self.full_name(), act='relu') + return layer_helper.append_activation(y) + + +class ResNet_vd(fluid.dygraph.Layer): + def __init__(self, + backbone_pretrained=None, + layers=50, + class_dim=1000, + output_stride=None, + multi_grid=(1, 2, 4)): + super(ResNet_vd, self).__init__() + + self.layers = layers + supported_layers = [18, 34, 50, 101, 152, 200] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format( + supported_layers, layers) + + if layers == 18: + depth = [2, 2, 2, 2] + elif layers == 34 or layers == 50: + depth = [3, 4, 6, 3] + elif layers == 101: + depth = [3, 4, 23, 3] + elif layers == 152: + depth = [3, 8, 36, 3] + elif layers == 200: + depth = [3, 12, 48, 3] + num_channels = [64, 256, 512, 1024 + ] if layers >= 50 else [64, 64, 128, 256] + num_filters = [64, 128, 256, 512] + + dilation_dict = None + if output_stride == 8: + dilation_dict = {2: 2, 3: 4} + elif output_stride == 16: + dilation_dict = {3: 2} + + self.conv1_1 = ConvBNLayer( + num_channels=3, + num_filters=32, + filter_size=3, + stride=2, + act='relu', + name="conv1_1") + self.conv1_2 = ConvBNLayer( + num_channels=32, + num_filters=32, + filter_size=3, + stride=1, + act='relu', + name="conv1_2") + self.conv1_3 = ConvBNLayer( + num_channels=32, + num_filters=64, + filter_size=3, + stride=1, + act='relu', + name="conv1_3") + self.pool2d_max = Pool2D( + pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') + + # self.block_list = [] + self.stage_list = [] + if layers >= 50: + for block in range(len(depth)): + shortcut = False + block_list = [] + for i in range(depth[block]): + if layers in [101, 152] and block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + + ############################################################################### + # Add dilation rate for some segmentation tasks, if dilation_dict is not None. 
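+                # With output_stride=8, dilation_dict={2: 2, 3: 4} dilates the
+                # third and fourth stages; with output_stride=16 only the
+                # fourth stage is dilated ({3: 2}). `block` indexes stages
+                # from 0 here, so block == 2 is the third stage.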
+ dilation_rate = dilation_dict[ + block] if dilation_dict and block in dilation_dict else 1 + + # Actually block here is 'stage', and i is 'block' in 'stage' + # At the stage 4, expand the the dilation_rate using multi_grid, default (1, 2, 4) + if block == 3: + dilation_rate = dilation_rate * multi_grid[i] + #print("stage {}, block {}: dilation rate".format(block, i), dilation_rate) + ############################################################################### + + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + num_channels=num_channels[block] + if i == 0 else num_filters[block] * 4, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 + and dilation_rate == 1 else 1, + shortcut=shortcut, + if_first=block == i == 0, + name=conv_name, + dilation=dilation_rate)) + + block_list.append(bottleneck_block) + shortcut = True + self.stage_list.append(block_list) + else: + for block in range(len(depth)): + shortcut = False + block_list = [] + for i in range(depth[block]): + conv_name = "res" + str(block + 2) + chr(97 + i) + basic_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BasicBlock( + num_channels=num_channels[block] + if i == 0 else num_filters[block], + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + shortcut=shortcut, + if_first=block == i == 0, + name=conv_name)) + block_list.append(basic_block) + shortcut = True + self.stage_list.append(block_list) + + self.pool2d_avg = Pool2D( + pool_size=7, pool_type='avg', global_pooling=True) + + self.pool2d_avg_channels = num_channels[-1] * 2 + + stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) + + self.out = Linear( + self.pool2d_avg_channels, + class_dim, + param_attr=ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name="fc_0.w_0"), + bias_attr=ParamAttr(name="fc_0.b_0")) + + self.init_weight(backbone_pretrained) + + def forward(self, inputs): + y = self.conv1_1(inputs) + y = self.conv1_2(y) + y = self.conv1_3(y) + y = self.pool2d_max(y) + + # A feature list saves the output feature map of each stage. + feat_list = [] + for i, stage in enumerate(self.stage_list): + for j, block in enumerate(stage): + y = block(y) + #print("stage {} block {}".format(i+1, j+1), y.shape) + feat_list.append(y) + + y = self.pool2d_avg(y) + y = fluid.layers.reshape(y, shape=[-1, self.pool2d_avg_channels]) + y = self.out(y) + return y, feat_list + + # def init_weight(self, pretrained_model=None): + + # if pretrained_model is not None: + # if os.path.exists(pretrained_model): + # utils.load_pretrained_model(self, pretrained_model) + + def init_weight(self, pretrained_model=None): + """ + Initialize the parameters of model parts. + Args: + pretrained_model ([str], optional): the path of pretrained model. Defaults to None. 
+ """ + if pretrained_model is not None: + if os.path.exists(pretrained_model): + utils.load_pretrained_model(self, pretrained_model) + else: + raise Exception('Pretrained model is not found: {}'.format( + pretrained_model)) + + +def ResNet18_vd(**args): + model = ResNet_vd(layers=18, **args) + return model + + +def ResNet34_vd(**args): + model = ResNet_vd(layers=34, **args) + return model + + +@manager.BACKBONES.add_component +def ResNet50_vd(**args): + model = ResNet_vd(layers=50, **args) + return model + + +@manager.BACKBONES.add_component +def ResNet101_vd(**args): + model = ResNet_vd(layers=101, **args) + return model + + +def ResNet152_vd(**args): + model = ResNet_vd(layers=152, **args) + return model + + +def ResNet200_vd(**args): + model = ResNet_vd(layers=200, **args) + return model diff --git a/dygraph/paddleseg/models/backbones/xception_deeplab.py b/dygraph/paddleseg/models/backbones/xception_deeplab.py new file mode 100644 index 0000000000000000000000000000000000000000..f512e31ab372b8bc453d8d0506bbc45839a08d27 --- /dev/null +++ b/dygraph/paddleseg/models/backbones/xception_deeplab.py @@ -0,0 +1,452 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import paddle +import paddle.fluid as fluid +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.layer_helper import LayerHelper +from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear, Dropout +from paddle.nn import SyncBatchNorm as BatchNorm + +from paddleseg.models.common import layer_utils +from paddleseg.cvlibs import manager +from paddleseg.utils import utils + +__all__ = ["Xception41_deeplab", "Xception65_deeplab", "Xception71_deeplab"] + + +def check_data(data, number): + if type(data) == int: + return [data] * number + assert len(data) == number + return data + + +def check_stride(s, os): + if s <= os: + return True + else: + return False + + +def check_points(count, points): + if points is None: + return False + else: + if isinstance(points, list): + return (True if count in points else False) + else: + return (True if count == points else False) + + +def gen_bottleneck_params(backbone='xception_65'): + if backbone == 'xception_65': + bottleneck_params = { + "entry_flow": (3, [2, 2, 2], [128, 256, 728]), + "middle_flow": (16, 1, 728), + "exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536, 2048]]) + } + elif backbone == 'xception_41': + bottleneck_params = { + "entry_flow": (3, [2, 2, 2], [128, 256, 728]), + "middle_flow": (8, 1, 728), + "exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536, 2048]]) + } + elif backbone == 'xception_71': + bottleneck_params = { + "entry_flow": (5, [2, 1, 2, 1, 2], [128, 256, 256, 728, 728]), + "middle_flow": (16, 1, 728), + "exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536, 2048]]) + } + else: + raise Exception( + "xception backbont only support xception_41/xception_65/xception_71" + ) + return bottleneck_params + + +class ConvBNLayer(fluid.dygraph.Layer): + def __init__(self, + 
input_channels, + output_channels, + filter_size, + stride=1, + padding=0, + act=None, + name=None): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2D( + num_channels=input_channels, + num_filters=output_channels, + filter_size=filter_size, + stride=stride, + padding=padding, + param_attr=ParamAttr(name=name + "/weights"), + bias_attr=False) + self._bn = BatchNorm( + num_features=output_channels, + epsilon=1e-3, + momentum=0.99, + weight_attr=ParamAttr(name=name + "/BatchNorm/gamma"), + bias_attr=ParamAttr(name=name + "/BatchNorm/beta")) + + self._act_op = layer_utils.Activation(act=act) + + def forward(self, inputs): + + return self._act_op(self._bn(self._conv(inputs))) + + +class Seperate_Conv(fluid.dygraph.Layer): + def __init__(self, + input_channels, + output_channels, + stride, + filter, + dilation=1, + act=None, + name=None): + super(Seperate_Conv, self).__init__() + + self._conv1 = Conv2D( + num_channels=input_channels, + num_filters=input_channels, + filter_size=filter, + stride=stride, + groups=input_channels, + padding=(filter) // 2 * dilation, + dilation=dilation, + param_attr=ParamAttr(name=name + "/depthwise/weights"), + bias_attr=False) + self._bn1 = BatchNorm( + input_channels, + epsilon=1e-3, + momentum=0.99, + weight_attr=ParamAttr(name=name + "/depthwise/BatchNorm/gamma"), + bias_attr=ParamAttr(name=name + "/depthwise/BatchNorm/beta")) + + self._act_op1 = layer_utils.Activation(act=act) + + self._conv2 = Conv2D( + input_channels, + output_channels, + 1, + stride=1, + groups=1, + padding=0, + param_attr=ParamAttr(name=name + "/pointwise/weights"), + bias_attr=False) + self._bn2 = BatchNorm( + output_channels, + epsilon=1e-3, + momentum=0.99, + weight_attr=ParamAttr(name=name + "/pointwise/BatchNorm/gamma"), + bias_attr=ParamAttr(name=name + "/pointwise/BatchNorm/beta")) + + self._act_op2 = layer_utils.Activation(act=act) + + def forward(self, inputs): + x = self._conv1(inputs) + x = self._bn1(x) + x = self._act_op1(x) + x = self._conv2(x) + x = self._bn2(x) + x = self._act_op2(x) + return x + + +class Xception_Block(fluid.dygraph.Layer): + def __init__(self, + input_channels, + output_channels, + strides=1, + filter_size=3, + dilation=1, + skip_conv=True, + has_skip=True, + activation_fn_in_separable_conv=False, + name=None): + super(Xception_Block, self).__init__() + + repeat_number = 3 + output_channels = check_data(output_channels, repeat_number) + filter_size = check_data(filter_size, repeat_number) + strides = check_data(strides, repeat_number) + + self.has_skip = has_skip + self.skip_conv = skip_conv + self.activation_fn_in_separable_conv = activation_fn_in_separable_conv + if not activation_fn_in_separable_conv: + self._conv1 = Seperate_Conv( + input_channels, + output_channels[0], + stride=strides[0], + filter=filter_size[0], + dilation=dilation, + name=name + "/separable_conv1") + self._conv2 = Seperate_Conv( + output_channels[0], + output_channels[1], + stride=strides[1], + filter=filter_size[1], + dilation=dilation, + name=name + "/separable_conv2") + self._conv3 = Seperate_Conv( + output_channels[1], + output_channels[2], + stride=strides[2], + filter=filter_size[2], + dilation=dilation, + name=name + "/separable_conv3") + else: + self._conv1 = Seperate_Conv( + input_channels, + output_channels[0], + stride=strides[0], + filter=filter_size[0], + act="relu", + dilation=dilation, + name=name + "/separable_conv1") + self._conv2 = Seperate_Conv( + output_channels[0], + output_channels[1], + stride=strides[1], + filter=filter_size[1], + act="relu", + 
dilation=dilation,
+                name=name + "/separable_conv2")
+            self._conv3 = Seperate_Conv(
+                output_channels[1],
+                output_channels[2],
+                stride=strides[2],
+                filter=filter_size[2],
+                act="relu",
+                dilation=dilation,
+                name=name + "/separable_conv3")
+
+        if has_skip and skip_conv:
+            self._short = ConvBNLayer(
+                input_channels,
+                output_channels[-1],
+                1,
+                stride=strides[-1],
+                padding=0,
+                name=name + "/shortcut")
+
+    def forward(self, inputs):
+        layer_helper = LayerHelper(self.full_name(), act='relu')
+        if not self.activation_fn_in_separable_conv:
+            x = layer_helper.append_activation(inputs)
+            x = self._conv1(x)
+            x = layer_helper.append_activation(x)
+            x = self._conv2(x)
+            x = layer_helper.append_activation(x)
+            x = self._conv3(x)
+        else:
+            x = self._conv1(inputs)
+            x = self._conv2(x)
+            x = self._conv3(x)
+        if not self.has_skip:
+            return x
+        if self.skip_conv:
+            skip = self._short(inputs)
+        else:
+            skip = inputs
+        return fluid.layers.elementwise_add(x, skip)
+
+
+class XceptionDeeplab(fluid.dygraph.Layer):
+    def __init__(self,
+                 backbone,
+                 backbone_pretrained=None,
+                 output_stride=16,
+                 class_dim=1000):
+
+        super(XceptionDeeplab, self).__init__()
+
+        bottleneck_params = gen_bottleneck_params(backbone)
+        self.backbone = backbone
+
+        self._conv1 = ConvBNLayer(
+            3,
+            32,
+            3,
+            stride=2,
+            padding=1,
+            act="relu",
+            name=self.backbone + "/entry_flow/conv1")
+        self._conv2 = ConvBNLayer(
+            32,
+            64,
+            3,
+            stride=1,
+            padding=1,
+            act="relu",
+            name=self.backbone + "/entry_flow/conv2")
+
+        self.block_num = bottleneck_params["entry_flow"][0]
+        self.strides = bottleneck_params["entry_flow"][1]
+        self.chns = bottleneck_params["entry_flow"][2]
+        self.strides = check_data(self.strides, self.block_num)
+        self.chns = check_data(self.chns, self.block_num)
+
+        self.entry_flow = []
+        self.middle_flow = []
+
+        self.stride = 2
+        self.output_stride = output_stride
+        s = self.stride
+
+        for i in range(self.block_num):
+            stride = self.strides[i] if check_stride(s * self.strides[i],
+                                                     self.output_stride) else 1
+            xception_block = self.add_sublayer(
+                self.backbone + "/entry_flow/block" + str(i + 1),
+                Xception_Block(
+                    input_channels=64 if i == 0 else self.chns[i - 1],
+                    output_channels=self.chns[i],
+                    # use the per-block stride checked against output_stride,
+                    # not the running total kept in self.stride
+                    strides=[1, 1, stride],
+                    name=self.backbone + "/entry_flow/block" + str(i + 1)))
+            self.entry_flow.append(xception_block)
+            s = s * stride
+        self.stride = s
+
+        self.block_num = bottleneck_params["middle_flow"][0]
+        self.strides = bottleneck_params["middle_flow"][1]
+        self.chns = bottleneck_params["middle_flow"][2]
+        self.strides = check_data(self.strides, self.block_num)
+        self.chns = check_data(self.chns, self.block_num)
+        s = self.stride
+
+        for i in range(self.block_num):
+            stride = self.strides[i] if check_stride(s * self.strides[i],
+                                                     self.output_stride) else 1
+            xception_block = self.add_sublayer(
+                self.backbone + "/middle_flow/block" + str(i + 1),
+                Xception_Block(
+                    input_channels=728,
+                    output_channels=728,
+                    strides=[1, 1, stride],
+                    skip_conv=False,
+                    name=self.backbone + "/middle_flow/block" + str(i +
1))) + self.middle_flow.append(xception_block) + s = s * stride + self.stride = s + + self.block_num = bottleneck_params["exit_flow"][0] + self.strides = bottleneck_params["exit_flow"][1] + self.chns = bottleneck_params["exit_flow"][2] + self.strides = check_data(self.strides, self.block_num) + self.chns = check_data(self.chns, self.block_num) + s = self.stride + stride = self.strides[0] if check_stride(s * self.strides[0], + self.output_stride) else 1 + self._exit_flow_1 = Xception_Block( + 728, + self.chns[0], [1, 1, stride], + name=self.backbone + "/exit_flow/block1") + s = s * stride + stride = self.strides[1] if check_stride(s * self.strides[1], + self.output_stride) else 1 + self._exit_flow_2 = Xception_Block( + self.chns[0][-1], + self.chns[1], [1, 1, stride], + dilation=2, + has_skip=False, + activation_fn_in_separable_conv=True, + name=self.backbone + "/exit_flow/block2") + s = s * stride + + self.stride = s + + self._drop = Dropout(p=0.5) + self._pool = Pool2D(pool_type="avg", global_pooling=True) + self._fc = Linear( + self.chns[1][-1], + class_dim, + param_attr=ParamAttr(name="fc_weights"), + bias_attr=ParamAttr(name="fc_bias")) + + self.init_weight(backbone_pretrained) + + def forward(self, inputs): + x = self._conv1(inputs) + x = self._conv2(x) + feat_list = [] + for i, ef in enumerate(self.entry_flow): + x = ef(x) + if i == 0: + feat_list.append(x) + for mf in self.middle_flow: + x = mf(x) + x = self._exit_flow_1(x) + x = self._exit_flow_2(x) + feat_list.append(x) + + x = self._drop(x) + x = self._pool(x) + x = fluid.layers.squeeze(x, axes=[2, 3]) + x = self._fc(x) + return x, feat_list + + def init_weight(self, pretrained_model=None): + """ + Initialize the parameters of model parts. + Args: + pretrained_model ([str], optional): the path of pretrained model. Defaults to None. + """ + if pretrained_model is not None: + if os.path.exists(pretrained_model): + utils.load_pretrained_model(self, pretrained_model) + else: + raise Exception('Pretrained model is not found: {}'.format( + pretrained_model)) + + +def Xception41_deeplab(**args): + model = XceptionDeeplab('xception_41', **args) + return model + + +@manager.BACKBONES.add_component +def Xception65_deeplab(**args): + model = XceptionDeeplab("xception_65", **args) + return model + + +def Xception71_deeplab(**args): + model = XceptionDeeplab("xception_71", **args) + return model diff --git a/dygraph/paddleseg/models/common/__init__.py b/dygraph/paddleseg/models/common/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9f30b50f2fc80c9effd59dbf3c134de66de04c44 --- /dev/null +++ b/dygraph/paddleseg/models/common/__init__.py @@ -0,0 +1,17 @@ +# -*- encoding: utf-8 -*- +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import layer_utils +from . 
import model_utils
\ No newline at end of file
diff --git a/dygraph/paddleseg/models/common/layer_utils.py b/dygraph/paddleseg/models/common/layer_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..8d41ebb130cbbca11feebfd87e030628ea44cd27
--- /dev/null
+++ b/dygraph/paddleseg/models/common/layer_utils.py
@@ -0,0 +1,143 @@
+# -*- encoding: utf-8 -*-
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+from paddle import nn
+import paddle.nn.functional as F
+from paddle.nn import Conv2d
+from paddle.nn import SyncBatchNorm as BatchNorm
+from paddle.nn.layer import activation
+
+
+class ConvBnRelu(nn.Layer):
+    def __init__(self, in_channels, out_channels, kernel_size, **kwargs):
+
+        super(ConvBnRelu, self).__init__()
+
+        self.conv = Conv2d(in_channels, out_channels, kernel_size, **kwargs)
+
+        self.batch_norm = BatchNorm(out_channels)
+
+    def forward(self, x):
+        x = self.conv(x)
+        x = self.batch_norm(x)
+        x = F.relu(x)
+        return x
+
+
+class ConvBn(nn.Layer):
+    def __init__(self, in_channels, out_channels, kernel_size, **kwargs):
+
+        super(ConvBn, self).__init__()
+
+        self.conv = Conv2d(in_channels, out_channels, kernel_size, **kwargs)
+
+        self.batch_norm = BatchNorm(out_channels)
+
+    def forward(self, x):
+        x = self.conv(x)
+        x = self.batch_norm(x)
+        return x
+
+
+class ConvReluPool(nn.Layer):
+    def __init__(self, in_channels, out_channels):
+        super(ConvReluPool, self).__init__()
+        self.conv = Conv2d(
+            in_channels,
+            out_channels,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            dilation=1)
+
+    def forward(self, x):
+        x = self.conv(x)
+        x = F.relu(x)
+        x = F.pool2d(x, pool_size=2, pool_type="max", pool_stride=2)
+        return x
+
+
+class DepthwiseConvBnRelu(nn.Layer):
+    def __init__(self, in_channels, out_channels, kernel_size, **kwargs):
+        super(DepthwiseConvBnRelu, self).__init__()
+        self.depthwise_conv = ConvBn(
+            in_channels,
+            out_channels=in_channels,
+            kernel_size=kernel_size,
+            groups=in_channels,
+            **kwargs)
+        self.pointwise_conv = ConvBnRelu(
+            in_channels, out_channels, kernel_size=1, groups=1)
+
+    def forward(self, x):
+        x = self.depthwise_conv(x)
+        x = self.pointwise_conv(x)
+        return x
+
+
+class Activation(nn.Layer):
+    """
+    The wrapper of activations
+    For example:
+    >>> relu = Activation("relu")
+    >>> print(relu)
+
+    >>> sigmoid = Activation("sigmoid")
+    >>> print(sigmoid)
+
+    >>> not_exist_one = Activation("not_exist_one")
+    KeyError: "not_exist_one does not exist in the current dict_keys(['elu', 'gelu', 'hardshrink',
+    'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', 'softmax',
+    'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', 'hsigmoid'])"
+
+    Args:
+        act (str): the activation name in lowercase
+    """
+
+    def __init__(self, act=None):
+        super(Activation, self).__init__()
+
+        self._act = act
+        upper_act_names = activation.__all__
+        lower_act_names = [act.lower() for act in upper_act_names]
+        act_dict = dict(zip(lower_act_names, upper_act_names))
+
+        if act is not None:
+            if act in act_dict.keys():
+                act_name = act_dict[act]
+                self.act_func = getattr(activation, act_name)()
+            else:
+                raise KeyError("{} does not exist in the current {}".format(
+                    act, act_dict.keys()))
+
+    def forward(self, x):
+
+        if self._act is not None:
+            return self.act_func(x)
+        else:
+            return x
diff --git a/dygraph/paddleseg/models/common/model_utils.py b/dygraph/paddleseg/models/common/model_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..7de39c8e77fad0021d3e910a9c02f3f6d774c32d
--- /dev/null
+++ b/dygraph/paddleseg/models/common/model_utils.py
@@ -0,0 +1,170 @@
+# -*- encoding: utf-8 -*-
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+from paddle import nn
+import paddle.nn.functional as F
+from paddle.nn import SyncBatchNorm as BatchNorm
+
+from paddleseg.models.common import layer_utils
+
+
+class FCNHead(nn.Layer):
+    """
+    The FCNHead implementation used as an auxiliary layer
+
+    Args:
+        in_channels (int): the number of input channels
+        out_channels (int): the number of output channels
+    """
+
+    def __init__(self, in_channels, out_channels):
+        super(FCNHead, self).__init__()
+
+        inter_channels = in_channels // 4
+        self.conv_bn_relu = layer_utils.ConvBnRelu(
+            in_channels=in_channels,
+            out_channels=inter_channels,
+            kernel_size=3,
+            padding=1)
+
+        self.conv = nn.Conv2d(
+            in_channels=inter_channels,
+            out_channels=out_channels,
+            kernel_size=1)
+
+    def forward(self, x):
+        x = self.conv_bn_relu(x)
+        x = F.dropout(x, p=0.1)
+        x = self.conv(x)
+        return x
+
+
+class AuxLayer(nn.Layer):
+    """
+    The auxiliary layer implementation for auxiliary loss
+
+    Args:
+        in_channels (int): the number of input channels.
+        inter_channels (int): intermediate channels.
+        out_channels (int): the number of output channels, which is usually num_classes.
+        dropout_prob (float): the dropout ratio applied before the last conv. Default to 0.1.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 inter_channels,
+                 out_channels,
+                 dropout_prob=0.1):
+        super(AuxLayer, self).__init__()
+
+        self.conv_bn_relu = layer_utils.ConvBnRelu(
+            in_channels=in_channels,
+            out_channels=inter_channels,
+            kernel_size=3,
+            padding=1)
+
+        self.conv = nn.Conv2d(
+            in_channels=inter_channels,
+            out_channels=out_channels,
+            kernel_size=1)
+
+        self.dropout_prob = dropout_prob
+
+    def forward(self, x):
+        x = self.conv_bn_relu(x)
+        x = F.dropout(x, p=self.dropout_prob)
+        x = self.conv(x)
+        return x
+
+
+class PPModule(nn.Layer):
+    """
+    Pyramid pooling module
+
+    Args:
+        in_channels (int): the number of input channels to pyramid pooling module.
+
+        out_channels (int): the number of output channels after pyramid pooling module.
+
+        bin_sizes (tuple): the out size of pooled feature maps. Default to (1, 2, 3, 6).
+
+        dim_reduction (bool): whether to reduce the channel dimension after pooling. Default to True.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 bin_sizes=(1, 2, 3, 6),
+                 dim_reduction=True):
+        super(PPModule, self).__init__()
+        self.bin_sizes = bin_sizes
+
+        inter_channels = in_channels
+        if dim_reduction:
+            inter_channels = in_channels // len(bin_sizes)
+
+        # we use dimension reduction after pooling mentioned in original implementation.
+        self.stages = nn.LayerList([
+            self._make_stage(in_channels, inter_channels, size)
+            for size in bin_sizes
+        ])
+
+        self.conv_bn_relu2 = layer_utils.ConvBnRelu(
+            in_channels=in_channels + inter_channels * len(bin_sizes),
+            out_channels=out_channels,
+            kernel_size=3,
+            padding=1)
+
+    def _make_stage(self, in_channels, out_channels, size):
+        """
+        Create one pooling layer.
+
+        In our implementation, we adopt the same dimension reduction as the original paper that might be
+        slightly different from other implementations.
+
+        After pooling, the channels are reduced to 1/len(bin_sizes) immediately, while some other implementations
+        keep the channels unchanged.
+
+        Args:
+            in_channels (int): the number of input channels to pyramid pooling module.
+
+            out_channels (int): the number of output channels of the conv layer.
+
+            size (int): the out size of the pooled layer.
+
+        Returns:
+            conv (Layer): a 1x1 ConvBnRelu applied to the pooled feature map.
+        """
+
+        # this paddle version does not support the AdaptiveAvgPool2d layer, so the
+        # adaptive pooling is done with the functional API in forward instead.
+        # prior = nn.AdaptiveAvgPool2d(output_size=(size, size))
+        conv = layer_utils.ConvBnRelu(
+            in_channels=in_channels, out_channels=out_channels, kernel_size=1)
+
+        return conv
+
+    def forward(self, input):
+        cat_layers = []
+        for i, stage in enumerate(self.stages):
+            size = self.bin_sizes[i]
+            # average pooling, as in the original PSPNet paper
+            x = F.adaptive_pool2d(
+                input, pool_size=(size, size), pool_type="avg")
+            x = stage(x)
+            x = F.resize_bilinear(x, out_shape=input.shape[2:])
+            cat_layers.append(x)
+        cat_layers = [input] + cat_layers[::-1]
+        cat = paddle.concat(cat_layers, axis=1)
+        out = self.conv_bn_relu2(cat)
+
+        return out
diff --git a/dygraph/paddleseg/models/deeplab.py b/dygraph/paddleseg/models/deeplab.py
new file mode 100644
index 0000000000000000000000000000000000000000..7c7e0cb187808baa8c7543d8eda7773a53c5b0fc
--- /dev/null
+++ b/dygraph/paddleseg/models/deeplab.py
@@ -0,0 +1,287 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+import paddle
+import paddle.nn.functional as F
+from paddle import nn
+from paddleseg.cvlibs import manager
+from paddleseg.models.common import layer_utils
+from paddleseg.utils import utils
+
+__all__ = ['DeepLabV3P', 'DeepLabV3']
+
+
+@manager.MODELS.add_component
+class DeepLabV3P(nn.Layer):
+    """
+    The DeepLabV3Plus implementation based on PaddlePaddle.
+
+    The original article refers to
+    "Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation"
+    Liang-Chieh Chen, Yukun Zhu, George Papandreou, Florian Schroff, Hartwig Adam.
+    (https://arxiv.org/abs/1802.02611)
+
+    The DeepLabV3P consists of three main components: Backbone, ASPP and Decoder.
+
+    Args:
+        num_classes (int): the unique number of target classes.
+
+        backbone (paddle.nn.Layer): backbone network, currently supports Xception65, Resnet101_vd.
+
+        model_pretrained (str): the path of pretrained model.
+
+        output_stride (int): the ratio of input size and final feature size.
+            Support 16 or 8. Default to 16.
+
+        backbone_indices (tuple): two values in the tuple indicate the indices of backbone outputs.
+            the first index will be taken as a low-level feature in the Decoder component;
+            the second one will be taken as input of ASPP component.
+            Usually backbone consists of four downsampling stages, and returns an output of
+            each stage, so we set default (0, 3), which means taking feature map of the first
+            stage in backbone as low-level feature used in Decoder, and feature map of the fourth
+            stage as input of ASPP.
+
+        backbone_channels (tuple): the same length as "backbone_indices". It indicates the channels of corresponding index.
+
+    """
+
+    def __init__(self,
+                 num_classes,
+                 backbone,
+                 model_pretrained=None,
+                 backbone_indices=(0, 3),
+                 backbone_channels=(256, 2048),
+                 output_stride=16):
+
+        super(DeepLabV3P, self).__init__()
+
+        self.backbone = backbone
+        self.aspp = ASPP(output_stride, backbone_channels[1])
+        self.decoder = Decoder(num_classes, backbone_channels[0])
+        self.backbone_indices = backbone_indices
+        self.init_weight(model_pretrained)
+
+    def forward(self, input, label=None):
+
+        logit_list = []
+        _, feat_list = self.backbone(input)
+        low_level_feat = feat_list[self.backbone_indices[0]]
+        x = feat_list[self.backbone_indices[1]]
+        x = self.aspp(x)
+        logit = self.decoder(x, low_level_feat)
+        logit = F.resize_bilinear(logit, input.shape[2:])
+        logit_list.append(logit)
+
+        return logit_list
+
+    def init_weight(self, pretrained_model=None):
+        """
+        Initialize the parameters of model parts.
+        Args:
+            pretrained_model (str, optional): the path of pretrained model. Defaults to None.
+        """
+        if pretrained_model is not None:
+            if os.path.exists(pretrained_model):
+                utils.load_pretrained_model(self, pretrained_model)
+            else:
+                raise Exception('Pretrained model is not found: {}'.format(
+                    pretrained_model))
+
+
+@manager.MODELS.add_component
+class DeepLabV3(nn.Layer):
+    """
+    The DeepLabV3 implementation based on PaddlePaddle.
+
+    The original article refers to
+    "Rethinking Atrous Convolution for Semantic Image Segmentation"
+    Liang-Chieh Chen, George Papandreou, Florian Schroff, Hartwig Adam.
+    (https://arxiv.org/pdf/1706.05587.pdf)
+
+    Args:
+        Refer to DeepLabV3P above.
+
+    """
+
+    def __init__(self,
+                 num_classes,
+                 backbone,
+                 model_pretrained=None,
+                 backbone_indices=(3,),
+                 backbone_channels=(2048,),
+                 output_stride=16):
+
+        super(DeepLabV3, self).__init__()
+
+        self.backbone = backbone
+        self.aspp = ASPP(output_stride, backbone_channels[0])
+        self.cls = nn.Conv2d(
+            in_channels=backbone_channels[0],
+            out_channels=num_classes,
+            kernel_size=1)
+
+        self.backbone_indices = backbone_indices
+        self.init_weight(model_pretrained)
+
+    def forward(self, input, label=None):
+
+        logit_list = []
+        _, feat_list = self.backbone(input)
+        x = feat_list[self.backbone_indices[0]]
+        logit = self.cls(x)
+        logit = F.resize_bilinear(logit, input.shape[2:])
+        logit_list.append(logit)
+
+        return logit_list
+
+    def init_weight(self, pretrained_model=None):
+        """
+        Initialize the parameters of model parts.
+        Args:
+            pretrained_model (str, optional): the path of pretrained model. Defaults to None.
+        """
+        if pretrained_model is not None:
+            if os.path.exists(pretrained_model):
+                utils.load_pretrained_model(self, pretrained_model)
+            else:
+                raise Exception('Pretrained model is not found: {}'.format(
+                    pretrained_model))
+
+
+class ImageAverage(nn.Layer):
+    """
+    Global average pooling
+
+    Args:
+        in_channels (int): the number of input channels.
+
+    """
+
+    def __init__(self, in_channels):
+        super(ImageAverage, self).__init__()
+        self.conv_bn_relu = layer_utils.ConvBnRelu(
+            in_channels, out_channels=256, kernel_size=1)
+
+    def forward(self, input):
+        x = paddle.reduce_mean(input, dim=[2, 3], keep_dim=True)
+        x = self.conv_bn_relu(x)
+        x = F.resize_bilinear(x, out_shape=input.shape[2:])
+        return x
+
+
+class ASPP(nn.Layer):
+    """
+    The Atrous Spatial Pyramid Pooling (ASPP) module of the DeepLabV3P model.
+
+    Args:
+        output_stride (int): the ratio of input size and final feature size. Support 16 or 8.
+
+        in_channels (int): the number of input channels to the ASPP module.
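+
+    A shape sketch (illustrative): with output_stride=16 the dilation rates
+    are (6, 12, 18), and every branch keeps the spatial size of its input,
+    so for a [N, 2048, H/16, W/16] feature map:
+
+        aspp = ASPP(output_stride=16, in_channels=2048)
+        # image pooling + 1x1 branch + three dilated 3x3 branches,
+        # 256 channels each -> concat to [N, 1280, H/16, W/16]
+        # -> fused back to [N, 256, H/16, W/16] by a 1x1 conv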
+ + """ + + def __init__(self, output_stride, in_channels): + super(ASPP, self).__init__() + + if output_stride == 16: + aspp_ratios = (6, 12, 18) + elif output_stride == 8: + aspp_ratios = (12, 24, 36) + else: + raise NotImplementedError( + "Only support output_stride is 8 or 16, but received{}".format( + output_stride)) + + self.image_average = ImageAverage(in_channels=in_channels) + + # The first aspp using 1*1 conv + self.aspp1 = layer_utils.DepthwiseConvBnRelu( + in_channels=in_channels, out_channels=256, kernel_size=1) + + # The second aspp using 3*3 (separable) conv at dilated rate aspp_ratios[0] + self.aspp2 = layer_utils.DepthwiseConvBnRelu( + in_channels=in_channels, + out_channels=256, + kernel_size=3, + dilation=aspp_ratios[0], + padding=aspp_ratios[0]) + + # The Third aspp using 3*3 (separable) conv at dilated rate aspp_ratios[1] + self.aspp3 = layer_utils.DepthwiseConvBnRelu( + in_channels=in_channels, + out_channels=256, + kernel_size=3, + dilation=aspp_ratios[1], + padding=aspp_ratios[1]) + + # The Third aspp using 3*3 (separable) conv at dilated rate aspp_ratios[2] + self.aspp4 = layer_utils.DepthwiseConvBnRelu( + in_channels=in_channels, + out_channels=256, + kernel_size=3, + dilation=aspp_ratios[2], + padding=aspp_ratios[2]) + + # After concat op, using 1*1 conv + self.conv_bn_relu = layer_utils.ConvBnRelu( + in_channels=1280, out_channels=256, kernel_size=1) + + def forward(self, x): + + x1 = self.image_average(x) + x2 = self.aspp1(x) + x3 = self.aspp2(x) + x4 = self.aspp3(x) + x5 = self.aspp4(x) + x = paddle.concat([x1, x2, x3, x4, x5], axis=1) + + x = self.conv_bn_relu(x) + x = F.dropout(x, p=0.1) # dropout_prob + return x + + +class Decoder(nn.Layer): + """ + Decoder module of DeepLabV3P model + + Args: + num_classes (int): the number of classes. + + in_channels (int): the number of input channels in decoder module. + + """ + + def __init__(self, num_classes, in_channels): + super(Decoder, self).__init__() + + self.conv_bn_relu1 = layer_utils.ConvBnRelu( + in_channels=in_channels, out_channels=48, kernel_size=1) + + self.conv_bn_relu2 = layer_utils.DepthwiseConvBnRelu( + in_channels=304, out_channels=256, kernel_size=3, padding=1) + self.conv_bn_relu3 = layer_utils.DepthwiseConvBnRelu( + in_channels=256, out_channels=256, kernel_size=3, padding=1) + self.conv = nn.Conv2d( + in_channels=256, out_channels=num_classes, kernel_size=1) + + def forward(self, x, low_level_feat): + low_level_feat = self.conv_bn_relu1(low_level_feat) + x = F.resize_bilinear(x, low_level_feat.shape[2:]) + x = paddle.concat([x, low_level_feat], axis=1) + x = self.conv_bn_relu2(x) + x = self.conv_bn_relu3(x) + x = self.conv(x) + return x diff --git a/dygraph/paddleseg/models/fast_scnn.py b/dygraph/paddleseg/models/fast_scnn.py new file mode 100644 index 0000000000000000000000000000000000000000..434f083e99d5337a51b3581f906b0a1fc518676e --- /dev/null +++ b/dygraph/paddleseg/models/fast_scnn.py @@ -0,0 +1,340 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+import paddle.nn.functional as F
+from paddle import nn
+from paddleseg.cvlibs import manager
+from paddleseg.models.common import layer_utils, model_utils
+from paddleseg.utils import utils
+
+
+@manager.MODELS.add_component
+class FastSCNN(nn.Layer):
+    """
+    The FastSCNN implementation based on PaddlePaddle.
+
+    As mentioned in the original paper, FastSCNN is a real-time segmentation algorithm (123.5fps)
+    even for high resolution images (1024x2048).
+
+    The original article refers to
+    Poudel, Rudra PK, et al. "Fast-scnn: Fast semantic segmentation network."
+    (https://arxiv.org/pdf/1902.04502.pdf)
+
+    Args:
+
+        num_classes (int): the unique number of target classes.
+
+        model_pretrained (str): the path of pretrained model. Default to None.
+
+        enable_auxiliary_loss (bool): a bool value indicating whether to add auxiliary loss.
+            if true, auxiliary loss will be added after LearningToDownsample module, where the weight is 0.4. Default to True.
+
+    """
+
+    def __init__(self,
+                 num_classes,
+                 model_pretrained=None,
+                 enable_auxiliary_loss=True):
+
+        super(FastSCNN, self).__init__()
+
+        self.learning_to_downsample = LearningToDownsample(32, 48, 64)
+        self.global_feature_extractor = GlobalFeatureExtractor(
+            64, [64, 96, 128], 128, 6, [3, 3, 3])
+        self.feature_fusion = FeatureFusionModule(64, 128, 128)
+        self.classifier = Classifier(128, num_classes)
+
+        if enable_auxiliary_loss:
+            self.auxlayer = model_utils.AuxLayer(64, 32, num_classes)
+
+        self.enable_auxiliary_loss = enable_auxiliary_loss
+
+        self.init_weight(model_pretrained)
+
+    def forward(self, input, label=None):
+
+        logit_list = []
+        higher_res_features = self.learning_to_downsample(input)
+        x = self.global_feature_extractor(higher_res_features)
+        x = self.feature_fusion(higher_res_features, x)
+        logit = self.classifier(x)
+        logit = F.resize_bilinear(logit, input.shape[2:])
+        logit_list.append(logit)
+
+        if self.enable_auxiliary_loss:
+            auxiliary_logit = self.auxlayer(higher_res_features)
+            auxiliary_logit = F.resize_bilinear(auxiliary_logit,
+                                                input.shape[2:])
+            logit_list.append(auxiliary_logit)
+
+        return logit_list
+
+    def init_weight(self, pretrained_model=None):
+        """
+        Initialize the parameters of model parts.
+        Args:
+            pretrained_model (str, optional): the path of pretrained model. Defaults to None.
+        """
+        if pretrained_model is not None:
+            if os.path.exists(pretrained_model):
+                utils.load_pretrained_model(self, pretrained_model)
+            else:
+                raise Exception('Pretrained model is not found: {}'.format(
+                    pretrained_model))
+
+
+class LearningToDownsample(nn.Layer):
+    """
+    Learning to downsample module.
+
+    This module consists of three downsampling blocks (one Conv and two separable Conv)
+
+    Args:
+        dw_channels1 (int): the input channels of the first sep conv. Default to 32.
+
+        dw_channels2 (int): the input channels of the second sep conv. Default to 48.
+
+        out_channels (int): the output channels of LearningToDownsample module. Default to 64.
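+
+    A quick shape check (illustrative; run under a dygraph guard):
+
+        import numpy as np
+        import paddle.fluid as fluid
+
+        with fluid.dygraph.guard():
+            ltd = LearningToDownsample(32, 48, 64)
+            x = fluid.dygraph.to_variable(
+                np.zeros((1, 3, 256, 512), dtype='float32'))
+            y = ltd(x)  # three stride-2 convs -> roughly 1/8 resolution
+            # y.shape == [1, 64, 32, 64]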
+ """ + + def __init__(self, dw_channels1=32, dw_channels2=48, out_channels=64): + super(LearningToDownsample, self).__init__() + + self.conv_bn_relu = layer_utils.ConvBnRelu( + in_channels=3, out_channels=dw_channels1, kernel_size=3, stride=2) + self.dsconv_bn_relu1 = layer_utils.DepthwiseConvBnRelu( + in_channels=dw_channels1, + out_channels=dw_channels2, + kernel_size=3, + stride=2, + padding=1) + self.dsconv_bn_relu2 = layer_utils.DepthwiseConvBnRelu( + in_channels=dw_channels2, + out_channels=out_channels, + kernel_size=3, + stride=2, + padding=1) + + def forward(self, x): + x = self.conv_bn_relu(x) + x = self.dsconv_bn_relu1(x) + x = self.dsconv_bn_relu2(x) + return x + + +class GlobalFeatureExtractor(nn.Layer): + """ + Global feature extractor module + + This module consists of three LinearBottleneck blocks (like inverted residual introduced by MobileNetV2) and + a PPModule (introduced by PSPNet). + + Args: + in_channels (int): the number of input channels to the module. Default to 64. + + block_channels (tuple): a tuple represents output channels of each bottleneck block. Default to (64, 96, 128). + + out_channels (int): the number of output channels of the module. Default to 128. + + expansion (int): the expansion factor in bottleneck. Default to 6. + + num_blocks (tuple): it indicates the repeat time of each bottleneck. Default to (3, 3, 3). + """ + + def __init__(self, + in_channels=64, + block_channels=(64, 96, 128), + out_channels=128, + expansion=6, + num_blocks=(3, 3, 3)): + super(GlobalFeatureExtractor, self).__init__() + + self.bottleneck1 = self._make_layer(LinearBottleneck, in_channels, + block_channels[0], num_blocks[0], + expansion, 2) + self.bottleneck2 = self._make_layer(LinearBottleneck, block_channels[0], + block_channels[1], num_blocks[1], + expansion, 2) + self.bottleneck3 = self._make_layer(LinearBottleneck, block_channels[1], + block_channels[2], num_blocks[2], + expansion, 1) + + self.ppm = model_utils.PPModule( + block_channels[2], out_channels, dim_reduction=True) + + def _make_layer(self, + block, + in_channels, + out_channels, + blocks, + expansion=6, + stride=1): + layers = [] + layers.append(block(in_channels, out_channels, expansion, stride)) + for i in range(1, blocks): + layers.append(block(out_channels, out_channels, expansion, 1)) + return nn.Sequential(*layers) + + def forward(self, x): + x = self.bottleneck1(x) + x = self.bottleneck2(x) + x = self.bottleneck3(x) + x = self.ppm(x) + return x + + +class LinearBottleneck(nn.Layer): + """ + Single bottleneck implementation. + + Args: + in_channels (int): the number of input channels to bottleneck block. + + out_channels (int): the number of output channels of bottleneck block. + + expansion (int). the expansion factor in bottleneck. Default to 6. + + stride (int). the stride used in depth-wise conv. 
+ """ + + def __init__(self, + in_channels, + out_channels, + expansion=6, + stride=2, + **kwargs): + super(LinearBottleneck, self).__init__() + + self.use_shortcut = stride == 1 and in_channels == out_channels + + expand_channels = in_channels * expansion + self.block = nn.Sequential( + # pw + layer_utils.ConvBnRelu( + in_channels=in_channels, + out_channels=expand_channels, + kernel_size=1, + bias_attr=False), + # dw + layer_utils.ConvBnRelu( + in_channels=expand_channels, + out_channels=expand_channels, + kernel_size=3, + stride=stride, + padding=1, + groups=expand_channels, + bias_attr=False), + # pw-linear + nn.Conv2d( + in_channels=expand_channels, + out_channels=out_channels, + kernel_size=1, + bias_attr=False), + nn.SyncBatchNorm(out_channels)) + + def forward(self, x): + out = self.block(x) + if self.use_shortcut: + out = x + out + return out + + +class FeatureFusionModule(nn.Layer): + """ + Feature Fusion Module Implememtation. + + This module fuses high-resolution feature and low-resolution feature. + + Args: + high_in_channels (int): the channels of high-resolution feature (output of LearningToDownsample). + + low_in_channels (int). the channels of low-resolution feature (output of GlobalFeatureExtractor). + + out_channels (int). the output channels of this module. + """ + + def __init__(self, high_in_channels, low_in_channels, out_channels): + super(FeatureFusionModule, self).__init__() + + # There only depth-wise conv is used WITHOUT point-wise conv + self.dwconv = layer_utils.ConvBnRelu( + in_channels=low_in_channels, + out_channels=out_channels, + kernel_size=3, + padding=1, + groups=128, + bias_attr=False) + + self.conv_low_res = nn.Sequential( + nn.Conv2d( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=1), nn.SyncBatchNorm(out_channels)) + + self.conv_high_res = nn.Sequential( + nn.Conv2d( + in_channels=high_in_channels, + out_channels=out_channels, + kernel_size=1), nn.SyncBatchNorm(out_channels)) + + self.relu = nn.ReLU(True) + + def forward(self, high_res_input, low_res_input): + low_res_input = F.resize_bilinear(input=low_res_input, scale=4) + low_res_input = self.dwconv(low_res_input) + low_res_input = self.conv_low_res(low_res_input) + + high_res_input = self.conv_high_res(high_res_input) + + x = high_res_input + low_res_input + + return self.relu(x) + + +class Classifier(nn.Layer): + """ + The Classifier module implemetation. + + This module consists of two depth-wsie conv and one conv. + + Args: + input_channels (int): the input channels to this module. + + num_classes (int). the unique number of target classes. + + """ + + def __init__(self, input_channels, num_classes): + super(Classifier, self).__init__() + + self.dsconv1 = layer_utils.DepthwiseConvBnRelu( + in_channels=input_channels, + out_channels=input_channels, + kernel_size=3, + padding=1) + + self.dsconv2 = layer_utils.DepthwiseConvBnRelu( + in_channels=input_channels, + out_channels=input_channels, + kernel_size=3, + padding=1) + + self.conv = nn.Conv2d( + in_channels=input_channels, out_channels=num_classes, kernel_size=1) + + def forward(self, x): + x = self.dsconv1(x) + x = self.dsconv2(x) + x = F.dropout(x, p=0.1) # dropout_prob + x = self.conv(x) + return x diff --git a/dygraph/paddleseg/models/fcn.py b/dygraph/paddleseg/models/fcn.py new file mode 100644 index 0000000000000000000000000000000000000000..87446e017d142aa15aa373b9c17976701576a387 --- /dev/null +++ b/dygraph/paddleseg/models/fcn.py @@ -0,0 +1,204 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+import os
+
+import paddle
+import paddle.fluid as fluid
+from paddle.fluid.param_attr import ParamAttr
+from paddle.fluid.layer_helper import LayerHelper
+from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
+from paddle.fluid.initializer import Normal
+from paddle.nn import SyncBatchNorm as BatchNorm
+
+from paddleseg.cvlibs import manager
+from paddleseg import utils
+from paddleseg.cvlibs import param_init
+from paddleseg.utils import logger
+
+__all__ = [
+    "fcn_hrnet_w18_small_v1", "fcn_hrnet_w18_small_v2", "fcn_hrnet_w18",
+    "fcn_hrnet_w30", "fcn_hrnet_w32", "fcn_hrnet_w40", "fcn_hrnet_w44",
+    "fcn_hrnet_w48", "fcn_hrnet_w60", "fcn_hrnet_w64"
+]
+
+
+@manager.MODELS.add_component
+class FCN(fluid.dygraph.Layer):
+    """
+    Fully Convolutional Networks for Semantic Segmentation.
+    https://arxiv.org/abs/1411.4038
+
+    Args:
+        num_classes (int): the unique number of target classes.
+
+        backbone (paddle.nn.Layer): backbone networks.
+
+        model_pretrained (str): the path of pretrained model.
+
+        backbone_indices (tuple): one value in the tuple indicates the index of the backbone
+            output to use. Default to (-1, ), i.e. the last output of the backbone.
+
+        backbone_channels (tuple): the same length as "backbone_indices". It indicates the channels of corresponding index.
+
+        channels (int): the output channels of the conv layer before the final classifier.
+            Defaults to the channels of the selected backbone output if None.
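+
+    A minimal wiring sketch (`hrnet` and `image` stand for a backbone
+    instance and an input tensor built elsewhere):
+
+        model = FCN(num_classes=19,
+                    backbone=hrnet,
+                    backbone_indices=(-1, ),
+                    backbone_channels=(270, ))
+        logit = model(image)[0]  # resized back to the input resolution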
+ """ + + def __init__(self, + num_classes, + backbone, + backbone_pretrained=None, + model_pretrained=None, + backbone_indices=(-1, ), + backbone_channels=(270, ), + channels=None): + super(FCN, self).__init__() + + self.num_classes = num_classes + self.backbone_pretrained = backbone_pretrained + self.model_pretrained = model_pretrained + self.backbone_indices = backbone_indices + if channels is None: + channels = backbone_channels[backbone_indices[0]] + + self.backbone = backbone + self.conv_last_2 = ConvBNLayer( + num_channels=backbone_channels[backbone_indices[0]], + num_filters=channels, + filter_size=1, + stride=1) + self.conv_last_1 = Conv2D( + num_channels=channels, + num_filters=self.num_classes, + filter_size=1, + stride=1, + padding=0) + if self.training: + self.init_weight() + + def forward(self, x): + input_shape = x.shape[2:] + fea_list = self.backbone(x) + x = fea_list[self.backbone_indices[0]] + x = self.conv_last_2(x) + logit = self.conv_last_1(x) + logit = fluid.layers.resize_bilinear(logit, input_shape) + return [logit] + + def init_weight(self): + params = self.parameters() + for param in params: + param_name = param.name + if 'batch_norm' in param_name: + if 'w_0' in param_name: + param_init.constant_init(param, value=1.0) + elif 'b_0' in param_name: + param_init.constant_init(param, value=0.0) + if 'conv' in param_name and 'w_0' in param_name: + param_init.normal_init(param, scale=0.001) + + if self.model_pretrained is not None: + if os.path.exists(self.model_pretrained): + utils.load_pretrained_model(self, self.model_pretrained) + else: + raise Exception('Pretrained model is not found: {}'.format( + self.model_pretrained)) + elif self.backbone_pretrained is not None: + if os.path.exists(self.backbone_pretrained): + utils.load_pretrained_model(self.backbone, + self.backbone_pretrained) + else: + raise Exception('Pretrained model is not found: {}'.format( + self.backbone_pretrained)) + else: + logger.warning('No pretrained model to load, train from scratch') + + +class ConvBNLayer(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_filters, + filter_size, + stride=1, + groups=1, + act="relu"): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2D( + num_channels=num_channels, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + bias_attr=False) + self._batch_norm = BatchNorm(num_filters) + self.act = act + + def forward(self, input): + y = self._conv(input) + y = self._batch_norm(y) + if self.act == 'relu': + y = fluid.layers.relu(y) + return y + + +@manager.MODELS.add_component +def fcn_hrnet_w18_small_v1(*args, **kwargs): + return FCN(backbone='HRNet_W18_Small_V1', backbone_channels=(240), **kwargs) + + +@manager.MODELS.add_component +def fcn_hrnet_w18_small_v2(*args, **kwargs): + return FCN(backbone='HRNet_W18_Small_V2', backbone_channels=(270), **kwargs) + + +@manager.MODELS.add_component +def fcn_hrnet_w18(*args, **kwargs): + return FCN(backbone='HRNet_W18', backbone_channels=(270), **kwargs) + + +@manager.MODELS.add_component +def fcn_hrnet_w30(*args, **kwargs): + return FCN(backbone='HRNet_W30', backbone_channels=(450), **kwargs) + + +@manager.MODELS.add_component +def fcn_hrnet_w32(*args, **kwargs): + return FCN(backbone='HRNet_W32', backbone_channels=(480), **kwargs) + + +@manager.MODELS.add_component +def fcn_hrnet_w40(*args, **kwargs): + return FCN(backbone='HRNet_W40', backbone_channels=(600), **kwargs) + + +@manager.MODELS.add_component +def fcn_hrnet_w44(*args, 
+    return FCN(backbone='HRNet_W44', backbone_channels=(660, ), **kwargs)
+
+
+@manager.MODELS.add_component
+def fcn_hrnet_w48(*args, **kwargs):
+    return FCN(backbone='HRNet_W48', backbone_channels=(720, ), **kwargs)
+
+
+@manager.MODELS.add_component
+def fcn_hrnet_w60(*args, **kwargs):
+    return FCN(backbone='HRNet_W60', backbone_channels=(900, ), **kwargs)
+
+
+@manager.MODELS.add_component
+def fcn_hrnet_w64(*args, **kwargs):
+    return FCN(backbone='HRNet_W64', backbone_channels=(960, ), **kwargs)
diff --git a/dygraph/paddleseg/models/gcnet.py b/dygraph/paddleseg/models/gcnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..97a70d13f6c1f53a6123425f42db1315385d61d1
--- /dev/null
+++ b/dygraph/paddleseg/models/gcnet.py
@@ -0,0 +1,205 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+import paddle
+import paddle.nn.functional as F
+from paddle import nn
+from paddleseg.cvlibs import manager
+from paddleseg.models.common import layer_utils, model_utils
+from paddleseg.utils import utils
+
+
+@manager.MODELS.add_component
+class GCNet(nn.Layer):
+    """
+    The GCNet implementation based on PaddlePaddle.
+
+    The original article refers to
+    Cao, Yue, et al. "GCNet: Non-local networks meet squeeze-excitation networks and beyond."
+    (https://arxiv.org/pdf/1904.11492.pdf)
+
+    Args:
+
+        num_classes (int): the unique number of target classes.
+
+        backbone (paddle.nn.Layer): backbone network, currently supports ResNet50/101.
+
+        model_pretrained (str): the path of pretrained model. Default to None.
+
+        backbone_indices (tuple): two values in the tuple indicate the indices of backbone outputs.
+            the first index will be taken as a deep-supervision feature in auxiliary layer;
+            the second one will be taken as input of GlobalContextBlock. Usually backbone
+            consists of four downsampling stages, and returns an output of each stage, so we
+            set default (2, 3), which means taking feature map of the third stage (res4b22)
+            and the fourth stage (res5c) in backbone.
+
+        backbone_channels (tuple): the same length as "backbone_indices". It indicates the channels of corresponding index.
+
+        gc_channels (int): input channels to Global Context Block. Default to 512.
+
+        ratio (float): it indicates the ratio of attention channels to gc_channels. Default to 1/4.
+
+        enable_auxiliary_loss (bool): a bool value indicating whether to add auxiliary loss. Default to True.
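+
+    A minimal wiring sketch (constructor names follow this repo; the
+    numbers are illustrative):
+
+        backbone = ResNet101_vd()
+        model = GCNet(num_classes=19, backbone=backbone)
+        # forward returns [logit, auxiliary_logit] when enable_auxiliary_loss is True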
+ + """ + + def __init__(self, + num_classes, + backbone, + model_pretrained=None, + backbone_indices=(2, 3), + backbone_channels=(1024, 2048), + gc_channels=512, + ratio=1 / 4, + enable_auxiliary_loss=True, + pretrained_model=None): + + super(GCNet, self).__init__() + + self.backbone = backbone + + in_channels = backbone_channels[1] + self.conv_bn_relu1 = layer_utils.ConvBnRelu( + in_channels=in_channels, + out_channels=gc_channels, + kernel_size=3, + padding=1) + + self.gc_block = GlobalContextBlock(in_channels=gc_channels, ratio=ratio) + + self.conv_bn_relu2 = layer_utils.ConvBnRelu( + in_channels=gc_channels, + out_channels=gc_channels, + kernel_size=3, + padding=1) + + self.conv_bn_relu3 = layer_utils.ConvBnRelu( + in_channels=in_channels + gc_channels, + out_channels=gc_channels, + kernel_size=3, + padding=1) + + self.conv = nn.Conv2d( + in_channels=gc_channels, out_channels=num_classes, kernel_size=1) + + if enable_auxiliary_loss: + self.auxlayer = model_utils.AuxLayer( + in_channels=backbone_channels[0], + inter_channels=backbone_channels[0] // 4, + out_channels=num_classes) + + self.backbone_indices = backbone_indices + self.enable_auxiliary_loss = enable_auxiliary_loss + + self.init_weight(model_pretrained) + + def forward(self, input, label=None): + + logit_list = [] + _, feat_list = self.backbone(input) + x = feat_list[self.backbone_indices[1]] + + output = self.conv_bn_relu1(x) + output = self.gc_block(output) + output = self.conv_bn_relu2(output) + + output = paddle.concat([x, output], axis=1) + output = self.conv_bn_relu3(output) + + output = F.dropout(output, p=0.1) # dropout_prob + logit = self.conv(output) + logit = F.resize_bilinear(logit, input.shape[2:]) + logit_list.append(logit) + + if self.enable_auxiliary_loss: + low_level_feat = feat_list[self.backbone_indices[0]] + auxiliary_logit = self.auxlayer(low_level_feat) + auxiliary_logit = F.resize_bilinear(auxiliary_logit, + input.shape[2:]) + logit_list.append(auxiliary_logit) + + return logit_list + + def init_weight(self, pretrained_model=None): + """ + Initialize the parameters of model parts. + Args: + pretrained_model ([str], optional): the path of pretrained model. Defaults to None. + """ + if pretrained_model is not None: + if os.path.exists(pretrained_model): + utils.load_pretrained_model(self, pretrained_model) + else: + raise Exception('Pretrained model is not found: {}'.format( + pretrained_model)) + + +class GlobalContextBlock(nn.Layer): + """ + Global Context Block implementation. + + Args: + in_channels (int): input channels of Global Context Block + ratio (float): the channels of attention map. 
+ """ + + def __init__(self, in_channels, ratio): + super(GlobalContextBlock, self).__init__() + + self.conv_mask = nn.Conv2d( + in_channels=in_channels, out_channels=1, kernel_size=1) + # current paddle version does not support Softmax class + # self.softmax = layer_utils.Activation("softmax", dim=2) + + inter_channels = int(in_channels * ratio) + self.channel_add_conv = nn.Sequential( + nn.Conv2d( + in_channels=in_channels, + out_channels=inter_channels, + kernel_size=1), + nn.LayerNorm(normalized_shape=[inter_channels, 1, 1]), nn.ReLU(), + nn.Conv2d( + in_channels=inter_channels, + out_channels=in_channels, + kernel_size=1)) + + def global_context_block(self, x): + batch, channel, height, width = x.shape + + # [N, C, H * W] + input_x = paddle.reshape(x, shape=[batch, channel, height * width]) + # [N, 1, C, H * W] + input_x = paddle.unsqueeze(input_x, axis=1) + # [N, 1, H, W] + context_mask = self.conv_mask(x) + # [N, 1, H * W] + context_mask = paddle.reshape( + context_mask, shape=[batch, 1, height * width]) + context_mask = F.softmax(context_mask) + # [N, 1, H * W, 1] + context_mask = paddle.unsqueeze(context_mask, axis=-1) + # [N, 1, C, 1] + context = paddle.matmul(input_x, context_mask) + # [N, C, 1, 1] + context = paddle.reshape(context, shape=[batch, channel, 1, 1]) + + return context + + def forward(self, x): + context = self.global_context_block(x) + channel_add_term = self.channel_add_conv(context) + out = x + channel_add_term + return out diff --git a/dygraph/paddleseg/models/losses/__init__.py b/dygraph/paddleseg/models/losses/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f58a9fe1dccce025fa5ee9dec8887fbfc3b9deb8 --- /dev/null +++ b/dygraph/paddleseg/models/losses/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .cross_entroy_loss import CrossEntropyLoss diff --git a/dygraph/paddleseg/models/losses/cross_entroy_loss.py b/dygraph/paddleseg/models/losses/cross_entroy_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..c9f49bcd4e84527ea812a608c3fb1e29de6416aa --- /dev/null +++ b/dygraph/paddleseg/models/losses/cross_entroy_loss.py @@ -0,0 +1,141 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import paddle
+from paddle import nn
+import paddle.nn.functional as F
+import paddle.fluid as fluid
+
+from paddleseg.cvlibs import manager
+
+
+@manager.LOSSES.add_component
+class CrossEntropyLoss(nn.Layer):
+    """
+    Implements the cross entropy loss function.
+
+    Args:
+        ignore_index (int64): Specifies a target value that is ignored
+            and does not contribute to the input gradient. Default ``255``.
+    """
+
+    def __init__(self, ignore_index=255):
+        super(CrossEntropyLoss, self).__init__()
+        self.ignore_index = ignore_index
+        self.EPS = 1e-5
+
+    def forward(self, logit, label):
+        """
+        Forward computation.
+        Args:
+            logit (Tensor): logit tensor, the data type is float32, float64. Shape is
+                (N, C), where C is number of classes, and if shape is more than 2D, this
+                is (N, C, D1, D2,..., Dk), k >= 1.
+            label (Variable): label tensor, the data type is int64. Shape is (N), where each
+                value is 0 <= label[i] <= C-1, and if shape is more than 2D, this is
+                (N, D1, D2,..., Dk), k >= 1.
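+
+        A worked example of the normalization used below (illustrative):
+        with ignore_index=255 and half of the pixels labeled 255, mean(mask)
+        is 0.5, so dividing mean(loss * mask) by (0.5 + EPS) recovers the
+        average over the valid pixels only.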
+ """ + if len(label.shape) != len(logit.shape): + label = paddle.unsqueeze(label, 1) + + # logit = paddle.transpose(logit, [0, 2, 3, 1]) + # label = paddle.transpose(label, [0, 2, 3, 1]) + # loss = F.softmax_with_cross_entropy( + # logit, label, ignore_index=self.ignore_index, axis=-1) + # loss = paddle.reduce_mean(loss) + + # mask = label != self.ignore_index + # mask = paddle.cast(mask, 'float32') + # avg_loss = loss / (paddle.mean(mask) + self.EPS) + + # label.stop_gradient = True + # mask.stop_gradient = True + # return avg_loss + + logit = fluid.layers.transpose(logit, [0, 2, 3, 1]) + label = fluid.layers.transpose(label, [0, 2, 3, 1]) + mask = label != self.ignore_index + mask = fluid.layers.cast(mask, 'float32') + loss, probs = fluid.layers.softmax_with_cross_entropy( + logit, + label, + ignore_index=self.ignore_index, + return_softmax=True, + axis=-1) + + loss = loss * mask + avg_loss = fluid.layers.mean(loss) / ( + fluid.layers.mean(mask) + self.EPS) + + label.stop_gradient = True + mask.stop_gradient = True + return avg_loss diff --git a/dygraph/paddleseg/models/ocrnet.py b/dygraph/paddleseg/models/ocrnet.py new file mode 100644 index 0000000000000000000000000000000000000000..78dfd136d7aaf15aed50f598c66ddbf72ac1e242 --- /dev/null +++ b/dygraph/paddleseg/models/ocrnet.py @@ -0,0 +1,215 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os + +import paddle.fluid as fluid +from paddle.fluid.dygraph import Sequential, Conv2D + +from paddleseg.cvlibs import manager +from paddleseg.models.common.layer_utils import ConvBnRelu +from paddleseg import utils + + +class SpatialGatherBlock(fluid.dygraph.Layer): + def forward(self, pixels, regions): + n, c, h, w = pixels.shape + _, k, _, _ = regions.shape + + # pixels: from (n, c, h, w) to (n, h*w, c) + pixels = fluid.layers.reshape(pixels, (n, c, h * w)) + pixels = fluid.layers.transpose(pixels, (0, 2, 1)) + + # regions: from (n, k, h, w) to (n, k, h*w) + regions = fluid.layers.reshape(regions, (n, k, h * w)) + regions = fluid.layers.softmax(regions, axis=2) + + # feats: from (n, k, c) to (n, c, k, 1) + feats = fluid.layers.matmul(regions, pixels) + feats = fluid.layers.transpose(feats, (0, 2, 1)) + feats = fluid.layers.unsqueeze(feats, axes=[-1]) + + return feats + + +class SpatialOCRModule(fluid.dygraph.Layer): + def __init__(self, + in_channels, + key_channels, + out_channels, + dropout_rate=0.1): + super(SpatialOCRModule, self).__init__() + + self.attention_block = ObjectAttentionBlock(in_channels, key_channels) + self.dropout_rate = dropout_rate + self.conv1x1 = Conv2D(2 * in_channels, out_channels, 1) + + def forward(self, pixels, regions): + context = self.attention_block(pixels, regions) + feats = fluid.layers.concat([context, pixels], axis=1) + + feats = self.conv1x1(feats) + feats = fluid.layers.dropout(feats, self.dropout_rate) + + return feats + + +class ObjectAttentionBlock(fluid.dygraph.Layer): + def __init__(self, in_channels, key_channels): + super(ObjectAttentionBlock, self).__init__() + + self.in_channels = in_channels + self.key_channels = key_channels + + self.f_pixel = Sequential( + ConvBnRelu(in_channels, key_channels, 1), + ConvBnRelu(key_channels, key_channels, 1)) + + self.f_object = Sequential( + ConvBnRelu(in_channels, key_channels, 1), + ConvBnRelu(key_channels, key_channels, 1)) + + self.f_down = ConvBnRelu(in_channels, key_channels, 1) + + self.f_up = ConvBnRelu(key_channels, in_channels, 1) + + def forward(self, x, proxy): + n, _, h, w = x.shape + + # query : from (n, c1, h1, w1) to (n, h1*w1, key_channels) + query = self.f_pixel(x) + query = fluid.layers.reshape(query, (n, self.key_channels, -1)) + query = fluid.layers.transpose(query, (0, 2, 1)) + + # key : from (n, c2, h2, w2) to (n, key_channels, h2*w2) + key = self.f_object(proxy) + key = fluid.layers.reshape(key, (n, self.key_channels, -1)) + + # value : from (n, c2, h2, w2) to (n, h2*w2, key_channels) + value = self.f_down(proxy) + value = fluid.layers.reshape(value, (n, self.key_channels, -1)) + value = fluid.layers.transpose(value, (0, 2, 1)) + + # sim_map (n, h1*w1, h2*w2) + sim_map = fluid.layers.matmul(query, key) + sim_map = (self.key_channels**-.5) * sim_map + sim_map = fluid.layers.softmax(sim_map, axis=-1) + + # context from (n, h1*w1, key_channels) to (n , out_channels, h1, w1) + context = fluid.layers.matmul(sim_map, value) + context = fluid.layers.transpose(context, (0, 2, 1)) + context = fluid.layers.reshape(context, (n, self.key_channels, h, w)) + context = self.f_up(context) + + return context + + +@manager.MODELS.add_component +class OCRNet(fluid.dygraph.Layer): + def __init__(self, + num_classes, + backbone, + model_pretrained=None, + in_channels=None, + ocr_mid_channels=512, + ocr_key_channels=256, + ignore_index=255): + super(OCRNet, self).__init__() + + self.ignore_index = ignore_index + self.num_classes = num_classes + self.EPS = 1e-5 + + self.backbone = backbone + 
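+        # OCR pipeline, shape-wise: the backbone gives feats (n, C, h, w);
+        # aux_head predicts soft_regions (n, K, h, w) and conv3x3_ocr produces
+        # pixels (n, ocr_mid_channels, h, w). spatial_gather pools pixels with
+        # the region weights into K descriptors (n, ocr_mid_channels, K, 1),
+        # and spatial_ocr attends pixels to those descriptors, returning
+        # object-contextual features (n, ocr_mid_channels, h, w) for cls_head.
+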
self.spatial_gather = SpatialGatherBlock() + self.spatial_ocr = SpatialOCRModule(ocr_mid_channels, ocr_key_channels, + ocr_mid_channels) + self.conv3x3_ocr = ConvBnRelu( + in_channels, ocr_mid_channels, 3, padding=1) + self.cls_head = Conv2D(ocr_mid_channels, self.num_classes, 1) + + self.aux_head = Sequential( + ConvBnRelu(in_channels, in_channels, 3, padding=1), + Conv2D(in_channels, self.num_classes, 1)) + + self.init_weight(model_pretrained) + + def forward(self, x, label=None): + feats = self.backbone(x) + + soft_regions = self.aux_head(feats) + pixels = self.conv3x3_ocr(feats) + + object_regions = self.spatial_gather(pixels, soft_regions) + ocr = self.spatial_ocr(pixels, object_regions) + + logit = self.cls_head(ocr) + logit = fluid.layers.resize_bilinear(logit, x.shape[2:]) + + if self.training: + soft_regions = fluid.layers.resize_bilinear(soft_regions, + x.shape[2:]) + cls_loss = self._get_loss(logit, label) + aux_loss = self._get_loss(soft_regions, label) + return cls_loss + 0.4 * aux_loss + + score_map = fluid.layers.softmax(logit, axis=1) + score_map = fluid.layers.transpose(score_map, [0, 2, 3, 1]) + pred = fluid.layers.argmax(score_map, axis=3) + pred = fluid.layers.unsqueeze(pred, axes=[3]) + return pred, score_map + + def init_weight(self, pretrained_model=None): + """ + Initialize the parameters of model parts. + Args: + pretrained_model ([str], optional): the path of pretrained model.. Defaults to None. + """ + if pretrained_model is not None: + if os.path.exists(pretrained_model): + utils.load_pretrained_model(self, pretrained_model) + else: + raise Exception('Pretrained model is not found: {}'.format( + pretrained_model)) + + def _get_loss(self, logit, label): + """ + compute forward loss of the model + + Args: + logit (tensor): the logit of model output + label (tensor): ground truth + + Returns: + avg_loss (tensor): forward loss + """ + logit = fluid.layers.transpose(logit, [0, 2, 3, 1]) + label = fluid.layers.transpose(label, [0, 2, 3, 1]) + mask = label != self.ignore_index + mask = fluid.layers.cast(mask, 'float32') + loss, probs = fluid.layers.softmax_with_cross_entropy( + logit, + label, + ignore_index=self.ignore_index, + return_softmax=True, + axis=-1) + + loss = loss * mask + avg_loss = fluid.layers.mean(loss) / ( + fluid.layers.mean(mask) + self.EPS) + + label.stop_gradient = True + mask.stop_gradient = True + + return avg_loss diff --git a/dygraph/paddleseg/models/pspnet.py b/dygraph/paddleseg/models/pspnet.py new file mode 100644 index 0000000000000000000000000000000000000000..764749ce09f4618420d142d1955cf52d9aa5c258 --- /dev/null +++ b/dygraph/paddleseg/models/pspnet.py @@ -0,0 +1,123 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
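+
+# With the config system introduced in this change, models registered through
+# manager.MODELS are typically built from YAML. A hypothetical entry (the
+# backbone name below is a placeholder, not checked against the registry):
+#
+#   model:
+#     type: PSPNet
+#     num_classes: 19
+#     backbone:
+#       type: ResNet101_vd
+#     enable_auxiliary_loss: True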
+
+import os
+
+import paddle.nn.functional as F
+from paddle import nn
+from paddleseg.cvlibs import manager
+from paddleseg.models.common import model_utils
+from paddleseg.utils import utils
+
+
+@manager.MODELS.add_component
+class PSPNet(nn.Layer):
+    """
+    The PSPNet implementation based on PaddlePaddle.
+
+    The original article refers to
+    Zhao, Hengshuang, et al. "Pyramid scene parsing network."
+    Proceedings of the IEEE conference on computer vision and pattern recognition. 2017.
+    (https://openaccess.thecvf.com/content_cvpr_2017/papers/Zhao_Pyramid_Scene_Parsing_CVPR_2017_paper.pdf)
+
+    Args:
+        num_classes (int): the unique number of target classes.
+
+        backbone (Paddle.nn.Layer): backbone network, currently supports Resnet50/101.
+
+        model_pretrained (str): the path of pretrained model. Default to None.
+
+        backbone_indices (tuple): two values in the tuple indicate the indices of backbone outputs.
+            The first index will be taken as a deep-supervision feature in the auxiliary layer;
+            the second one will be taken as input of the Pyramid Pooling Module (PPModule).
+            Usually the backbone consists of four downsampling stages and returns an output of
+            each stage. We therefore set the default to (2, 3), which takes the feature map of
+            the third stage (res4b22) and the feature map of the fourth stage (res5c) as input of PPModule.
+
+        backbone_channels (tuple): the same length as "backbone_indices". It indicates the channels of the corresponding indices.
+
+        pp_out_channels (int): output channels after the Pyramid Pooling Module. Default to 1024.
+
+        bin_sizes (tuple): the output sizes of pooled feature maps. Default to (1, 2, 3, 6).
+
+        enable_auxiliary_loss (bool): a bool value that indicates whether to add auxiliary loss. Default to True.
+    """
+
+    def __init__(self,
+                 num_classes,
+                 backbone,
+                 model_pretrained=None,
+                 backbone_indices=(2, 3),
+                 backbone_channels=(1024, 2048),
+                 pp_out_channels=1024,
+                 bin_sizes=(1, 2, 3, 6),
+                 enable_auxiliary_loss=True):
+
+        super(PSPNet, self).__init__()
+
+        self.backbone = backbone
+        self.backbone_indices = backbone_indices
+
+        self.psp_module = model_utils.PPModule(
+            in_channels=backbone_channels[1],
+            out_channels=pp_out_channels,
+            bin_sizes=bin_sizes)
+
+        self.conv = nn.Conv2d(
+            in_channels=pp_out_channels,
+            out_channels=num_classes,
+            kernel_size=1)
+
+        if enable_auxiliary_loss:
+            self.fcn_head = model_utils.FCNHead(
+                in_channels=backbone_channels[0], out_channels=num_classes)
+
+        self.enable_auxiliary_loss = enable_auxiliary_loss
+
+        self.init_weight(model_pretrained)
+
+    def forward(self, input, label=None):
+
+        logit_list = []
+        _, feat_list = self.backbone(input)
+
+        x = feat_list[self.backbone_indices[1]]
+        x = self.psp_module(x)
+        x = F.dropout(x, p=0.1)  # dropout_prob
+        logit = self.conv(x)
+        logit = F.resize_bilinear(logit, input.shape[2:])
+        logit_list.append(logit)
+
+        if self.enable_auxiliary_loss:
+            auxiliary_feat = feat_list[self.backbone_indices[0]]
+            auxiliary_logit = self.fcn_head(auxiliary_feat)
+            auxiliary_logit = F.resize_bilinear(auxiliary_logit,
+                                                input.shape[2:])
+            logit_list.append(auxiliary_logit)
+
+        return logit_list
+
+    def init_weight(self, pretrained_model=None):
+        """
+        Initialize the parameters of model parts.
+
+        Args:
+            pretrained_model ([str], optional): the path of pretrained model. Defaults to None.
+ """ + if pretrained_model is not None: + if os.path.exists(pretrained_model): + utils.load_pretrained_model(self, pretrained_model) + else: + raise Exception('Pretrained model is not found: {}'.format( + pretrained_model)) diff --git a/dygraph/paddleseg/models/unet.py b/dygraph/paddleseg/models/unet.py new file mode 100644 index 0000000000000000000000000000000000000000..f7bd847cff52accdaeacdeadf861f4350d338700 --- /dev/null +++ b/dygraph/paddleseg/models/unet.py @@ -0,0 +1,203 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import paddle.fluid as fluid +from paddle.fluid.dygraph import Conv2D, Pool2D +from paddle.nn import SyncBatchNorm as BatchNorm + +from paddleseg.cvlibs import manager +from paddleseg import utils + + +class UNet(fluid.dygraph.Layer): + """ + U-Net: Convolutional Networks for Biomedical Image Segmentation. + https://arxiv.org/abs/1505.04597 + + Args: + num_classes (int): the unique number of target classes. + pretrained_model (str): the path of pretrained model. + ignore_index (int): the value of ground-truth mask would be ignored while computing loss or doing evaluation. Default 255. + """ + + def __init__(self, num_classes, model_pretrained=None, ignore_index=255): + super(UNet, self).__init__() + self.encode = UnetEncoder() + self.decode = UnetDecode() + self.get_logit = GetLogit(64, num_classes) + self.ignore_index = ignore_index + self.EPS = 1e-5 + + self.init_weight(model_pretrained) + + def forward(self, x, label=None): + encode_data, short_cuts = self.encode(x) + decode_data = self.decode(encode_data, short_cuts) + logit = self.get_logit(decode_data) + if self.training: + return self._get_loss(logit, label) + else: + score_map = fluid.layers.softmax(logit, axis=1) + score_map = fluid.layers.transpose(score_map, [0, 2, 3, 1]) + pred = fluid.layers.argmax(score_map, axis=3) + pred = fluid.layers.unsqueeze(pred, axes=[3]) + return pred, score_map + + def init_weight(self, pretrained_model=None): + """ + Initialize the parameters of model parts. + Args: + pretrained_model ([str], optional): the path of pretrained model. Defaults to None. 
+ """ + if pretrained_model is not None: + if os.path.exists(pretrained_model): + utils.load_pretrained_model(self, pretrained_model) + else: + raise Exception('Pretrained model is not found: {}'.format( + pretrained_model)) + + def _get_loss(self, logit, label): + logit = fluid.layers.transpose(logit, [0, 2, 3, 1]) + label = fluid.layers.transpose(label, [0, 2, 3, 1]) + mask = label != self.ignore_index + mask = fluid.layers.cast(mask, 'float32') + loss, probs = fluid.layers.softmax_with_cross_entropy( + logit, + label, + ignore_index=self.ignore_index, + return_softmax=True, + axis=-1) + + loss = loss * mask + avg_loss = fluid.layers.mean(loss) / ( + fluid.layers.mean(mask) + self.EPS) + + label.stop_gradient = True + mask.stop_gradient = True + return avg_loss + + +class UnetEncoder(fluid.dygraph.Layer): + def __init__(self): + super(UnetEncoder, self).__init__() + self.double_conv = DoubleConv(3, 64) + self.down1 = Down(64, 128) + self.down2 = Down(128, 256) + self.down3 = Down(256, 512) + self.down4 = Down(512, 512) + + def forward(self, x): + short_cuts = [] + x = self.double_conv(x) + short_cuts.append(x) + x = self.down1(x) + short_cuts.append(x) + x = self.down2(x) + short_cuts.append(x) + x = self.down3(x) + short_cuts.append(x) + x = self.down4(x) + return x, short_cuts + + +class UnetDecode(fluid.dygraph.Layer): + def __init__(self): + super(UnetDecode, self).__init__() + self.up1 = Up(512, 256) + self.up2 = Up(256, 128) + self.up3 = Up(128, 64) + self.up4 = Up(64, 64) + + def forward(self, x, short_cuts): + x = self.up1(x, short_cuts[3]) + x = self.up2(x, short_cuts[2]) + x = self.up3(x, short_cuts[1]) + x = self.up4(x, short_cuts[0]) + return x + + +class DoubleConv(fluid.dygraph.Layer): + def __init__(self, num_channels, num_filters): + super(DoubleConv, self).__init__() + self.conv0 = Conv2D( + num_channels=num_channels, + num_filters=num_filters, + filter_size=3, + stride=1, + padding=1) + self.bn0 = BatchNorm(num_filters) + self.conv1 = Conv2D( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + stride=1, + padding=1) + self.bn1 = BatchNorm(num_filters) + + def forward(self, x): + x = self.conv0(x) + x = self.bn0(x) + x = fluid.layers.relu(x) + x = self.conv1(x) + x = self.bn1(x) + x = fluid.layers.relu(x) + return x + + +class Down(fluid.dygraph.Layer): + def __init__(self, num_channels, num_filters): + super(Down, self).__init__() + self.max_pool = Pool2D( + pool_size=2, pool_type='max', pool_stride=2, pool_padding=0) + self.double_conv = DoubleConv(num_channels, num_filters) + + def forward(self, x): + x = self.max_pool(x) + x = self.double_conv(x) + return x + + +class Up(fluid.dygraph.Layer): + def __init__(self, num_channels, num_filters): + super(Up, self).__init__() + self.double_conv = DoubleConv(2 * num_channels, num_filters) + + def forward(self, x, short_cut): + short_cut_shape = fluid.layers.shape(short_cut) + x = fluid.layers.resize_bilinear(x, short_cut_shape[2:]) + x = fluid.layers.concat([x, short_cut], axis=1) + x = self.double_conv(x) + return x + + +class GetLogit(fluid.dygraph.Layer): + def __init__(self, num_channels, num_classes): + super(GetLogit, self).__init__() + self.conv = Conv2D( + num_channels=num_channels, + num_filters=num_classes, + filter_size=3, + stride=1, + padding=1) + + def forward(self, x): + x = self.conv(x) + return x + + +@manager.MODELS.add_component +def unet(*args, **kwargs): + return UNet(*args, **kwargs) diff --git a/dygraph/paddleseg/transforms/__init__.py b/dygraph/paddleseg/transforms/__init__.py 
new file mode 100644 index 0000000000000000000000000000000000000000..8f1d5ae80aeb1eb77ac672b1cbcfedcbfbd643c4 --- /dev/null +++ b/dygraph/paddleseg/transforms/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .transforms import * +from . import functional diff --git a/dygraph/paddleseg/transforms/functional.py b/dygraph/paddleseg/transforms/functional.py new file mode 100644 index 0000000000000000000000000000000000000000..6d5a9b10db15edb05692c8aa4249912652e0a745 --- /dev/null +++ b/dygraph/paddleseg/transforms/functional.py @@ -0,0 +1,99 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import cv2 +import numpy as np +from PIL import Image, ImageEnhance + + +def normalize(im, mean, std): + im = im.astype(np.float32, copy=False) / 255.0 + im -= mean + im /= std + return im + + +def permute(im): + im = np.transpose(im, (2, 0, 1)) + return im + + +def resize(im, target_size=608, interp=cv2.INTER_LINEAR): + if isinstance(target_size, list) or isinstance(target_size, tuple): + w = target_size[0] + h = target_size[1] + else: + w = target_size + h = target_size + im = cv2.resize(im, (w, h), interpolation=interp) + return im + + +def resize_long(im, long_size=224, interpolation=cv2.INTER_LINEAR): + value = max(im.shape[0], im.shape[1]) + scale = float(long_size) / float(value) + resized_width = int(round(im.shape[1] * scale)) + resized_height = int(round(im.shape[0] * scale)) + + im = cv2.resize( + im, (resized_width, resized_height), interpolation=interpolation) + return im + + +def horizontal_flip(im): + if len(im.shape) == 3: + im = im[:, ::-1, :] + elif len(im.shape) == 2: + im = im[:, ::-1] + return im + + +def vertical_flip(im): + if len(im.shape) == 3: + im = im[::-1, :, :] + elif len(im.shape) == 2: + im = im[::-1, :] + return im + + +def brightness(im, brightness_lower, brightness_upper): + brightness_delta = np.random.uniform(brightness_lower, brightness_upper) + im = ImageEnhance.Brightness(im).enhance(brightness_delta) + return im + + +def contrast(im, contrast_lower, contrast_upper): + contrast_delta = np.random.uniform(contrast_lower, contrast_upper) + im = ImageEnhance.Contrast(im).enhance(contrast_delta) + return im + + +def saturation(im, saturation_lower, saturation_upper): + saturation_delta = np.random.uniform(saturation_lower, saturation_upper) + im = ImageEnhance.Color(im).enhance(saturation_delta) + return im + + 
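+# The three enhancers above operate on a PIL.Image with a factor drawn
+# uniformly from (lower, upper); a factor of 1.0 leaves the image unchanged.
+# A minimal sketch (values are illustrative):
+#
+#   from PIL import Image
+#   import numpy as np
+#   pil_im = Image.fromarray(np.full((64, 64, 3), 128, dtype='uint8'))
+#   out = brightness(pil_im, 0.5, 1.5)  # random factor in [0.5, 1.5)
+
+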
+
+def hue(im, hue_lower, hue_upper):
+    hue_delta = np.random.uniform(hue_lower, hue_upper)
+    im = np.array(im.convert('HSV'))
+    im[:, :, 0] = im[:, :, 0] + hue_delta
+    im = Image.fromarray(im, mode='HSV').convert('RGB')
+    return im
+
+
+def rotate(im, rotate_lower, rotate_upper):
+    rotate_delta = np.random.uniform(rotate_lower, rotate_upper)
+    im = im.rotate(int(rotate_delta))
+    return im
diff --git a/dygraph/paddleseg/transforms/transforms.py b/dygraph/paddleseg/transforms/transforms.py
new file mode 100644
index 0000000000000000000000000000000000000000..4693d429d5b00236224c78165c2ef8dbf1ed088e
--- /dev/null
+++ b/dygraph/paddleseg/transforms/transforms.py
@@ -0,0 +1,576 @@
+# coding: utf8
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import random
+from collections import OrderedDict
+
+import numpy as np
+from PIL import Image
+import cv2
+
+from .functional import *
+from paddleseg.cvlibs import manager
+
+
+@manager.TRANSFORMS.add_component
+class Compose:
+    def __init__(self, transforms, to_rgb=True):
+        if not isinstance(transforms, list):
+            raise TypeError('The transforms must be a list!')
+        if len(transforms) < 1:
+            raise ValueError('The length of transforms '
+                             'must be equal or larger than 1!')
+        self.transforms = transforms
+        self.to_rgb = to_rgb
+
+    def __call__(self, im, im_info=None, label=None):
+        if im_info is None:
+            im_info = list()
+        if isinstance(im, str):
+            # Check the result of imread before converting the dtype, so a
+            # bad path raises a readable error instead of an AttributeError.
+            im_path = im
+            im = cv2.imread(im_path)
+            if im is None:
+                raise ValueError(
+                    "Can't read the image file {}!".format(im_path))
+            im = im.astype('float32')
+        if isinstance(label, str):
+            label = np.asarray(Image.open(label))
+        if im is None:
+            raise ValueError('The image is None!')
+        if self.to_rgb:
+            im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
+
+        for op in self.transforms:
+            outputs = op(im, im_info, label)
+            im = outputs[0]
+            if len(outputs) >= 2:
+                im_info = outputs[1]
+            if len(outputs) == 3:
+                label = outputs[2]
+        im = permute(im)
+        return (im, im_info, label)
+
+
+@manager.TRANSFORMS.add_component
+class RandomHorizontalFlip:
+    def __init__(self, prob=0.5):
+        self.prob = prob
+
+    def __call__(self, im, im_info=None, label=None):
+        if random.random() < self.prob:
+            im = horizontal_flip(im)
+            if label is not None:
+                label = horizontal_flip(label)
+        if label is None:
+            return (im, im_info)
+        else:
+            return (im, im_info, label)
+
+
+@manager.TRANSFORMS.add_component
+class RandomVerticalFlip:
+    def __init__(self, prob=0.1):
+        self.prob = prob
+
+    def __call__(self, im, im_info=None, label=None):
+        if random.random() < self.prob:
+            im = vertical_flip(im)
+            if label is not None:
+                label = vertical_flip(label)
+        if label is None:
+            return (im, im_info)
+        else:
+            return (im, im_info, label)
+
+
+@manager.TRANSFORMS.add_component
+class Resize:
+    # The interpolation mode
+    interp_dict = {
+        'NEAREST': cv2.INTER_NEAREST,
+        'LINEAR': cv2.INTER_LINEAR,
+        'CUBIC': cv2.INTER_CUBIC,
+        'AREA': cv2.INTER_AREA,
+        'LANCZOS4': cv2.INTER_LANCZOS4
+    }
+
+    def __init__(self, target_size=512,
interp='LINEAR'): + self.interp = interp + if not (interp == "RANDOM" or interp in self.interp_dict): + raise ValueError("interp should be one of {}".format( + self.interp_dict.keys())) + if isinstance(target_size, list) or isinstance(target_size, tuple): + if len(target_size) != 2: + raise TypeError( + 'when target is list or tuple, it should include 2 elements, but it is {}' + .format(target_size)) + elif not isinstance(target_size, int): + raise TypeError( + "Type of target_size is invalid. Must be Integer or List or tuple, now is {}" + .format(type(target_size))) + + self.target_size = target_size + + def __call__(self, im, im_info=None, label=None): + if im_info is None: + im_info = list() + im_info.append(('resize', im.shape[:2])) + if not isinstance(im, np.ndarray): + raise TypeError("Resize: image type is not numpy.") + if len(im.shape) != 3: + raise ValueError('Resize: image is not 3-dimensional.') + if self.interp == "RANDOM": + interp = random.choice(list(self.interp_dict.keys())) + else: + interp = self.interp + im = resize(im, self.target_size, self.interp_dict[interp]) + if label is not None: + label = resize(label, self.target_size, cv2.INTER_NEAREST) + + if label is None: + return (im, im_info) + else: + return (im, im_info, label) + + +@manager.TRANSFORMS.add_component +class ResizeByLong: + def __init__(self, long_size): + self.long_size = long_size + + def __call__(self, im, im_info=None, label=None): + if im_info is None: + im_info = list() + + im_info.append(('resize', im.shape[:2])) + im = resize_long(im, self.long_size) + if label is not None: + label = resize_long(label, self.long_size, cv2.INTER_NEAREST) + + if label is None: + return (im, im_info) + else: + return (im, im_info, label) + + +@manager.TRANSFORMS.add_component +class ResizeRangeScaling: + def __init__(self, min_value=400, max_value=600): + if min_value > max_value: + raise ValueError('min_value must be less than max_value, ' + 'but they are {} and {}.'.format( + min_value, max_value)) + self.min_value = min_value + self.max_value = max_value + + def __call__(self, im, im_info=None, label=None): + if self.min_value == self.max_value: + random_size = self.max_value + else: + random_size = int( + np.random.uniform(self.min_value, self.max_value) + 0.5) + im = resize_long(im, random_size, cv2.INTER_LINEAR) + if label is not None: + label = resize_long(label, random_size, cv2.INTER_NEAREST) + + if label is None: + return (im, im_info) + else: + return (im, im_info, label) + + +@manager.TRANSFORMS.add_component +class ResizeStepScaling: + def __init__(self, + min_scale_factor=0.75, + max_scale_factor=1.25, + scale_step_size=0.25): + if min_scale_factor > max_scale_factor: + raise ValueError( + 'min_scale_factor must be less than max_scale_factor, ' + 'but they are {} and {}.'.format(min_scale_factor, + max_scale_factor)) + self.min_scale_factor = min_scale_factor + self.max_scale_factor = max_scale_factor + self.scale_step_size = scale_step_size + + def __call__(self, im, im_info=None, label=None): + if self.min_scale_factor == self.max_scale_factor: + scale_factor = self.min_scale_factor + + elif self.scale_step_size == 0: + scale_factor = np.random.uniform(self.min_scale_factor, + self.max_scale_factor) + + else: + num_steps = int((self.max_scale_factor - self.min_scale_factor) / + self.scale_step_size + 1) + scale_factors = np.linspace(self.min_scale_factor, + self.max_scale_factor, + num_steps).tolist() + np.random.shuffle(scale_factors) + scale_factor = scale_factors[0] + w = int(round(scale_factor * 
im.shape[1])) + h = int(round(scale_factor * im.shape[0])) + + im = resize(im, (w, h), cv2.INTER_LINEAR) + if label is not None: + label = resize(label, (w, h), cv2.INTER_NEAREST) + + if label is None: + return (im, im_info) + else: + return (im, im_info, label) + + +@manager.TRANSFORMS.add_component +class Normalize: + def __init__(self, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]): + self.mean = mean + self.std = std + if not (isinstance(self.mean, list) and isinstance(self.std, list)): + raise ValueError("{}: input type is invalid.".format(self)) + from functools import reduce + if reduce(lambda x, y: x * y, self.std) == 0: + raise ValueError('{}: std is invalid!'.format(self)) + + def __call__(self, im, im_info=None, label=None): + mean = np.array(self.mean)[np.newaxis, np.newaxis, :] + std = np.array(self.std)[np.newaxis, np.newaxis, :] + im = normalize(im, mean, std) + + if label is None: + return (im, im_info) + else: + return (im, im_info, label) + + +@manager.TRANSFORMS.add_component +class Padding: + def __init__(self, + target_size, + im_padding_value=[127.5, 127.5, 127.5], + label_padding_value=255): + if isinstance(target_size, list) or isinstance(target_size, tuple): + if len(target_size) != 2: + raise ValueError( + 'when target is list or tuple, it should include 2 elements, but it is {}' + .format(target_size)) + elif not isinstance(target_size, int): + raise TypeError( + "Type of target_size is invalid. Must be Integer or List or tuple, now is {}" + .format(type(target_size))) + self.target_size = target_size + self.im_padding_value = im_padding_value + self.label_padding_value = label_padding_value + + def __call__(self, im, im_info=None, label=None): + if im_info is None: + im_info = list() + im_info.append(('padding', im.shape[:2])) + + im_height, im_width = im.shape[0], im.shape[1] + if isinstance(self.target_size, int): + target_height = self.target_size + target_width = self.target_size + else: + target_height = self.target_size[1] + target_width = self.target_size[0] + pad_height = target_height - im_height + pad_width = target_width - im_width + if pad_height < 0 or pad_width < 0: + raise ValueError( + 'the size of image should be less than target_size, but the size of image ({}, {}), is larger than target_size ({}, {})' + .format(im_width, im_height, target_width, target_height)) + else: + im = cv2.copyMakeBorder( + im, + 0, + pad_height, + 0, + pad_width, + cv2.BORDER_CONSTANT, + value=self.im_padding_value) + if label is not None: + label = cv2.copyMakeBorder( + label, + 0, + pad_height, + 0, + pad_width, + cv2.BORDER_CONSTANT, + value=self.label_padding_value) + if label is None: + return (im, im_info) + else: + return (im, im_info, label) + + +@manager.TRANSFORMS.add_component +class RandomPaddingCrop: + def __init__(self, + crop_size=512, + im_padding_value=[127.5, 127.5, 127.5], + label_padding_value=255): + if isinstance(crop_size, list) or isinstance(crop_size, tuple): + if len(crop_size) != 2: + raise ValueError( + 'when crop_size is list or tuple, it should include 2 elements, but it is {}' + .format(crop_size)) + elif not isinstance(crop_size, int): + raise TypeError( + "Type of crop_size is invalid. 
Must be Integer or List or tuple, now is {}"
+                .format(type(crop_size)))
+        self.crop_size = crop_size
+        self.im_padding_value = im_padding_value
+        self.label_padding_value = label_padding_value
+
+    def __call__(self, im, im_info=None, label=None):
+        if isinstance(self.crop_size, int):
+            crop_width = self.crop_size
+            crop_height = self.crop_size
+        else:
+            crop_width = self.crop_size[0]
+            crop_height = self.crop_size[1]
+
+        img_height = im.shape[0]
+        img_width = im.shape[1]
+
+        if img_height == crop_height and img_width == crop_width:
+            if label is None:
+                return (im, im_info)
+            else:
+                return (im, im_info, label)
+        else:
+            pad_height = max(crop_height - img_height, 0)
+            pad_width = max(crop_width - img_width, 0)
+            if pad_height > 0 or pad_width > 0:
+                im = cv2.copyMakeBorder(
+                    im,
+                    0,
+                    pad_height,
+                    0,
+                    pad_width,
+                    cv2.BORDER_CONSTANT,
+                    value=self.im_padding_value)
+                if label is not None:
+                    label = cv2.copyMakeBorder(
+                        label,
+                        0,
+                        pad_height,
+                        0,
+                        pad_width,
+                        cv2.BORDER_CONSTANT,
+                        value=self.label_padding_value)
+                img_height = im.shape[0]
+                img_width = im.shape[1]
+
+            if crop_height > 0 and crop_width > 0:
+                h_off = np.random.randint(img_height - crop_height + 1)
+                w_off = np.random.randint(img_width - crop_width + 1)
+
+                im = im[h_off:(crop_height + h_off), w_off:(
+                    w_off + crop_width), :]
+                if label is not None:
+                    label = label[h_off:(crop_height + h_off), w_off:(
+                        w_off + crop_width)]
+            if label is None:
+                return (im, im_info)
+            else:
+                return (im, im_info, label)
+
+
+@manager.TRANSFORMS.add_component
+class RandomBlur:
+    def __init__(self, prob=0.1):
+        self.prob = prob
+
+    def __call__(self, im, im_info=None, label=None):
+        if self.prob <= 0:
+            n = 0
+        elif self.prob >= 1:
+            n = 1
+        else:
+            n = int(1.0 / self.prob)
+        if n > 0:
+            if np.random.randint(0, n) == 0:
+                radius = np.random.randint(3, 10)
+                if radius % 2 != 1:
+                    radius = radius + 1
+                if radius > 9:
+                    radius = 9
+                im = cv2.GaussianBlur(im, (radius, radius), 0, 0)
+
+        if label is None:
+            return (im, im_info)
+        else:
+            return (im, im_info, label)
+
+
+@manager.TRANSFORMS.add_component
+class RandomRotation:
+    def __init__(self,
+                 max_rotation=15,
+                 im_padding_value=[127.5, 127.5, 127.5],
+                 label_padding_value=255):
+        self.max_rotation = max_rotation
+        self.im_padding_value = im_padding_value
+        self.label_padding_value = label_padding_value
+
+    def __call__(self, im, im_info=None, label=None):
+        if self.max_rotation > 0:
+            (h, w) = im.shape[:2]
+            do_rotation = np.random.uniform(-self.max_rotation,
+                                            self.max_rotation)
+            pc = (w // 2, h // 2)
+            r = cv2.getRotationMatrix2D(pc, do_rotation, 1.0)
+            cos = np.abs(r[0, 0])
+            sin = np.abs(r[0, 1])
+
+            nw = int((h * sin) + (w * cos))
+            nh = int((h * cos) + (w * sin))
+
+            (cx, cy) = pc
+            r[0, 2] += (nw / 2) - cx
+            r[1, 2] += (nh / 2) - cy
+            dsize = (nw, nh)
+            im = cv2.warpAffine(
+                im,
+                r,
+                dsize=dsize,
+                flags=cv2.INTER_LINEAR,
+                borderMode=cv2.BORDER_CONSTANT,
+                borderValue=self.im_padding_value)
+            # Rotate the label only when one is provided.
+            if label is not None:
+                label = cv2.warpAffine(
+                    label,
+                    r,
+                    dsize=dsize,
+                    flags=cv2.INTER_NEAREST,
+                    borderMode=cv2.BORDER_CONSTANT,
+                    borderValue=self.label_padding_value)
+
+        if label is None:
+            return (im, im_info)
+        else:
+            return (im, im_info, label)
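+
+
+# The expanded canvas in RandomRotation follows the bounding-box identity for
+# a rotation by angle t: new_w = h*|sin t| + w*|cos t| and
+# new_h = h*|cos t| + w*|sin t|; the translation added to r keeps the rotated
+# image centered on the new canvas.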
for i in range(0, 10): + area = img_height * img_width + target_area = area * np.random.uniform(self.min_scale, 1.0) + aspectRatio = np.random.uniform(self.aspect_ratio, + 1.0 / self.aspect_ratio) + + dw = int(np.sqrt(target_area * 1.0 * aspectRatio)) + dh = int(np.sqrt(target_area * 1.0 / aspectRatio)) + if (np.random.randint(10) < 5): + tmp = dw + dw = dh + dh = tmp + + if (dh < img_height and dw < img_width): + h1 = np.random.randint(0, img_height - dh) + w1 = np.random.randint(0, img_width - dw) + + im = im[h1:(h1 + dh), w1:(w1 + dw), :] + label = label[h1:(h1 + dh), w1:(w1 + dw)] + im = cv2.resize( + im, (img_width, img_height), + interpolation=cv2.INTER_LINEAR) + label = cv2.resize( + label, (img_width, img_height), + interpolation=cv2.INTER_NEAREST) + break + if label is None: + return (im, im_info) + else: + return (im, im_info, label) + + +@manager.TRANSFORMS.add_component +class RandomDistort: + def __init__(self, + brightness_range=0.5, + brightness_prob=0.5, + contrast_range=0.5, + contrast_prob=0.5, + saturation_range=0.5, + saturation_prob=0.5, + hue_range=18, + hue_prob=0.5): + self.brightness_range = brightness_range + self.brightness_prob = brightness_prob + self.contrast_range = contrast_range + self.contrast_prob = contrast_prob + self.saturation_range = saturation_range + self.saturation_prob = saturation_prob + self.hue_range = hue_range + self.hue_prob = hue_prob + + def __call__(self, im, im_info=None, label=None): + brightness_lower = 1 - self.brightness_range + brightness_upper = 1 + self.brightness_range + contrast_lower = 1 - self.contrast_range + contrast_upper = 1 + self.contrast_range + saturation_lower = 1 - self.saturation_range + saturation_upper = 1 + self.saturation_range + hue_lower = -self.hue_range + hue_upper = self.hue_range + ops = [brightness, contrast, saturation, hue] + random.shuffle(ops) + params_dict = { + 'brightness': { + 'brightness_lower': brightness_lower, + 'brightness_upper': brightness_upper + }, + 'contrast': { + 'contrast_lower': contrast_lower, + 'contrast_upper': contrast_upper + }, + 'saturation': { + 'saturation_lower': saturation_lower, + 'saturation_upper': saturation_upper + }, + 'hue': { + 'hue_lower': hue_lower, + 'hue_upper': hue_upper + } + } + prob_dict = { + 'brightness': self.brightness_prob, + 'contrast': self.contrast_prob, + 'saturation': self.saturation_prob, + 'hue': self.hue_prob + } + im = im.astype('uint8') + im = Image.fromarray(im) + for id in range(4): + params = params_dict[ops[id].__name__] + prob = prob_dict[ops[id].__name__] + params['im'] = im + if np.random.uniform(0, 1) < prob: + im = ops[id](**params) + im = np.asarray(im).astype('float32') + if label is None: + return (im, im_info) + else: + return (im, im_info, label) diff --git a/dygraph/paddleseg/utils/__init__.py b/dygraph/paddleseg/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a22f9e5ec0ff32a5e42b6c2d7d6bed14a56994a1 --- /dev/null +++ b/dygraph/paddleseg/utils/__init__.py @@ -0,0 +1,21 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from . import logger +from . import download +from .metrics import ConfusionMatrix +from .utils import * +from .timer import Timer, calculate_eta +from .get_environ_info import get_environ_info +from .config import Config diff --git a/dygraph/paddleseg/utils/config.py b/dygraph/paddleseg/utils/config.py new file mode 100644 index 0000000000000000000000000000000000000000..270cabba6281980e79dfd1735756c49169e938ae --- /dev/null +++ b/dygraph/paddleseg/utils/config.py @@ -0,0 +1,241 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import codecs +import os +from typing import Any, Callable + +import yaml +import paddle.fluid as fluid + +import paddleseg.cvlibs.manager as manager + + +class Config(object): + ''' + Training config. + + Args: + path(str) : the path of config file, supports yaml format only + ''' + + def __init__(self, path: str): + if not os.path.exists(path): + raise FileNotFoundError('File {} does not exist'.format(path)) + + if path.endswith('yml') or path.endswith('yaml'): + dic = self._parse_from_yaml(path) + print(dic) + self._build(dic) + else: + raise RuntimeError('Config file should in yaml format!') + + def _update_dic(self, dic, base_dic): + """ + update config from dic based base_dic + """ + base_dic = base_dic.copy() + for key, val in dic.items(): + if isinstance(val, dict) and key in base_dic: + base_dic[key] = self._update_dic(val, base_dic[key]) + else: + base_dic[key] = val + dic = base_dic + return dic + + def _parse_from_yaml(self, path: str): + '''Parse a yaml file and build config''' + with codecs.open(path, 'r', 'utf-8') as file: + dic = yaml.load(file, Loader=yaml.FullLoader) + if '_base_' in dic: + cfg_dir = os.path.dirname(path) + base_path = dic.pop('_base_') + base_path = os.path.join(cfg_dir, base_path) + base_dic = self._parse_from_yaml(base_path) + dic = self._update_dic(dic, base_dic) + return dic + + def _build(self, dic: dict): + '''Build config from dictionary''' + dic = dic.copy() + + self._batch_size = dic.get('batch_size', 1) + self._iters = dic.get('iters') + + if 'model' not in dic: + raise RuntimeError() + self._model_cfg = dic['model'] + self._model = None + + self._train_dataset = dic.get('train_dataset') + self._val_dataset = dic.get('val_dataset') + + self._learning_rate_cfg = dic.get('learning_rate', {}) + self._learning_rate = self._learning_rate_cfg.get('value') + self._decay = self._learning_rate_cfg.get('decay', { + 'type': 'poly', + 'power': 0.9 + }) + + self._loss_cfg = dic.get('loss', {}) + self._losses = None + + self._optimizer_cfg = dic.get('optimizer', {}) + + def update(self, + learning_rate: float = None, + batch_size: int = None, + iters: int = None): + '''Update config''' + if learning_rate: + self._learning_rate = learning_rate + + if batch_size: + self._batch_size = batch_size + + if iters: + self._iters = iters + + @property + def batch_size(self) 
-> int: + return self._batch_size + + @property + def iters(self) -> int: + if not self._iters: + raise RuntimeError('No iters specified in the configuration file.') + return self._iters + + @property + def learning_rate(self) -> float: + if not self._learning_rate: + raise RuntimeError( + 'No learning rate specified in the configuration file.') + + if self.decay_type == 'poly': + lr = self._learning_rate + args = self.decay_args + args.setdefault('decay_steps', self.iters) + return fluid.layers.polynomial_decay(lr, **args) + else: + raise RuntimeError('Only poly decay support.') + + @property + def optimizer(self) -> fluid.optimizer.Optimizer: + if self.optimizer_type == 'sgd': + lr = self.learning_rate + args = self.optimizer_args + args.setdefault('momentum', 0.9) + return fluid.optimizer.Momentum( + lr, parameter_list=self.model.parameters(), **args) + else: + raise RuntimeError('Only sgd optimizer support.') + + @property + def optimizer_type(self) -> str: + otype = self._optimizer_cfg.get('type') + if not otype: + raise RuntimeError( + 'No optimizer type specified in the configuration file.') + return otype + + @property + def optimizer_args(self) -> dict: + args = self._optimizer_cfg.copy() + args.pop('type') + return args + + @property + def decay_type(self) -> str: + return self._decay['type'] + + @property + def decay_args(self) -> dict: + args = self._decay.copy() + args.pop('type') + return args + + @property + def loss(self) -> list: + if not self._losses: + args = self._loss_cfg.copy() + self._losses = dict() + for key, val in args.items(): + if key == 'types': + self._losses['types'] = [] + for item in args['types']: + self._losses['types'].append(self._load_object(item)) + else: + self._losses[key] = val + if len(self._losses['coef']) != len(self._losses['types']): + raise RuntimeError( + 'The length of coef should equal to types in loss config: {} != {}.' 
+ .format( + len(self._losses['coef']), len(self._losses['types']))) + return self._losses + + @property + def model(self) -> Callable: + if not self._model: + self._model = self._load_object(self._model_cfg) + return self._model + + @property + def train_dataset(self) -> Any: + if not self._train_dataset: + return None + return self._load_object(self._train_dataset) + + @property + def val_dataset(self) -> Any: + if not self._val_dataset: + return None + return self._load_object(self._val_dataset) + + def _load_component(self, com_name: str) -> Any: + com_list = [ + manager.MODELS, manager.BACKBONES, manager.DATASETS, + manager.TRANSFORMS, manager.LOSSES + ] + + for com in com_list: + if com_name in com.components_dict: + return com[com_name] + else: + raise RuntimeError( + 'The specified component was not found {}.'.format(com_name)) + + def _load_object(self, cfg: dict) -> Any: + cfg = cfg.copy() + if 'type' not in cfg: + raise RuntimeError('No object information in {}.'.format(cfg)) + + component = self._load_component(cfg.pop('type')) + + params = {} + for key, val in cfg.items(): + if self._is_meta_type(val): + params[key] = self._load_object(val) + elif isinstance(val, list): + params[key] = [ + self._load_object(item) + if self._is_meta_type(item) else item for item in val + ] + else: + params[key] = val + + return component(**params) + + def _is_meta_type(self, item: Any) -> bool: + return isinstance(item, dict) and 'type' in item diff --git a/dygraph/paddleseg/utils/download.py b/dygraph/paddleseg/utils/download.py new file mode 100644 index 0000000000000000000000000000000000000000..7bf6dd096a4b33587b47bed127673d8fe09aefbb --- /dev/null +++ b/dygraph/paddleseg/utils/download.py @@ -0,0 +1,135 @@ +import os +import sys +import time +import requests +import tarfile +import zipfile +import shutil +import functools + +lasttime = time.time() +FLUSH_INTERVAL = 0.1 + + +def progress(str, end=False): + global lasttime + if end: + str += "\n" + lasttime = 0 + if time.time() - lasttime >= FLUSH_INTERVAL: + sys.stdout.write("\r%s" % str) + lasttime = time.time() + sys.stdout.flush() + + +def _download_file(url, savepath, print_progress): + r = requests.get(url, stream=True) + total_length = r.headers.get('content-length') + + if total_length is None: + with open(savepath, 'wb') as f: + shutil.copyfileobj(r.raw, f) + else: + with open(savepath, 'wb') as f: + dl = 0 + total_length = int(total_length) + starttime = time.time() + if print_progress: + print("Downloading %s" % os.path.basename(savepath)) + for data in r.iter_content(chunk_size=4096): + dl += len(data) + f.write(data) + if print_progress: + done = int(50 * dl / total_length) + progress("[%-50s] %.2f%%" % + ('=' * done, float(100 * dl) / total_length)) + if print_progress: + progress("[%-50s] %.2f%%" % ('=' * 50, 100), end=True) + + +def _uncompress_file_zip(filepath, extrapath): + files = zipfile.ZipFile(filepath, 'r') + filelist = files.namelist() + rootpath = filelist[0] + total_num = len(filelist) + for index, file in enumerate(filelist): + files.extract(file, extrapath) + yield total_num, index, rootpath + files.close() + yield total_num, index, rootpath + + +def _uncompress_file_tar(filepath, extrapath, mode="r:gz"): + files = tarfile.open(filepath, mode) + filelist = files.getnames() + total_num = len(filelist) + rootpath = filelist[0] + for index, file in enumerate(filelist): + files.extract(file, extrapath) + yield total_num, index, rootpath + files.close() + yield total_num, index, rootpath + + +def 
_uncompress_file(filepath, extrapath, delete_file, print_progress): + if print_progress: + print("Uncompress %s" % os.path.basename(filepath)) + + if filepath.endswith("zip"): + handler = _uncompress_file_zip + elif filepath.endswith("tgz"): + handler = _uncompress_file_tar + else: + handler = functools.partial(_uncompress_file_tar, mode="r") + + for total_num, index, rootpath in handler(filepath, extrapath): + if print_progress: + done = int(50 * float(index) / total_num) + progress( + "[%-50s] %.2f%%" % ('=' * done, float(100 * index) / total_num)) + if print_progress: + progress("[%-50s] %.2f%%" % ('=' * 50, 100), end=True) + + if delete_file: + os.remove(filepath) + + return rootpath + + +def download_file_and_uncompress(url, + savepath=None, + extrapath=None, + extraname=None, + print_progress=True, + cover=False, + delete_file=True): + if savepath is None: + savepath = "." + + if extrapath is None: + extrapath = "." + + savename = url.split("/")[-1] + savepath = os.path.join(savepath, savename) + savename = ".".join(savename.split(".")[:-1]) + savename = os.path.join(extrapath, savename) + extraname = savename if extraname is None else os.path.join( + extrapath, extraname) + + if cover: + if os.path.exists(savepath): + shutil.rmtree(savepath) + if os.path.exists(savename): + shutil.rmtree(savename) + if os.path.exists(extraname): + shutil.rmtree(extraname) + + if not os.path.exists(extraname): + if not os.path.exists(savename): + if not os.path.exists(savepath): + _download_file(url, savepath, print_progress) + savename = _uncompress_file(savepath, extrapath, delete_file, + print_progress) + savename = os.path.join(extrapath, savename) + shutil.move(savename, extraname) + return extraname diff --git a/dygraph/paddleseg/utils/get_environ_info.py b/dygraph/paddleseg/utils/get_environ_info.py new file mode 100644 index 0000000000000000000000000000000000000000..7d789f4d60e875fd11514fa13b901885be7b0024 --- /dev/null +++ b/dygraph/paddleseg/utils/get_environ_info.py @@ -0,0 +1,118 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +from collections import OrderedDict +import subprocess +import glob + +import paddle +import paddle.fluid as fluid +import cv2 + +IS_WINDOWS = sys.platform == 'win32' + + +def _find_cuda_home(): + '''Finds the CUDA install path. It refers to the implementation of + pytorch . 
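+    Search order: the CUDA_HOME / CUDA_PATH environment variables, then the
+    directory two levels above the nvcc binary found on PATH, then a
+    platform-default install location.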
+ ''' + # Guess #1 + cuda_home = os.environ.get('CUDA_HOME') or os.environ.get('CUDA_PATH') + if cuda_home is None: + # Guess #2 + try: + which = 'where' if IS_WINDOWS else 'which' + nvcc = subprocess.check_output([which, + 'nvcc']).decode().rstrip('\r\n') + cuda_home = os.path.dirname(os.path.dirname(nvcc)) + except Exception: + # Guess #3 + if IS_WINDOWS: + cuda_homes = glob.glob( + 'C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v*.*') + if len(cuda_homes) == 0: + cuda_home = '' + else: + cuda_home = cuda_homes[0] + else: + cuda_home = '/usr/local/cuda' + if not os.path.exists(cuda_home): + cuda_home = None + return cuda_home + + +def _get_nvcc_info(cuda_home): + if cuda_home is not None and os.path.isdir(cuda_home): + try: + nvcc = os.path.join(cuda_home, 'bin/nvcc') + nvcc = subprocess.check_output( + "{} -V".format(nvcc), shell=True).decode() + nvcc = nvcc.strip().split('\n')[-1] + except subprocess.SubprocessError: + nvcc = "Not Available" + return nvcc + + +def _get_gpu_info(): + try: + gpu_info = subprocess.check_output(['nvidia-smi', + '-L']).decode().strip() + gpu_info = gpu_info.split('\n') + for i in range(len(gpu_info)): + gpu_info[i] = ' '.join(gpu_info[i].split(' ')[:4]) + except: + gpu_info = ' Can not get GPU information. Please make sure CUDA have been installed successfully.' + return gpu_info + + +def get_environ_info(): + """collect environment information""" + env_info = {} + env_info['System Platform'] = sys.platform + if env_info['System Platform'] == 'linux': + try: + lsb_v = subprocess.check_output(['lsb_release', + '-v']).decode().strip() + lsb_v = lsb_v.replace('\t', ' ') + lsb_d = subprocess.check_output(['lsb_release', + '-d']).decode().strip() + lsb_d = lsb_d.replace('\t', ' ') + env_info['LSB'] = [lsb_v, lsb_d] + except: + pass + + env_info['Python'] = sys.version.replace('\n', '') + + compiled_with_cuda = paddle.fluid.is_compiled_with_cuda() + env_info['Paddle compiled with cuda'] = compiled_with_cuda + + if compiled_with_cuda: + cuda_home = _find_cuda_home() + env_info['NVCC'] = _get_nvcc_info(cuda_home) + gpu_nums = fluid.core.get_cuda_device_count() + env_info['GPUs used'] = gpu_nums + env_info['CUDA_VISIBLE_DEVICES'] = os.environ.get( + 'CUDA_VISIBLE_DEVICES') + env_info['GPU'] = _get_gpu_info() + + gcc = subprocess.check_output(['gcc', '--version']).decode() + gcc = gcc.strip().split('\n')[0] + env_info['GCC'] = gcc + + env_info['PaddlePaddle'] = paddle.__version__ + env_info['OpenCV'] = cv2.__version__ + + return env_info diff --git a/dygraph/paddleseg/utils/logger.py b/dygraph/paddleseg/utils/logger.py new file mode 100644 index 0000000000000000000000000000000000000000..015948f65090e40895f6d4a72a75a11f2b155447 --- /dev/null +++ b/dygraph/paddleseg/utils/logger.py @@ -0,0 +1,50 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
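+#
+# Rank-aware logging helpers: only local rank 0 prints. Illustrative call
+# (the timestamp below is made up):
+#
+#   from paddleseg.utils import logger
+#   logger.info("start training")
+#   # 2020-08-01 10:00:00 [INFO]    start training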
+ +import time +import os +import sys + +from paddle.fluid.dygraph.parallel import ParallelEnv + +levels = {0: 'ERROR', 1: 'WARNING', 2: 'INFO', 3: 'DEBUG'} +log_level = 2 + + +def log(level=2, message=""): + if ParallelEnv().local_rank == 0: + current_time = time.time() + time_array = time.localtime(current_time) + current_time = time.strftime("%Y-%m-%d %H:%M:%S", time_array) + if log_level >= level: + print( + "{} [{}]\t{}".format(current_time, levels[level], + message).encode("utf-8").decode("latin1")) + sys.stdout.flush() + + +def debug(message=""): + log(level=3, message=message) + + +def info(message=""): + log(level=2, message=message) + + +def warning(message=""): + log(level=1, message=message) + + +def error(message=""): + log(level=0, message=message) diff --git a/dygraph/paddleseg/utils/metrics.py b/dygraph/paddleseg/utils/metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..b107cbd57a936fb909086567fc8b703fb86963b7 --- /dev/null +++ b/dygraph/paddleseg/utils/metrics.py @@ -0,0 +1,144 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import numpy as np +from scipy.sparse import csr_matrix + + +class ConfusionMatrix(object): + """ + Confusion Matrix for segmentation evaluation + """ + + def __init__(self, num_classes=2, streaming=False): + self.confusion_matrix = np.zeros([num_classes, num_classes], + dtype='int64') + self.num_classes = num_classes + self.streaming = streaming + + def calculate(self, pred, label, ignore=None): + # If not in streaming mode, clear matrix everytime when call `calculate` + if not self.streaming: + self.zero_matrix() + + label = np.transpose(label, (0, 2, 3, 1)) + ignore = np.transpose(ignore, (0, 2, 3, 1)) + mask = np.array(ignore) == 1 + + label = np.asarray(label)[mask] + pred = np.asarray(pred)[mask] + one = np.ones_like(pred) + # Accumuate ([row=label, col=pred], 1) into sparse matrix + spm = csr_matrix((one, (label, pred)), + shape=(self.num_classes, self.num_classes)) + spm = spm.todense() + self.confusion_matrix += spm + + def zero_matrix(self): + """ Clear confusion matrix """ + self.confusion_matrix = np.zeros([self.num_classes, self.num_classes], + dtype='int64') + + def mean_iou(self): + iou_list = [] + avg_iou = 0 + # TODO: use numpy sum axis api to simpliy + vji = np.zeros(self.num_classes, dtype=int) + vij = np.zeros(self.num_classes, dtype=int) + for j in range(self.num_classes): + v_j = 0 + for i in range(self.num_classes): + v_j += self.confusion_matrix[j][i] + vji[j] = v_j + + for i in range(self.num_classes): + v_i = 0 + for j in range(self.num_classes): + v_i += self.confusion_matrix[j][i] + vij[i] = v_i + + for c in range(self.num_classes): + total = vji[c] + vij[c] - self.confusion_matrix[c][c] + if total == 0: + iou = 0 + else: + iou = float(self.confusion_matrix[c][c]) / total + avg_iou += iou + iou_list.append(iou) + avg_iou = float(avg_iou) / float(self.num_classes) + return np.array(iou_list), avg_iou + + def 
accuracy(self): + total = self.confusion_matrix.sum() + total_right = 0 + for c in range(self.num_classes): + total_right += self.confusion_matrix[c][c] + if total == 0: + avg_acc = 0 + else: + avg_acc = float(total_right) / total + + vij = np.zeros(self.num_classes, dtype=int) + for i in range(self.num_classes): + v_i = 0 + for j in range(self.num_classes): + v_i += self.confusion_matrix[j][i] + vij[i] = v_i + + acc_list = [] + for c in range(self.num_classes): + if vij[c] == 0: + acc = 0 + else: + acc = self.confusion_matrix[c][c] / float(vij[c]) + acc_list.append(acc) + return np.array(acc_list), avg_acc + + def kappa(self): + vji = np.zeros(self.num_classes) + vij = np.zeros(self.num_classes) + for j in range(self.num_classes): + v_j = 0 + for i in range(self.num_classes): + v_j += self.confusion_matrix[j][i] + vji[j] = v_j + + for i in range(self.num_classes): + v_i = 0 + for j in range(self.num_classes): + v_i += self.confusion_matrix[j][i] + vij[i] = v_i + + total = self.confusion_matrix.sum() + + # avoid spillovers + # TODO: is it reasonable to hard code 10000.0? + total = float(total) / 10000.0 + vji = vji / 10000.0 + vij = vij / 10000.0 + + tp = 0 + tc = 0 + for c in range(self.num_classes): + tp += vji[c] * vij[c] + tc += self.confusion_matrix[c][c] + + tc = tc / 10000.0 + pe = tp / (total * total) + po = tc / total + + kappa = (po - pe) / (1 - pe) + return kappa diff --git a/dygraph/paddleseg/utils/timer.py b/dygraph/paddleseg/utils/timer.py new file mode 100644 index 0000000000000000000000000000000000000000..4ebbddc9a154de4a36d6b6d9b437e14382031c49 --- /dev/null +++ b/dygraph/paddleseg/utils/timer.py @@ -0,0 +1,60 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
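+#
+# Illustrative use of the helpers below (numbers are made up):
+#
+#   timer = Timer()
+#   timer.start()
+#   # ... run one training iteration ...
+#   eta = calculate_eta(remaining_step=1000, speed=timer.elapsed_time())
+#   # -> an "HH:MM:SS" string such as "00:05:00"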
+
+import time
+
+
+class Timer(object):
+    """ A simple timer for measuring elapsed time. """
+
+    def __init__(self):
+        self._start_time = 0.0
+        self._end_time = 0.0
+        self._elapsed_time = 0.0
+        self._is_running = False
+
+    def start(self):
+        self._is_running = True
+        self._start_time = time.time()
+
+    def restart(self):
+        self.start()
+
+    def stop(self):
+        self._is_running = False
+        self._end_time = time.time()
+
+    def elapsed_time(self):
+        # Check the running state first so a stopped timer does not
+        # overwrite its recorded end time.
+        if not self.is_running:
+            return 0.0
+        self._end_time = time.time()
+        self._elapsed_time = self._end_time - self._start_time
+        return self._elapsed_time
+
+    @property
+    def is_running(self):
+        return self._is_running
+
+
+def calculate_eta(remaining_step, speed):
+    """ Format the estimated remaining time as HH:MM:SS, where `speed`
+    is the time consumed per step in seconds. """
+    if remaining_step < 0:
+        remaining_step = 0
+    remaining_time = int(remaining_step * speed)
+    result = "{:0>2}:{:0>2}:{:0>2}"
+    arr = []
+    for i in range(2, -1, -1):
+        arr.append(int(remaining_time / 60**i))
+        remaining_time %= 60**i
+    return result.format(*arr)
diff --git a/dygraph/paddleseg/utils/utils.py b/dygraph/paddleseg/utils/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..0b7d87169a76a196926e7f9e2017ebd42a5605ad
--- /dev/null
+++ b/dygraph/paddleseg/utils/utils.py
@@ -0,0 +1,141 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import numpy as np
+import math
+import cv2
+import paddle.fluid as fluid
+
+from . import logger
+
+
+def seconds_to_hms(seconds):
+    h = math.floor(seconds / 3600)
+    m = math.floor((seconds - h * 3600) / 60)
+    s = int(seconds - h * 3600 - m * 60)
+    hms_str = "{}:{}:{}".format(h, m, s)
+    return hms_str
+
+
+def load_pretrained_model(model, pretrained_model):
+    if pretrained_model is not None:
+        logger.info('Load pretrained model from {}'.format(pretrained_model))
+        if os.path.exists(pretrained_model):
+            ckpt_path = os.path.join(pretrained_model, 'model')
+            try:
+                para_state_dict, _ = fluid.load_dygraph(ckpt_path)
+            except Exception:
+                # Fall back to loading a static-graph checkpoint.
+                para_state_dict = fluid.load_program_state(pretrained_model)
+
+            model_state_dict = model.state_dict()
+            keys = model_state_dict.keys()
+            num_params_loaded = 0
+            for k in keys:
+                if k not in para_state_dict:
+                    logger.warning("{} is not in the pretrained model".format(k))
+                elif list(para_state_dict[k].shape) != list(
+                        model_state_dict[k].shape):
+                    logger.warning(
+                        "[SKIP] Shape of pretrained params {} doesn't match. (Pretrained: {}, Actual: {})"
+                        .format(k, para_state_dict[k].shape,
+                                model_state_dict[k].shape))
+                else:
+                    model_state_dict[k] = para_state_dict[k]
+                    num_params_loaded += 1
+            model.set_dict(model_state_dict)
+            logger.info("{}/{} variables loaded.".format(
+                num_params_loaded, len(model_state_dict)))
+
+        else:
+            raise ValueError(
+                'The pretrained model directory is not found: {}'.format(
+                    pretrained_model))
+    else:
+        logger.warning('No pretrained model to load, training from scratch')
+
+
+def resume(model, optimizer, resume_model):
+    if resume_model is not None:
+        logger.info('Resume model from {}'.format(resume_model))
+        if os.path.exists(resume_model):
+            resume_model = os.path.normpath(resume_model)
+            ckpt_path = os.path.join(resume_model, 'model')
+            para_state_dict, opti_state_dict = fluid.load_dygraph(ckpt_path)
+            model.set_dict(para_state_dict)
+            optimizer.set_dict(opti_state_dict)
+            epoch = resume_model.split('_')[-1]
+            if epoch.isdigit():
+                epoch = int(epoch)
+            return epoch
+        else:
+            raise ValueError(
+                'The resume model directory is not found: {}'.format(
+                    resume_model))
+    else:
+        logger.info('No model needs to be resumed')
+
+
+def visualize(image, result, save_dir=None, weight=0.6):
+    """
+    Convert a segmentation result to a color image and optionally save it.
+    Args:
+        image: the path of the input image
+        result: the predicted label map of the image
+        save_dir: the directory for saving the visualization
+        weight: the blending weight of the input image; the result weight is (1 - weight)
+    """
+    color_map = get_color_map_list(256)
+    color_map = np.array(color_map).astype("uint8")
+    # Use OpenCV LUT for color mapping.
+    c1 = cv2.LUT(result, color_map[:, 0])
+    c2 = cv2.LUT(result, color_map[:, 1])
+    c3 = cv2.LUT(result, color_map[:, 2])
+    pseudo_img = np.dstack((c1, c2, c3))
+
+    im = cv2.imread(image)
+    vis_result = cv2.addWeighted(im, weight, pseudo_img, 1 - weight, 0)
+
+    if save_dir is not None:
+        if not os.path.exists(save_dir):
+            os.makedirs(save_dir)
+        image_name = os.path.split(image)[-1]
+        out_path = os.path.join(save_dir, image_name)
+        cv2.imwrite(out_path, vis_result)
+    else:
+        return vis_result
+
+
+def get_color_map_list(num_classes):
+    """ Returns the color map for visualizing the segmentation mask,
+    which supports an arbitrary number of classes.
+    Args:
+        num_classes: the number of classes
+    Returns:
+        the color map
+    """
+    num_classes += 1
+    color_map = num_classes * [0, 0, 0]
+    for i in range(0, num_classes):
+        # Spread the three lowest bits of the class id across the bit
+        # planes of R, G and B (the PASCAL VOC color map scheme).
+        j = 0
+        lab = i
+        while lab:
+            color_map[i * 3] |= (((lab >> 0) & 1) << (7 - j))
+            color_map[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j))
+            color_map[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j))
+            j += 1
+            lab >>= 3
+    color_map = [color_map[i:i + 3] for i in range(0, len(color_map), 3)]
+    color_map = color_map[1:]
+    return color_map
diff --git a/dygraph/train.py b/dygraph/train.py
index 789cdf451d300128aa3341af980dba7664726878..1e56fce70e9e157ca45bf92d7444a327dd2a3951 100644
--- a/dygraph/train.py
+++ b/dygraph/train.py
@@ -17,12 +17,10 @@ import argparse
 import paddle.fluid as fluid
 from paddle.fluid.dygraph.parallel import ParallelEnv
 
-import dygraph
-from dygraph.cvlibs import manager
-from dygraph.utils import get_environ_info
-from dygraph.utils import logger
-from dygraph.utils import Config
-from dygraph.core import train
+import paddleseg
+from paddleseg.cvlibs import manager
+from paddleseg.utils import get_environ_info, logger, Config
+from paddleseg.core import train
 
 
 def parse_args():
diff --git a/dygraph/val.py b/dygraph/val.py
index f4b7d6399c155d629add1888131c0d6bf7430421..6dcf040777aeab35fd742870979b68eac8d5d85b 100644
--- a/dygraph/val.py
+++ b/dygraph/val.py
@@ -17,11 +17,10 @@ import argparse
 import paddle.fluid as fluid
 from paddle.fluid.dygraph.parallel import ParallelEnv
 
-import dygraph
-from dygraph.cvlibs import manager
-from dygraph.utils import get_environ_info
-from dygraph.utils import Config
-from dygraph.core import evaluate
+import paddleseg
+from paddleseg.cvlibs import manager
+from paddleseg.utils import get_environ_info, Config
+from paddleseg.core import evaluate
 
 
 def parse_args():
diff --git a/slim/quantization/eval_quant.py b/slim/quantization/eval_quant.py
index e309858f0deecc53783d6fcc58dfb94ef4014fd6..89d1465e30b89f9e3ce65b16f017cb53d1dff039 100644
--- a/slim/quantization/eval_quant.py
+++ b/slim/quantization/eval_quant.py
@@ -109,7 +109,7 @@ def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs):
         test_prog, startup_prog, phase=ModelPhase.EVAL)
 
     data_loader.set_sample_generator(
-        data_generator, drop_last=False, batch_size=cfg.BATCH_SIZE)
+        data_generator, drop_last=False, batch_size=1)
 
     # Get device environment
     places = fluid.cuda_places() if use_gpu else fluid.cpu_places()
@@ -142,6 +142,7 @@ def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs):
     fluid.io.load_persistables(exe, ckpt_dir, main_program=test_prog)
     if kwargs['convert']:
         test_prog = convert(test_prog, place, config)
+    compiled_test_prog = fluid.CompiledProgram(test_prog)
     # Use streaming confusion matrix to calculate mean_iou
     np.set_printoptions(
         precision=4, suppress=True, linewidth=160, floatmode="fixed")
@@ -157,7 +158,7 @@ def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs):
     try:
         step += 1
         loss, pred, grts, masks = exe.run(
-            test_prog, fetch_list=fetch_list, return_numpy=True)
+            compiled_test_prog, fetch_list=fetch_list, return_numpy=True)
         loss = np.mean(np.array(loss))
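
Below is a minimal usage sketch of the utilities added in this patch (illustrative only, not part of the patch itself). It exercises `ConfusionMatrix` from `paddleseg/utils/metrics.py` and `calculate_eta` from `paddleseg/utils/timer.py` on dummy data, assuming NCHW-shaped `label`/`ignore` arrays and a `pred` array already laid out like the transposed label, which is the layout `calculate` expects.

    import numpy as np
    from paddleseg.utils.metrics import ConfusionMatrix
    from paddleseg.utils.timer import Timer, calculate_eta

    # Streaming mode accumulates counts across calls to `calculate`.
    cm = ConfusionMatrix(num_classes=2, streaming=True)
    label = np.random.randint(0, 2, (1, 1, 4, 4))   # NCHW ground truth
    ignore = np.ones_like(label)                    # 1 marks valid pixels
    pred = np.random.randint(0, 2, (1, 4, 4, 1))    # laid out like the transposed label
    cm.calculate(pred, label, ignore)
    iou_per_class, miou = cm.mean_iou()
    acc_per_class, acc = cm.accuracy()

    # ETA from measured per-step time; `speed` is seconds per step.
    timer = Timer()
    timer.start()
    # ... run one evaluation step here ...
    print(calculate_eta(remaining_step=100, speed=timer.elapsed_time()))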