Merge branch 'develop' of https://github.com/PaddlePaddle/PaddleSeg into develop

b9c9ed27 · wuzewu · 541baa22 · 2ea481f5 · b9c9ed27 · b9c9ed27
88 changed files
--- a/README.md
+++ b/README.md
@@ -89,12 +89,13 @@ pip install -r requirements.txt
 * [数据和配置校验](./docs/check.md)
 * [分割模型介绍](./docs/models.md)
 * [预训练模型下载](./docs/model_zoo.md)
-* [DeepLabv3+模型使用教程](./turtorial/finetune_deeplabv3plus.md)
+* [DeepLabv3+模型使用教程](./tutorial/finetune_deeplabv3plus.md)
-* [U-Net模型使用教程](./turtorial/finetune_unet.md)
+* [U-Net模型使用教程](./tutorial/finetune_unet.md)
-* [ICNet模型使用教程](./turtorial/finetune_icnet.md)
+* [ICNet模型使用教程](./tutorial/finetune_icnet.md)
-* [PSPNet模型使用教程](./turtorial/finetune_pspnet.md)
+* [PSPNet模型使用教程](./tutorial/finetune_pspnet.md)
-* [HRNet模型使用教程](./turtorial/finetune_hrnet.md)
+* [HRNet模型使用教程](./tutorial/finetune_hrnet.md)
-* [Fast-SCNN模型使用教程](./turtorial/finetune_fast_scnn.md)
+* [Fast-SCNN模型使用教程](./tutorial/finetune_fast_scnn.md)
+* [OCRNet模型使用教程](./tutorial/finetune_ocrnet.md)
 ### 预测部署

--- a/configs/deeplabv3p_resnet50_vd_cityscapes.yaml
+++ b/configs/deeplabv3p_resnet50_vd_cityscapes.yaml
+EVAL_CROP_SIZE: (2049, 1025) # (width, height), for unpadding rangescaling and stepscaling
+TRAIN_CROP_SIZE: (769, 769) # (width, height), for unpadding rangescaling and stepscaling
+AUG:
+    AUG_METHOD: "stepscaling" # choices: unpadding, rangescaling, stepscaling
+    FIX_RESIZE_SIZE: (2048, 1024) # (width, height), for unpadding
+    INF_RESIZE_VALUE: 500  # for rangescaling
+    MAX_RESIZE_VALUE: 600  # for rangescaling
+    MIN_RESIZE_VALUE: 400  # for rangescaling
+    MAX_SCALE_FACTOR: 2.0  # for stepscaling
+    MIN_SCALE_FACTOR: 0.5  # for stepscaling
+    SCALE_STEP_SIZE: 0.25  # for stepscaling
+    MIRROR: True
+    TO_RGB: True
+BATCH_SIZE: 16
+DATASET:
+    DATA_DIR: "./dataset/cityscapes/"
+    IMAGE_TYPE: "rgb"  # choice rgb or rgba
+    NUM_CLASSES: 19
+    TEST_FILE_LIST: "dataset/cityscapes/val.list"
+    TRAIN_FILE_LIST: "dataset/cityscapes/train.list"
+    VAL_FILE_LIST: "dataset/cityscapes/val.list"
+    IGNORE_INDEX: 255
+    SEPARATOR: " "
+FREEZE:
+    MODEL_FILENAME: "model"
+    PARAMS_FILENAME: "params"
+MODEL:
+    DEFAULT_NORM_TYPE: "bn"
+    MODEL_NAME: "deeplabv3p"
+    DEEPLAB:
+        ASPP_WITH_SEP_CONV: True
+        DECODER_USE_SEP_CONV: True
+        BACKBONE: "resnet_vd_50"
+        BACKBONE_LR_MULT_LIST: [0.1, 0.1, 0.2, 0.2, 1.0]
+TRAIN:
+    PRETRAINED_MODEL_DIR: u"pretrained_model/resnet50_vd_imagenet"
+    MODEL_SAVE_DIR: "saved_model/deeplabv3p_resnet50_vd_bn_cityscapes"
+    SNAPSHOT_EPOCH: 10
+    SYNC_BATCH_NORM: True
+TEST:
+    TEST_MODEL: "saved_model/deeplabv3p_resnet50_vd_bn_cityscapes/final"
+SOLVER:
+    LR: 0.05
+    LR_POLICY: "poly"
+    OPTIMIZER: "sgd"
+    NUM_EPOCHS: 700
--- a/configs/ocrnet_w18_cityscapes.yaml
+++ b/configs/ocrnet_w18_cityscapes.yaml
+EVAL_CROP_SIZE: (2048, 1024) # (width, height), for unpadding rangescaling and stepscaling
+TRAIN_CROP_SIZE: (1024, 512) # (width, height), for unpadding rangescaling and stepscaling
+AUG:
+#    AUG_METHOD: "unpadding" # choice unpadding rangescaling and stepscaling
+    AUG_METHOD: "stepscaling" # choices: unpadding, rangescaling, stepscaling
+    FIX_RESIZE_SIZE: (1024, 512) # (width, height), for unpadding
+    INF_RESIZE_VALUE: 500  # for rangescaling
+    MAX_RESIZE_VALUE: 600  # for rangescaling
+    MIN_RESIZE_VALUE: 400  # for rangescaling
+    MAX_SCALE_FACTOR: 2.0  # for stepscaling
+    MIN_SCALE_FACTOR: 0.5  # for stepscaling
+    SCALE_STEP_SIZE: 0.25  # for stepscaling
+    MIRROR: True
+BATCH_SIZE: 4
+#BATCH_SIZE: 4
+DATASET:
+    DATA_DIR: "./dataset/cityscapes/"
+    IMAGE_TYPE: "rgb"  # choice rgb or rgba
+    NUM_CLASSES: 19
+    TEST_FILE_LIST: "./dataset/cityscapes/val.list"
+    TRAIN_FILE_LIST: "./dataset/cityscapes/train.list"
+    VAL_FILE_LIST: "./dataset/cityscapes/val.list"
+    VIS_FILE_LIST: "./dataset/cityscapes/val.list"
+    IGNORE_INDEX: 255
+    SEPARATOR: " "
+FREEZE:
+    MODEL_FILENAME: "model"
+    PARAMS_FILENAME: "params"
+MODEL:
+    MODEL_NAME: "ocrnet"
+    DEFAULT_NORM_TYPE: "bn"
+    HRNET:
+        STAGE2:
+            NUM_CHANNELS: [18, 36]
+        STAGE3:
+            NUM_CHANNELS: [18, 36, 72]
+        STAGE4:
+            NUM_CHANNELS: [18, 36, 72, 144]
+    OCR:
+        OCR_MID_CHANNELS: 512
+        OCR_KEY_CHANNELS: 256
+    MULTI_LOSS_WEIGHT: [1.0, 1.0]
+TRAIN:
+    PRETRAINED_MODEL_DIR: u"./pretrained_model/ocrnet_w18_cityscape/best_model"
+    MODEL_SAVE_DIR: "output/ocrnet_w18_bn_cityscapes"
+    SNAPSHOT_EPOCH: 1
+    SYNC_BATCH_NORM: True
+TEST:
+    TEST_MODEL: "output/ocrnet_w18_bn_cityscapes/first"
+SOLVER:
+    LR: 0.01
+    LR_POLICY: "poly"
+    OPTIMIZER: "sgd"
+    NUM_EPOCHS: 500
--- a/contrib/ACE2P/README.md
+++ b/contrib/ACE2P/README.md
@@ -37,8 +37,6 @@ ACE2P模型包含三个分支:
 ![](imgs/result.jpg)
-![](ACE2P/imgs/result.jpg)
 人体解析(Human Parsing)是细粒度的语义分割任务，旨在识别像素级别的人类图像的组成部分（例如，身体部位和服装）。本章节使用冠军模型Augmented Context Embedding with Edge Perceiving (ACE2P)进行预测分割。
 ## 代码使用说明
@@ -79,11 +77,11 @@ python -u infer.py --example ACE2P
  原图：
-  ![](ACE2P/imgs/117676_2149260.jpg)
+  ![](imgs/117676_2149260.jpg)
  预测结果：
-  ![](ACE2P/imgs/117676_2149260.png)
+  ![](imgs/117676_2149260.png)
 ### 备注

--- a/contrib/HumanSeg/models/humanseg.py
+++ b/contrib/HumanSeg/models/humanseg.py
@@ -27,6 +27,7 @@ import cv2
 import yaml
 import shutil
 import paddleslim as slim
+import paddle
 import utils
 import utils.logging as logging
@@ -37,6 +38,15 @@ from nets import DeepLabv3p, ShuffleSeg, HRNet
 import transforms as T
+def save_infer_program(test_program, ckpt_dir):
+    """Write a serialized copy of ``test_program`` to ``<ckpt_dir>/model.pdmodel``.
+
+    All calls below are made on the object returned by ``test_program.clone()``,
+    not on ``test_program`` itself.
+
+    Args:
+        test_program: program object exposing ``clone()`` and a ``desc``.
+        ckpt_dir: directory that receives the ``model.pdmodel`` file.
+    """
+    pdmodel_path = os.path.join(ckpt_dir, 'model') + ".pdmodel"
+    infer_program = test_program.clone()
+    infer_program.desc.flush()
+    infer_program.desc._set_version()
+    paddle.fluid.core.save_op_compatible_info(infer_program.desc)
+    serialized = infer_program.desc.serialize_to_string()
+    with open(pdmodel_path, "wb") as model_file:
+        model_file.write(serialized)
 def dict2str(dict_input):
    out = ''
    for k, v in dict_input.items():
@@ -244,6 +254,7 @@ class SegModel(object):
        if self.status == 'Normal':
            fluid.save(self.train_prog, osp.join(save_dir, 'model'))
+            save_infer_program(self.test_prog, save_dir)
            model_info['status'] = 'Normal'
        elif self.status == 'Quant':
            fluid.save(self.test_prog, osp.join(save_dir, 'model'))

--- a/contrib/RemoteSensing/__init__.py
+++ b/contrib/RemoteSensing/__init__.py
@@ -21,5 +21,3 @@ import readers
 from utils.utils import get_environ_info
 env_info = get_environ_info()
-log_level = 2
--- a/contrib/RemoteSensing/models/base.py
+++ b/contrib/RemoteSensing/models/base.py
@@ -30,6 +30,16 @@ from utils.utils import seconds_to_hms, get_environ_info
 from utils.metrics import ConfusionMatrix
 import transforms.transforms as T
 import utils
+import paddle
+def save_infer_program(test_program, ckpt_dir):
+    """Write a serialized copy of ``test_program`` to ``<ckpt_dir>/model.pdmodel``.
+
+    All calls below are made on the object returned by ``test_program.clone()``,
+    not on ``test_program`` itself.
+
+    Args:
+        test_program: program object exposing ``clone()`` and a ``desc``.
+        ckpt_dir: directory that receives the ``model.pdmodel`` file.
+    """
+    pdmodel_path = os.path.join(ckpt_dir, 'model') + ".pdmodel"
+    infer_program = test_program.clone()
+    infer_program.desc.flush()
+    infer_program.desc._set_version()
+    paddle.fluid.core.save_op_compatible_info(infer_program.desc)
+    serialized = infer_program.desc.serialize_to_string()
+    with open(pdmodel_path, "wb") as model_file:
+        model_file.write(serialized)
 def dict2str(dict_input):
@@ -238,6 +248,7 @@ class BaseModel(object):
        if self.status == 'Normal':
            fluid.save(self.train_prog, osp.join(save_dir, 'model'))
+            save_infer_program(self.test_prog, save_dir)
        model_info['status'] = self.status
        with open(

--- a/contrib/RemoteSensing/utils/logging.py
+++ b/contrib/RemoteSensing/utils/logging.py
@@ -16,7 +16,6 @@
 import time
 import os
 import sys
-import __init__
 levels = {0: 'ERROR', 1: 'WARNING', 2: 'INFO', 3: 'DEBUG'}
@@ -25,10 +24,9 @@ def log(level=2, message=""):
    current_time = time.time()
    time_array = time.localtime(current_time)
    current_time = time.strftime("%Y-%m-%d %H:%M:%S", time_array)
-    if __init__.log_level >= level:
+    print("{} [{}]\t{}".format(current_time, levels[level],
-        print("{} [{}]\t{}".format(current_time, levels[level],
+                               message).encode("utf-8").decode("latin1"))
-                                   message).encode("utf-8").decode("latin1"))
+    sys.stdout.flush()
-        sys.stdout.flush()
 def debug(message=""):

--- a/contrib/SpatialEmbeddings/README.md
+++ b/contrib/SpatialEmbeddings/README.md
+# SpatialEmbeddings 
+## 模型概述
+本模型是基于proposal-free的实例分割模型，快速实时，同时准确率高，适用于自动驾驶等实时场景。
+本模型基于KITTI中MOTS数据集训练得到，是论文《Segment as Points for Efficient Online Multi-Object Tracking and Segmentation》中的分割部分。
+[论文地址](https://arxiv.org/pdf/2007.01550.pdf)
+## KITTI MOTS指标
+KITTI MOTS验证集AP:0.76, AP_50%:0.915
+## 代码使用说明
+### 1. 模型下载
+执行以下命令下载并解压SpatialEmbeddings预测模型：
+```
+python download_SpatialEmbeddings_kitti.py
+```
+或点击[链接](https://paddleseg.bj.bcebos.com/models/SpatialEmbeddings_kitti.tar)进行手动下载并解压。
+### 2. 数据下载
+前往KITTI官网下载MOTS比赛数据[链接](https://www.vision.rwth-aachen.de/page/mots)
+下载后解压到./data文件夹下, 并生成验证集图片路径的test.txt
+### 3. 快速预测
+使用GPU预测
+```
+python -u infer.py --use_gpu
+```
+使用CPU预测：
+```
+python -u infer.py
+```
+数据及模型路径等详细配置见config.py文件
+### 4. 预测结果示例
+  原图：
+  ![](imgs/kitti_0007_000518_ori.png)
+  预测结果：
+  ![](imgs/kitti_0007_000518_pred.png)
+## 引用
+**论文** 
+*Instance Segmentation by Jointly Optimizing Spatial Embeddings and Clustering Bandwidth* 
+**代码**
+https://github.com/davyneven/SpatialEmbeddings
--- a/contrib/SpatialEmbeddings/config.py
+++ b/contrib/SpatialEmbeddings/config.py
+# -*- coding: utf-8 -*-
+from utils.util import AttrDict, merge_cfg_from_args, get_arguments
+import os
+# Default inference settings; merge_cfg_from_args(args, cfg) below is handed
+# both the parsed command-line arguments and these defaults.
+args = get_arguments()
+cfg = AttrDict()
+# Directory containing the images to run prediction on
+cfg.data_dir = "data"
+# File listing the names of the images to run prediction on
+cfg.data_list_file = os.path.join("data", "test.txt")
+# Path the model is loaded from
+cfg.model_path = 'SpatialEmbeddings_kitti'
+# Directory where prediction results are saved
+cfg.vis_dir = "result"
+# Network input size; infer.py unpacks it as (height, width)
+cfg.input_size = (384, 1248)
+# Sigma value (passed to Cluster.cluster as n_sigma by infer.py)
+cfg.n_sigma = 2
+# Center-point threshold
+cfg.threshold = 0.94
+# Threshold on the number of points in a point set
+cfg.min_pixel = 160
+merge_cfg_from_args(args, cfg)
--- a/contrib/SpatialEmbeddings/data/kitti/0007/kitti_0007_000512.png
+++ b/contrib/SpatialEmbeddings/data/kitti/0007/kitti_0007_000512.png
--- a/contrib/SpatialEmbeddings/data/kitti/0007/kitti_0007_000518.png
+++ b/contrib/SpatialEmbeddings/data/kitti/0007/kitti_0007_000518.png
--- a/contrib/SpatialEmbeddings/data/test.txt
+++ b/contrib/SpatialEmbeddings/data/test.txt
+kitti/0007/kitti_0007_000512.png
+kitti/0007/kitti_0007_000518.png
--- a/contrib/SpatialEmbeddings/download_SpatialEmbeddings_kitti.py
+++ b/contrib/SpatialEmbeddings/download_SpatialEmbeddings_kitti.py
+# coding: utf8
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import sys
+import os
+# Directory containing this script; also used as the download/extract target.
+LOCAL_PATH = os.path.dirname(os.path.abspath(__file__))
+# "test" directory two levels above this script.
+TEST_PATH = os.path.join(LOCAL_PATH, "..", "..", "test")
+# Must run before the import below so that test_utils can be found there.
+sys.path.append(TEST_PATH)
+from test_utils import download_file_and_uncompress
+if __name__ == "__main__":
+    # Fetch the SpatialEmbeddings_kitti model archive next to this script.
+    download_file_and_uncompress(
+        url='https://paddleseg.bj.bcebos.com/models/SpatialEmbeddings_kitti.tar',
+        savepath=LOCAL_PATH,
+        extrapath=LOCAL_PATH,
+        extraname='SpatialEmbeddings_kitti')
+    print("Pretrained Model download success!")
--- a/contrib/SpatialEmbeddings/imgs/kitti_0007_000518_ori.png
+++ b/contrib/SpatialEmbeddings/imgs/kitti_0007_000518_ori.png
--- a/contrib/SpatialEmbeddings/imgs/kitti_0007_000518_pred.png
+++ b/contrib/SpatialEmbeddings/imgs/kitti_0007_000518_pred.png
--- a/contrib/SpatialEmbeddings/infer.py
+++ b/contrib/SpatialEmbeddings/infer.py
+# -*- coding: utf-8 -*-
+import os
+import numpy as np
+from utils.util import get_arguments
+from utils.palette import get_palette
+from utils.data_util import Cluster, pad_img
+from PIL import Image as PILImage
+import importlib
+import paddle.fluid as fluid
+from models import SpatialEmbeddings
+# Module-level setup shared by the dataset class and infer() below.
+args = get_arguments()
+# Load config.py by module name and take its cfg object as the settings source.
+config = importlib.import_module('config')
+cfg = getattr(config, 'cfg')
+# Single Cluster instance, used by infer() to post-process network outputs.
+cluster = Cluster()
+# Dataset of images to run prediction on.
+class TestDataSet():
+    """Loads and preprocesses the images named in ``cfg.data_list_file``.
+
+    Attributes:
+        data_dir: directory the listed image names are relative to.
+        data_list_file: text file with one image name per line.
+        data_list: image paths built from the list file.
+        data_num: number of entries in ``data_list``.
+    """
+    def __init__(self):
+        self.data_dir = cfg.data_dir
+        self.data_list_file = cfg.data_list_file
+        self.data_list = self.get_data_list()
+        self.data_num = len(self.data_list)
+
+    def get_data_list(self):
+        """Return the image paths named in the list file, one per non-blank line."""
+        data_list = []
+        # Context manager so the list file is always closed.
+        with open(self.data_list_file, 'r') as list_file:
+            for line in list_file:
+                img_name = line.strip()
+                if not img_name:
+                    # A blank line would otherwise become a bogus "<data_dir>/.jpg" entry.
+                    continue
+                # Names without any '.' get a '.jpg' extension appended.
+                if '.' not in img_name:
+                    img_name = img_name + '.jpg'
+                data_list.append(os.path.join(self.data_dir, img_name))
+        return data_list
+
+    def preprocess(self, img):
+        """Pad ``img`` to ``cfg.input_size`` and convert it to a network input.
+
+        The image is edge-padded at the bottom/right up to
+        ``(height, width) = cfg.input_size``, scaled to float32 by 1/255,
+        transposed with axes (2, 0, 1) and given a leading axis of size 1.
+        The pad spec has three entries, so ``img`` must be a 3-D array that
+        does not exceed ``cfg.input_size`` (np.pad rejects negative widths).
+        """
+        h, w = img.shape[:2]
+        h_new, w_new = cfg.input_size
+        img = np.pad(img, ((0, h_new - h), (0, w_new - w), (0, 0)), 'edge')
+        img = img.astype(np.float32) / 255.0
+        img = img.transpose((2, 0, 1))
+        img = np.expand_dims(img, axis=0)
+        return img
+
+    def get_data(self, index):
+        """Load and preprocess the image at position ``index`` of ``data_list``.
+
+        Returns:
+            A 3-tuple ``(img_process, name_prefix, img_shape)`` where
+            ``img_shape`` is the first two dimensions of the loaded array.
+            If the file cannot be opened or decoded the tuple is
+            ``(None, img_path, None)``, so callers that unpack three values
+            and test the first one for None can report the path and skip it.
+        """
+        img_path = self.data_list[index]
+        try:
+            img = np.array(PILImage.open(img_path))
+        except (IOError, OSError):
+            # Always three values: the caller unpacks exactly three.
+            return None, img_path, None
+        # basename (not split(os.sep)) so '/'-separated list entries also
+        # yield a bare file name on platforms where os.sep is not '/'.
+        img_name = os.path.basename(img_path)
+        name_prefix = img_name.replace('.' + img_name.split('.')[-1], '')
+        img_shape = img.shape[:2]
+        img_process = self.preprocess(img)
+        return img_process, name_prefix, img_shape
+def get_model(main_prog, startup_prog):
+    """Build the SpatialEmbeddings network inside ``main_prog``.
+
+    Returns:
+        A pair ``(image input variable, SpatialEmbeddings output)``. The input
+        is declared under the name 'image' with shape
+        ``[3, cfg.input_size[0], cfg.input_size[1]]`` and dtype float32.
+    """
+    dim0, dim1 = cfg.input_size[0], cfg.input_size[1]
+    with fluid.program_guard(main_prog, startup_prog), fluid.unique_name.guard():
+        image = fluid.layers.data(name='image', shape=[3, dim0, dim1], dtype='float32')
+        prediction = SpatialEmbeddings(image)
+    return image, prediction
+def infer():
+    """Run SpatialEmbeddings inference over the configured image list.
+
+    Builds the test program, loads the variables found under
+    ``cfg.model_path``, then for every dataset entry runs the network,
+    clusters the output and saves a palettized PNG under ``cfg.vis_dir``.
+
+    Returns:
+        0 once every entry has been visited.
+    """
+    if not os.path.exists(cfg.vis_dir):
+        os.makedirs(cfg.vis_dir)
+    startup_prog = fluid.Program()
+    test_prog = fluid.Program()
+    # Only the fetch target is used; the input is fed by its name 'image'.
+    _, output = get_model(test_prog, startup_prog)
+    test_prog = test_prog.clone(for_test=True)
+    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
+    exe = fluid.Executor(place)
+    exe.run(startup_prog)
+
+    # Load the inference model: only variables that have a file of the same
+    # name under cfg.model_path are loaded.
+    def if_exist(var):
+        return os.path.exists(os.path.join(cfg.model_path, var.name))
+    fluid.io.load_vars(exe, cfg.model_path, main_program=test_prog, predicate=if_exist)
+
+    # Load the prediction dataset.
+    test_dataset = TestDataSet()
+    data_num = test_dataset.data_num
+    for idx in range(data_num):
+        # Fetch one preprocessed image.
+        image, im_name, im_shape = test_dataset.get_data(idx)
+        if image is None:
+            print(im_name, 'is None')
+            continue
+        # Run the network and cluster its first output.
+        outputs = exe.run(program=test_prog, feed={'image': image}, fetch_list=output)
+        instance_map, predictions = cluster.cluster(
+            outputs[0][0], n_sigma=cfg.n_sigma,
+            min_pixel=cfg.min_pixel, threshold=cfg.threshold)
+        # Save the result: pad_img is given the fed image's spatial size, then
+        # the map is cropped to the size reported by the dataset.
+        instance_map = pad_img(instance_map, image.shape[2:])
+        instance_map = instance_map[:im_shape[0], :im_shape[1]]
+        output_im = PILImage.fromarray(np.asarray(instance_map, dtype=np.uint8))
+        palette = get_palette(len(predictions) + 1)
+        output_im.putpalette(palette)
+        result_path = os.path.join(cfg.vis_dir, im_name+'.png')
+        output_im.save(result_path)
+        if (idx + 1) % 100 == 0:
+            print('%d  processd' % (idx + 1))
+    # data_num equals idx + 1 after a non-empty loop and, unlike idx, is also
+    # defined when the dataset is empty.
+    print('%d  processd done' % data_num)
+    return 0
+# Run inference only when this file is executed as a script.
+if __name__ == "__main__":
+    infer()
--- a/contrib/SpatialEmbeddings/models.py
+++ b/contrib/SpatialEmbeddings/models.py
+from paddle.fluid.initializer import Constant
+from paddle.fluid.param_attr import ParamAttr
+import paddle.fluid as fluid
+def conv(input,
+         num_filters,
+         filter_size=None,
+         stride=1,
+         padding=0,
+         dilation=1,
+         act=None,
+         name='conv'):
+    """Call fluid.layers.conv2d with parameter names derived from ``name``.
+
+    The weight is passed as '<name>_weights' and the bias as '<name>_bias';
+    all other arguments are forwarded unchanged.
+    """
+    weight_name = name + '_weights'
+    bias_name = name + '_bias'
+    return fluid.layers.conv2d(
+        input,
+        num_filters=num_filters,
+        filter_size=filter_size,
+        stride=stride,
+        padding=padding,
+        dilation=dilation,
+        act=act,
+        name=name,
+        param_attr=weight_name,
+        bias_attr=bias_name)
+def conv_transpose(input,
+        num_filters,
+        output_size=None,
+        filter_size=None,
+        stride=1,
+        padding=0,
+        act=None,
+        name='conv_transpose'):
+    """Call fluid.layers.conv2d_transpose with parameter names derived from ``name``.
+
+    The weight is passed as '<name>_weights' and the bias as '<name>_bias'.
+    ``output_size`` is forwarded to the layer; it used to be accepted and
+    silently dropped. The default ``None`` is passed through unchanged, so
+    callers that do not set it are unaffected.
+    """
+    return fluid.layers.conv2d_transpose(input,
+            num_filters=num_filters,
+            output_size=output_size,
+            filter_size=filter_size,
+            stride=stride,
+            padding=padding,
+            act=act,
+            name=name,
+            param_attr=name+'_weights',
+            bias_attr=name+'_bias')
+# Epsilon handed to every batch_norm layer built by bn().
+EPSILON = 0.0010000000474974513
+def bn(input, name):
+    """Call fluid.layers.batch_norm in test mode with names derived from ``name``.
+
+    With ``name='batch_norm<id>'`` the scale is 'bn_scale<id>_scale', the
+    offset is 'bn_scale<id>_offset', and the moving statistics are
+    '<name>_mean' and '<name>_variance'.
+    """
+    suffix = name.replace('batch_norm', '')
+    scale_name = 'bn_scale' + suffix + '_scale'
+    offset_name = 'bn_scale' + suffix + '_offset'
+    return fluid.layers.batch_norm(
+        input,
+        is_test=True,
+        epsilon=EPSILON,
+        param_attr=scale_name,
+        bias_attr=offset_name,
+        moving_mean_name=name + '_mean',
+        moving_variance_name=name + '_variance',
+        name=name)
+def max_pool(input, pool_size=2, pool_stride=2, name=None):
+    """Call fluid.layers.pool2d for max pooling with ceil_mode=True and exclusive=False."""
+    return fluid.layers.pool2d(
+        input,
+        pool_type='max',
+        pool_size=pool_size,
+        pool_stride=pool_stride,
+        ceil_mode=True,
+        exclusive=False,
+        name=name)
+def SpatialEmbeddings(input):
+    conv1 = conv(input, filter_size=3, num_filters=13, stride=2, padding=1, name='conv1')
+    max_pool1 = fluid.layers.pool2d(input, pool_size=2, pool_stride=2, name='max_pool1')
+    cat1 = fluid.layers.concat([conv1, max_pool1], axis=1, name='cat1')
+    bn_scale1 = bn(cat1, name='batch_norm1')
+    relu1 = fluid.layers.relu(bn_scale1)
+    conv2 = conv(relu1, filter_size=3, num_filters=48, stride=2, padding=1, name='conv2')
+    max_pool2 = fluid.layers.pool2d(relu1, pool_size=2, pool_stride=2, name='max_pool2')
+    cat2 = fluid.layers.concat([conv2, max_pool2], axis=1, name='cat2')
+    bn_scale2 = bn(cat2, name='batch_norm2')
+    relu2 = fluid.layers.relu(bn_scale2)
+    relu3 = conv(relu2, filter_size=[3, 1], num_filters=64, padding=[1, 0], name='conv3', act='relu')
+    conv4 = conv(relu3, filter_size=[1, 3], num_filters=64, padding=[0, 1], name='conv4')
+    bn_scale3 = bn(conv4, name='batch_norm3')
+    relu4 = fluid.layers.relu(bn_scale3)
+    relu5 = conv(relu4, filter_size=[3, 1], num_filters=64, padding=[1, 0], name='conv5', act='relu')
+    conv6 = conv(relu5, filter_size=[1, 3], num_filters=64, padding=[0, 1], name='conv6')
+    bn_scale4 = bn(conv6,  name='batch_norm4')
+    add1 = fluid.layers.elementwise_add(x=bn_scale4, y=relu2, name='add1')
+    relu6 = fluid.layers.relu(add1)
+    relu7 = conv(relu6, filter_size=[3, 1], num_filters=64, padding=[1, 0], name='conv7', act='relu')
+    conv8 = conv(relu7, filter_size=[1, 3], num_filters=64, padding=[0, 1], name='conv8')
+    bn_scale5 = bn(conv8,  name='batch_norm5')
+    relu8 = fluid.layers.relu(bn_scale5)
+    relu9 = conv(relu8, filter_size=[3, 1], num_filters=64, padding=[1, 0], name='conv9', act='relu')
+    conv10 = conv(relu9, filter_size=[1, 3], num_filters=64, padding=[0, 1], name='conv10')
+    bn_scale6 = bn(conv10,  name='batch_norm6')
+    add2 = fluid.layers.elementwise_add(x=bn_scale6, y=relu6, name='add2')
+    relu10 = fluid.layers.relu(add2)
+    relu11 = conv(relu10, filter_size=[3, 1], num_filters=64, padding=[1, 0], name='conv11', act='relu')
+    conv12 = conv(relu11, filter_size=[1, 3], num_filters=64, padding=[0, 1], name='conv12')
+    bn_scale7 = bn(conv12,  name='batch_norm7')
+    relu12 = fluid.layers.relu(bn_scale7)
+    relu13 = conv(relu12, filter_size=[3, 1], num_filters=64, padding=[1, 0], name='conv13', act='relu')
+    conv14 = conv(relu13, filter_size=[1, 3], num_filters=64, padding=[0, 1], name='conv14')
+    bn_scale8 = bn(conv14,  name='batch_norm8')
+    add3 = fluid.layers.elementwise_add(x=bn_scale8, y=relu10, name='add3')
+    relu14 = fluid.layers.relu(add3)
+    relu15 = conv(relu14, filter_size=[3, 1], num_filters=64, padding=[1, 0], name='conv15', act='relu')
+    conv16 = conv(relu15, filter_size=[1, 3], num_filters=64, padding=[0, 1], name='conv16')
+    bn_scale9 = bn(conv16,  name='batch_norm9')
+    relu16 = fluid.layers.relu(bn_scale9)
+    relu17 = conv(relu16, filter_size=[3, 1], num_filters=64, padding=[1, 0], name='conv17', act='relu')
+    conv18 = conv(relu17, filter_size=[1, 3], num_filters=64, padding=[0, 1], name='conv18')
+    bn_scale10 = bn(conv18,  name='batch_norm10')
+    add4 = fluid.layers.elementwise_add(x=bn_scale10, y=relu14, name='add4')
+    relu18 = fluid.layers.relu(add4)
+    relu19 = conv(relu18, filter_size=[3, 1], num_filters=64, padding=[1, 0], name='conv19', act='relu')
+    conv20 = conv(relu19, filter_size=[1, 3], num_filters=64, padding=[0, 1], name='conv20')
+    bn_scale11 = bn(conv20,  name='batch_norm11')
+    relu20 = fluid.layers.relu(bn_scale11)
+    relu21 = conv(relu20, filter_size=[3, 1], num_filters=64, padding=[1, 0], name='conv21', act='relu')
+    conv22 = conv(relu21, filter_size=[1, 3], num_filters=64, padding=[0, 1], name='conv22')
+    bn_scale12 = bn(conv22,  name='batch_norm12')
+    add5 = fluid.layers.elementwise_add(x=bn_scale12, y=relu18, name='add5')
+    relu22 = fluid.layers.relu(add5)
+    conv23 = conv(relu22, filter_size=3, num_filters=64, stride=2, padding=1, name='conv23')
+    max_pool3 = fluid.layers.pool2d(relu22, pool_size=2, pool_stride=2, name='max_pool3')
+    cat3 = fluid.layers.concat([conv23, max_pool3], axis=1, name='cat3')
+    bn_scale13 = bn(cat3,  name='batch_norm13')
+    relu23 = fluid.layers.relu(bn_scale13)
+    relu24 = conv(relu23, filter_size=[3, 1], num_filters=128, padding=[1, 0], name='conv24', act='relu')
+    conv25 = conv(relu24, filter_size=[1, 3], num_filters=128, padding=[0, 1], name='conv25')
+    bn_scale14 = bn(conv25,  name='batch_norm14')
+    relu25 = fluid.layers.relu(bn_scale14)
+    relu26 = conv(relu25, filter_size=[3, 1], num_filters=128, padding=[2, 0], dilation=[2, 1], name='conv26', act='relu')
+    conv27 = conv(relu26, filter_size=[1, 3], num_filters=128, padding=[0, 2], dilation=[1, 2], name='conv27')
+    bn_scale15 = bn(conv27,  name='batch_norm15')
+    add6 = fluid.layers.elementwise_add(x=bn_scale15, y=relu23, name='add6')
+    relu27 = fluid.layers.relu(add6)
+    relu28 = conv(relu27, filter_size=[3, 1], num_filters=128, padding=[1, 0], name='conv28', act='relu')
+    conv29 = conv(relu28, filter_size=[1, 3], num_filters=128, padding=[0, 1], name='conv29')
+    bn_scale16 = bn(conv29,  name='batch_norm16')
+    relu29 = fluid.layers.relu(bn_scale16)
+    relu30 = conv(relu29, filter_size=[3, 1], num_filters=128, padding=[4, 0], dilation=[4, 1], name='conv30', act='relu')
+    conv31 = conv(relu30, filter_size=[1, 3], num_filters=128, padding=[0, 4], dilation=[1, 4], name='conv31')
+    bn_scale17 = bn(conv31,  name='batch_norm17')
+    add7 = fluid.layers.elementwise_add(x=bn_scale17, y=relu27, name='add7')
+    relu31 = fluid.layers.relu(add7)
+    relu32 = conv(relu31, filter_size=[3, 1], num_filters=128, padding=[1, 0], name='conv32', act='relu')
+    conv33 = conv(relu32, filter_size=[1, 3], num_filters=128, padding=[0, 1], name='conv33')
+    bn_scale18 = bn(conv33,  name='batch_norm18')
+    relu33 = fluid.layers.relu(bn_scale18)
+    relu34 = conv(relu33, filter_size=[3, 1], num_filters=128, padding=[8, 0], dilation=[8, 1], name='conv34', act='relu')
+    conv35 = conv(relu34, filter_size=[1, 3], num_filters=128, padding=[0, 8], dilation=[1, 8], name='conv35')
+    bn_scale19 = bn(conv35,  name='batch_norm19')
+    add8 = fluid.layers.elementwise_add(x=bn_scale19, y=relu31, name='add8')
+    relu35 = fluid.layers.relu(add8)
+    relu36 = conv(relu35, filter_size=[3, 1], num_filters=128, padding=[1, 0], name='conv36', act='relu')
+    conv37 = conv(relu36, filter_size=[1, 3], num_filters=128, padding=[0, 1], name='conv37')
+    bn_scale20 = bn(conv37,  name='batch_norm20')
+    relu37 = fluid.layers.relu(bn_scale20)
+    relu38 = conv(relu37, filter_size=[3, 1], num_filters=128, padding=[16, 0], dilation=[16, 1], name='conv38', act='relu')
+    conv39 = conv(relu38, filter_size=[1, 3], num_filters=128, padding=[0, 16], dilation=[1, 16], name='conv39')
+    bn_scale21 = bn(conv39,  name='batch_norm21')
+    add9 = fluid.layers.elementwise_add(x=bn_scale21, y=relu35, name='add9')
+    relu39 = fluid.layers.relu(add9)
+    relu40 = conv(relu39, filter_size=[3, 1], num_filters=128, padding=[1, 0], name='conv40', act='relu')
+    conv41 = conv(relu40, filter_size=[1, 3], num_filters=128, padding=[0, 1], name='conv41')
+    bn_scale22 = bn(conv41,  name='batch_norm22')
+    relu41 = fluid.layers.relu(bn_scale22)
+    relu42 = conv(relu41, filter_size=[3, 1], num_filters=128, padding=[2, 0], dilation=[2, 1], name='conv42', act='relu')
+    conv43 = conv(relu42, filter_size=[1, 3], num_filters=128, padding=[0, 2], dilation=[1, 2], name='conv43')
+    bn_scale23 = bn(conv43,  name='batch_norm23')
+    add10 = fluid.layers.elementwise_add(x=bn_scale23, y=relu39, name='add10')
+    relu43 = fluid.layers.relu(add10)
+    relu44 = conv(relu43, filter_size=[3, 1], num_filters=128, padding=[1, 0], name='conv44', act='relu')
+    conv45 = conv(relu44, filter_size=[1, 3], num_filters=128, padding=[0, 1], name='conv45')
+    bn_scale24 = bn(conv45,  name='batch_norm24')
+    relu45 = fluid.layers.relu(bn_scale24)
+    relu46 = conv(relu45, filter_size=[3, 1], num_filters=128, padding=[4, 0], dilation=[4, 1], name='conv46', act='relu')
+    conv47 = conv(relu46, filter_size=[1, 3], num_filters=128, padding=[0, 4], dilation=[1, 4], name='conv47')
+    bn_scale25 = bn(conv47,  name='batch_norm25')
+    add11 = fluid.layers.elementwise_add(x=bn_scale25, y=relu43, name='add11')
+    relu47 = fluid.layers.relu(add11)
+    relu48 = conv(relu47, filter_size=[3, 1], num_filters=128, padding=[1, 0], name='conv48', act='relu')
+    conv49 = conv(relu48, filter_size=[1, 3], num_filters=128, padding=[0, 1], name='conv49')
+    bn_scale26 = bn(conv49,  name='batch_norm26')
+    relu49 = fluid.layers.relu(bn_scale26)
+    relu50 = conv(relu49, filter_size=[3, 1], num_filters=128, padding=[8, 0], dilation=[8, 1], name='conv50', act='relu')
+    conv51 = conv(relu50, filter_size=[1, 3], num_filters=128, padding=[0, 8], dilation=[1, 8], name='conv51')
+    bn_scale27 = bn(conv51,  name='batch_norm27')
+    add12 = fluid.layers.elementwise_add(x=bn_scale27, y=relu47, name='add12')
+    relu51 = fluid.layers.relu(add12)
+    relu52 = conv(relu51, filter_size=[3, 1], num_filters=128, padding=[1, 0], name='conv52', act='relu')
+    conv53 = conv(relu52, filter_size=[1, 3], num_filters=128, padding=[0, 1], name='conv53')
+    bn_scale28 = bn(conv53,  name='batch_norm28')
+    relu53 = fluid.layers.relu(bn_scale28)
+    relu54 = conv(relu53, filter_size=[3, 1], num_filters=128, padding=[16, 0], dilation=[16, 1], name='conv54', act='relu')
+    conv55 = conv(relu54, filter_size=[1, 3], num_filters=128, padding=[0, 16], dilation=[1, 16], name='conv55')
+    bn_scale29 = bn(conv55,  name='batch_norm29')
+    add13 = fluid.layers.elementwise_add(x=bn_scale29, y=relu51, name='add13')
+    relu55 = fluid.layers.relu(add13)
+    conv_transpose1 = conv_transpose(relu55, filter_size=3, num_filters=64, stride=2, padding=1, name='conv_transpose1')
+    conv_transpose4 = conv_transpose(relu55, filter_size=3, num_filters=64, stride=2, padding=1, name='conv_transpose4')
+    bn_scale30 = bn(conv_transpose1,  name='batch_norm30')
+    bn_scale40 = bn(conv_transpose4,  name='batch_norm40')
+    relu56 = fluid.layers.relu(bn_scale30)
+    relu74 = fluid.layers.relu(bn_scale40)
+    relu57 = conv(relu56, filter_size=[3, 1], num_filters=64, padding=[1, 0], name='conv56', act='relu')
+    relu75 = conv(relu74, filter_size=[3, 1], num_filters=64, padding=[1, 0], name='conv72', act='relu')
+    conv57 = conv(relu57, filter_size=[1, 3], num_filters=64, padding=[0, 1], name='conv57')
+    conv73 = conv(relu75, filter_size=[1, 3], num_filters=64, padding=[0, 1], name='conv73')
+    bn_scale31 = bn(conv57,  name='batch_norm31')
+    bn_scale41 = bn(conv73,  name='batch_norm41')
+    relu58 = fluid.layers.relu(bn_scale31)
+    relu76 = fluid.layers.relu(bn_scale41)
+    relu59 = conv(relu58, filter_size=[3, 1], num_filters=64, padding=[1, 0], name='conv58', act='relu')
+    relu77 = conv(relu76, filter_size=[3, 1], num_filters=64, padding=[1, 0], name='conv74', act='relu')
+    conv59 = conv(relu59, filter_size=[1, 3], num_filters=64, padding=[0, 1], name='conv59')
+    conv75 = conv(relu77, filter_size=[1, 3], num_filters=64, padding=[0, 1], name='conv75')
+    bn_scale32 = bn(conv59,  name='batch_norm32')
+    bn_scale42 = bn(conv75,  name='batch_norm42')
+    add14 = fluid.layers.elementwise_add(x=bn_scale32, y=relu56, name='add14')
+    add18 = fluid.layers.elementwise_add(x=bn_scale42, y=relu74, name='add18')
+    relu60 = fluid.layers.relu(add14)
+    relu78 = fluid.layers.relu(add18)
+    relu61 = conv(relu60, filter_size=[3, 1], num_filters=64, padding=[1, 0], name='conv60', act='relu')
+    relu79 = conv(relu78, filter_size=[3, 1], num_filters=64, padding=[1, 0], name='conv76', act='relu')
+    conv61 = conv(relu61, filter_size=[1, 3], num_filters=64, padding=[0, 1], name='conv61')
+    conv77 = conv(relu79, filter_size=[1, 3], num_filters=64, padding=[0, 1], name='conv77')
+    bn_scale33 = bn(conv61,  name='batch_norm33')
+    bn_scale43 = bn(conv77,  name='batch_norm43')
+    relu62 = fluid.layers.relu(bn_scale33)
+    relu80 = fluid.layers.relu(bn_scale43)
+    relu63 = conv(relu62, filter_size=[3, 1], num_filters=64, padding=[1, 0], name='conv62', act='relu')
+    relu81 = conv(relu80, filter_size=[3, 1], num_filters=64, padding=[1, 0], name='conv78', act='relu')
+    conv63 = conv(relu63, filter_size=[1, 3], num_filters=64, padding=[0, 1], name='conv63')
+    conv79 = conv(relu81, filter_size=[1, 3], num_filters=64, padding=[0, 1], name='conv79')
+    bn_scale34 = bn(conv63,  name='batch_norm34')
+    bn_scale44 = bn(conv79,  name='batch_norm44')
+    add15 = fluid.layers.elementwise_add(x=bn_scale34, y=relu60, name='add15')
+    add19 = fluid.layers.elementwise_add(x=bn_scale44, y=relu78, name='add19')
+    relu64 = fluid.layers.relu(add15)
+    relu82 = fluid.layers.relu(add19)
+    conv_transpose2 = conv_transpose(relu64, filter_size=3, num_filters=16, stride=2, padding=1, name='conv_transpose2')
+    conv_transpose5 = conv_transpose(relu82, filter_size=3, num_filters=16, stride=2, padding=1, name='conv_transpose5')
+    bn_scale35 = bn(conv_transpose2,  name='batch_norm35')
+    bn_scale45 = bn(conv_transpose5,  name='batch_norm45')
+    relu65 = fluid.layers.relu(bn_scale35)
+    relu83 = fluid.layers.relu(bn_scale45)
+    relu66 = conv(relu65, filter_size=[3, 1], num_filters=16, padding=[1, 0], name='conv64', act='relu')
+    relu84 = conv(relu83, filter_size=[3, 1], num_filters=16, padding=[1, 0], name='conv80', act='relu')
+    conv65 = conv(relu66, filter_size=[1, 3], num_filters=16, padding=[0, 1], name='conv65')
+    conv81 = conv(relu84, filter_size=[1, 3], num_filters=16, padding=[0, 1], name='conv81')
+    bn_scale36 = bn(conv65,  name='batch_norm36')
+    bn_scale46 = bn(conv81,  name='batch_norm46')
+    relu67 = fluid.layers.relu(bn_scale36)
+    relu85 = fluid.layers.relu(bn_scale46)
+    relu68 = conv(relu67, filter_size=[3, 1], num_filters=16, padding=[1, 0], name='conv66', act='relu')
+    relu86 = conv(relu85, filter_size=[3, 1], num_filters=16, padding=[1, 0], name='conv82', act='relu')
+    conv67 = conv(relu68, filter_size=[1, 3], num_filters=16, padding=[0, 1], name='conv67')
+    conv83 = conv(relu86, filter_size=[1, 3], num_filters=16, padding=[0, 1], name='conv83')
+    bn_scale37 = bn(conv67,  name='batch_norm37')
+    bn_scale47 = bn(conv83,  name='batch_norm47')
+    add16 = fluid.layers.elementwise_add(x=bn_scale37, y=relu65, name='add16')
+    add20 = fluid.layers.elementwise_add(x=bn_scale47, y=relu83, name='add20')
+    relu69 = fluid.layers.relu(add16)
+    relu87 = fluid.layers.relu(add20)
+    relu70 = conv(relu69, filter_size=[3, 1], num_filters=16, padding=[1, 0], name='conv68', act='relu')
+    relu88 = conv(relu87, filter_size=[3, 1], num_filters=16, padding=[1, 0], name='conv84', act='relu')
+    conv69 = conv(relu70, filter_size=[1, 3], num_filters=16, padding=[0, 1], name='conv69')
+    conv85 = conv(relu88, filter_size=[1, 3], num_filters=16, padding=[0, 1], name='conv85')
+    bn_scale38 = bn(conv69,  name='batch_norm38')
+    bn_scale48 = bn(conv85,  name='batch_norm48')
+    relu71 = fluid.layers.relu(bn_scale38)
+    relu89 = fluid.layers.relu(bn_scale48)
+    relu72 = conv(relu71, filter_size=[3, 1], num_filters=16, padding=[1, 0], name='conv70', act='relu')
+    relu90 = conv(relu89, filter_size=[3, 1], num_filters=16, padding=[1, 0], name='conv86', act='relu')
+    conv71 = conv(relu72, filter_size=[1, 3], num_filters=16, padding=[0, 1], name='conv71')
+    conv87 = conv(relu90, filter_size=[1, 3], num_filters=16, padding=[0, 1], name='conv87')
+    bn_scale39 = bn(conv71,  name='batch_norm39')
+    bn_scale49 = bn(conv87,  name='batch_norm49')
+    add17 = fluid.layers.elementwise_add(x=bn_scale39, y=relu69, name='add17')
+    add21 = fluid.layers.elementwise_add(x=bn_scale49, y=relu87, name='add21')
+    relu73 = fluid.layers.relu(add17)
+    relu91 = fluid.layers.relu(add21)
+    conv_transpose3 = conv_transpose(relu73, filter_size=2, num_filters=4, stride=2, name='conv_transpose3')
+    conv_transpose6 = conv_transpose(relu91, filter_size=2, num_filters=1, stride=2, name='conv_transpose6')
+    cat4 = fluid.layers.concat([conv_transpose3, conv_transpose6], axis=1, name='cat4')
+    return cat4
--- a/contrib/SpatialEmbeddings/utils/__init__.py
+++ b/contrib/SpatialEmbeddings/utils/__init__.py
--- a/contrib/SpatialEmbeddings/utils/data_util.py
+++ b/contrib/SpatialEmbeddings/utils/data_util.py
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import os
+import numpy as np
+from PIL import Image as PILImage
+def sigmoid_np(x):
+    """Apply the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.
+
+    Args:
+        x: A number or numpy array.
+
+    Returns:
+        The sigmoid of ``x``, with the same shape as ``x``.
+    """
+    denominator = 1 + np.exp(-x)
+    return 1 / denominator
+class Cluster:
+    """Greedy instance clustering on spatial-embedding network output.
+
+    The constructor builds a fixed coordinate grid once; ``cluster`` adds it
+    to the predicted offsets and then grows instances from the highest
+    scoring seed pixels.
+    """
+    def __init__(self):
+        # Coordinate grid of shape (2, 1024, 2048): channel 0 runs from 0 to 2
+        # along the last axis, channel 1 runs from 0 to 1 along the middle axis.
+        x_coords = np.linspace(0, 2, 2048)
+        y_coords = np.linspace(0, 1, 1024)
+        grid_x, grid_y = np.meshgrid(x_coords, y_coords)
+        self.xym = np.stack((grid_x, grid_y), axis=0)
+    def cluster(self, prediction, n_sigma=1, min_pixel=160, threshold=0.5):
+        """Split one prediction into instance masks.
+
+        Args:
+            prediction: Array indexed as (channel, height, width). Channels
+                0-1 are read as offsets, the next ``n_sigma`` channels as
+                sigma and the following channel as the seed logit.
+            n_sigma: Number of sigma channels.
+            min_pixel: A proposal (and the remaining unclustered set) must
+                contain more than this many pixels to be considered.
+            threshold: Clustering stops once the best remaining seed score
+                falls below this value.
+
+        Returns:
+            A tuple ``(instance_map, instances)``. ``instance_map`` is a
+            float32 (height, width) array holding instance ids starting at 1
+            and 0 elsewhere. ``instances`` is a list of dicts with a uint8
+            ``'mask'`` (values 0 or 255) and the seed ``'score'``.
+        """
+        height, width = prediction.shape[1:3]
+        grid = self.xym[:, 0:height, 0:width]
+        spatial_emb = np.tanh(prediction[0:2]) + grid
+        sigma = prediction[2:2 + n_sigma]
+        seed_map = sigmoid_np(prediction[2 + n_sigma:2 + n_sigma + 1])
+        instance_map = np.zeros((height, width), np.float32)
+        instances = []
+        # Only pixels whose seed probability exceeds 0.5 take part.
+        mask = seed_map > 0.5
+        num_fg = mask.sum()
+        if not (num_fg > min_pixel):
+            return instance_map, instances
+        mask_2d = mask.squeeze()
+        emb_fg = spatial_emb[np.repeat(mask, spatial_emb.shape[0], 0)].reshape(2, -1)
+        sigma_fg = sigma[np.repeat(mask, n_sigma, 0)].reshape(n_sigma, -1)
+        seed_fg = seed_map[mask].reshape(1, -1)
+        # 1 marks a foreground pixel that no proposal has claimed yet.
+        unclustered = np.ones(num_fg, np.float32)
+        labels_fg = np.zeros(num_fg, np.float32)
+        next_id = 1
+        while unclustered.sum() > min_pixel:
+            # Best-scoring pixel among those still unclustered.
+            candidates = seed_fg * unclustered
+            seed = candidates.argmax().item()
+            seed_score = candidates.max().item()
+            if seed_score < threshold:
+                break
+            center = emb_fg[:, seed:seed + 1]
+            unclustered[seed] = 0
+            scale = np.exp(sigma_fg[:, seed:seed + 1] * 10)
+            dist = np.exp(-1 * np.sum((emb_fg - center) ** 2 * scale, 0))
+            proposal = (dist > 0.5).squeeze()
+            proposal_size = proposal.sum()
+            # Accept the proposal only if it is large enough and more than
+            # half of it is still unclustered.
+            if proposal_size > min_pixel and \
+                    unclustered[proposal].sum() / proposal_size > 0.5:
+                labels_fg[proposal] = next_id
+                instance_mask = np.zeros((height, width), np.float32)
+                instance_mask[mask_2d] = proposal
+                instances.append({
+                    'mask': (instance_mask.squeeze() * 255).astype(np.uint8),
+                    'score': seed_score,
+                })
+                next_id += 1
+            # Proposal pixels are retired whether or not it was accepted.
+            unclustered[proposal] = 0
+        instance_map[mask_2d] = labels_fg
+        return instance_map, instances
+def pad_img(img, dst_shape, mode='constant'):
+    """Pad an image on the bottom and right up to ``dst_shape``.
+
+    Args:
+        img: numpy array whose first two axes are (height, width); any
+            further axes (e.g. channels) are left unpadded.
+        dst_shape: Target ``(height, width)``. An axis that is already at
+            least as large as its target is left unchanged (never cropped).
+        mode: Padding mode forwarded to ``np.pad``.
+
+    Returns:
+        The padded array.
+    """
+    img_h, img_w = img.shape[:2]
+    dst_h, dst_w = dst_shape
+    pad_width = [(0, max(0, dst_h - img_h)), (0, max(0, dst_w - img_w))]
+    # np.pad needs one (before, after) pair per axis; without the trailing
+    # (0, 0) pairs any H x W x C input would raise.
+    pad_width.extend([(0, 0)] * (img.ndim - 2))
+    return np.pad(img, pad_width, mode)
+def save_for_eval(predictions, infer_shape, im_shape, vis_dir, im_name):
+    """Write instance masks as PNG files plus a text index listing them.
+
+    For every prediction a file ``<im_name>_<NN>.png`` is saved in
+    ``vis_dir`` and a line ``"<png name> <class id> <score>"`` is appended to
+    ``<vis_dir>/<im_name>.txt``.
+
+    Args:
+        predictions: Iterable of dicts with a ``'mask'`` array and a
+            ``'score'`` number.
+        infer_shape: ``(height, width)`` each mask is padded to first.
+        im_shape: The padded mask is then cropped to
+            ``im_shape[0] x im_shape[1]``.
+        vis_dir: Output directory.
+        im_name: Base name used for the PNG files and the text file.
+    """
+    # Every instance is written with this fixed class id.
+    # NOTE(review): presumably a dataset-specific label id - confirm.
+    class_id = 26
+    index_path = os.path.join(vis_dir, im_name + '.txt')
+    with open(index_path, 'w') as index_file:
+        for idx, pred in enumerate(predictions):
+            save_name = im_name + '_{:02d}.png'.format(idx)
+            padded = pad_img(pred['mask'], infer_shape)
+            cropped = padded[:im_shape[0], :im_shape[1]]
+            PILImage.fromarray(cropped).save(os.path.join(vis_dir, save_name))
+            line = "{} {} {:.02f}\n".format(save_name, class_id, pred['score'])
+            index_file.write(line)
--- a/contrib/SpatialEmbeddings/utils/palette.py
+++ b/contrib/SpatialEmbeddings/utils/palette.py
+##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+## Created by: RainbowSecret
+## Microsoft Research
+## yuyua@microsoft.com
+## Copyright (c) 2018
+##
+## This source code is licensed under the MIT-style license found in the
+## LICENSE file in the root directory of this source tree
+##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import numpy as np
+import cv2
+def get_palette(num_cls):
+    """Build the flat colour palette used to visualise segmentation masks.
+
+    Args:
+        num_cls: Number of classes.
+    Returns:
+        A list of ``3 * num_cls`` integers; entries ``3*j .. 3*j+2`` are the
+        three colour components of class ``j``.
+    """
+    palette = []
+    for label in range(num_cls):
+        color = [0, 0, 0]
+        remaining = label
+        bit = 7
+        # Take three bits of the label per round, one for each channel, and
+        # place them from the most significant colour bit downwards.
+        while remaining:
+            for channel in range(3):
+                color[channel] |= ((remaining >> channel) & 1) << bit
+            bit -= 1
+            remaining >>= 3
+        palette.extend(color)
+    return palette
--- a/contrib/SpatialEmbeddings/utils/util.py
+++ b/contrib/SpatialEmbeddings/utils/util.py
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+import argparse
+import os
+def get_arguments():
+    """Parse the command-line options ``--use_gpu`` and ``--example``.
+
+    Returns:
+        argparse.Namespace with a boolean ``use_gpu`` and a string (or None)
+        ``example``.
+    """
+    parser = argparse.ArgumentParser()
+    option_specs = (
+        ("--use_gpu", dict(action="store_true",
+                           help="Use gpu or cpu to test.")),
+        ('--example', dict(type=str,
+                           help='RoadLine, HumanSeg or ACE2P')),
+    )
+    for flag, options in option_specs:
+        parser.add_argument(flag, **options)
+    return parser.parse_args()
+class AttrDict(dict):
+    """A dict whose items can also be read and written as attributes.
+
+    Names already present in the instance ``__dict__`` take precedence;
+    everything else is stored as a regular dictionary item.
+    """
+    def __init__(self, *args, **kwargs):
+        dict.__init__(self, *args, **kwargs)
+    def __getattr__(self, name):
+        # Only reached when normal attribute lookup has failed.
+        instance_attrs = self.__dict__
+        if name in instance_attrs:
+            return instance_attrs[name]
+        if name not in self:
+            raise AttributeError(name)
+        return self[name]
+    def __setattr__(self, name, value):
+        # Update an existing instance attribute, otherwise store as an item.
+        target = self.__dict__ if name in self.__dict__ else self
+        target[name] = value
+def merge_cfg_from_args(args, cfg):
+    """Merge config keys, values in args into the global config.
+
+    Each attribute of ``args`` is evaluated as a Python expression where
+    possible (so ``"3"`` becomes ``3``); values that cannot be evaluated are
+    kept unchanged. Entries whose final value is ``None`` are skipped.
+
+    Args:
+        args: Parsed arguments (any object supported by ``vars``).
+        cfg: Mapping updated in place.
+    """
+    for key, raw in vars(args).items():
+        # NOTE(security): eval() executes arbitrary expressions taken from
+        # the command line and resolves bare names against this module, so a
+        # value such as "os" becomes the module object. Consider
+        # ast.literal_eval if only literals are expected.
+        try:
+            value = eval(raw)
+        except Exception:
+            # Not an expression (plain string, bool, None, ...): keep as is.
+            # Catching Exception rather than everything lets KeyboardInterrupt
+            # and SystemExit propagate.
+            value = raw
+        if value is not None:
+            cfg[key] = value
--- a/deploy/cpp/README.md
+++ b/deploy/cpp/README.md
@@ -82,7 +82,7 @@ deeplabv3p_xception65_humanseg
 ### 4.2. 修改配置
-基于`PaddleSeg`训练的模型导出时，会自动生成对应的预测模型配置文件，请参考文档：[模型导出](../docs/model_export.md)。
+基于`PaddleSeg`训练的模型导出时，会自动生成对应的预测模型配置文件，请参考文档：[模型导出](../../docs/model_export.md)。
 `inference`源代码(即本目录)的`conf`目录下提供了示例人像分割模型的配置文件`humanseg.yaml`, 相关的字段含义和说明如下：

--- a/deploy/lite/README.md
+++ b/deploy/lite/README.md
@@ -24,7 +24,7 @@
 ### 2.4 效果展示
 <img src="example/human_1.png"  width="20%" ><img src="example/human_2.png"  width="20%" ><img src="example/human_3.png"  width="20%" >
 ## 3.模型导出
-此demo的人像分割模型为[下载链接](https://paddleseg.bj.bcebos.com/models/humanseg_mobilenetv2_1_0_bn_freeze_model_pr_po.zip)，是基于Deeplab_v3+mobileNet_v2的humanseg模型，关于humanseg的介绍移步[特色垂类分割模型](./contrib)，更多的分割模型导出可参考：[模型导出](https://github.com/PaddlePaddle/PaddleSeg/blob/release/v0.2.0/docs/model_export.md)
+此demo的人像分割模型为[下载链接](https://paddleseg.bj.bcebos.com/models/humanseg_mobilenetv2_1_0_bn_freeze_model_pr_po.zip)，是基于Deeplab_v3+mobileNet_v2的humanseg模型，关于humanseg的介绍移步[特色垂类分割模型](../../contrib)，更多的分割模型导出可参考：[模型导出](https://github.com/PaddlePaddle/PaddleSeg/blob/release/v0.2.0/docs/model_export.md)
 ## 4.模型转换

--- a/docs/configs/train_group.md
+++ b/docs/configs/train_group.md
@@ -45,7 +45,7 @@ TRAIN Group存放所有和训练相关的配置
 是否在多卡间同步BN的均值和方差。
 Synchronized Batch Norm跨GPU批归一化策略最早在[MegDet: A Large Mini-Batch Object Detector](https://arxiv.org/abs/1711.07240)
-论文中提出，在[Bag of Freebies for Training Object Detection Neural Networks](https://arxiv.org/pdf/1902.04103.pdf)论文中以Yolov3验证了这一策略的有效性，[PaddleCV/yolov3](https://github.com/PaddlePaddle/models/tree/develop/PaddleCV/yolov3)实现了这一系列策略并比Darknet框架版本在COCO17数据上mAP高5.9.
+论文中提出，在[Bag of Freebies for Training Object Detection Neural Networks](https://arxiv.org/pdf/1902.04103.pdf)论文中以Yolov3验证了这一策略的有效性。
 PaddleSeg基于PaddlePaddle框架的sync_batch_norm策略，可以支持通过多卡实现大batch size的分割模型训练，可以得到更高的mIoU精度。

--- a/docs/model_export.md
+++ b/docs/model_export.md
 # 模型导出
-通过训练得到一个满足要求的模型后，如果想要将该模型接入到C++预测库或者Serving服务，我们需要通过`pdseg/export_model.py`来导出该模型。
+通过训练得到一个满足要求的模型后，如果想要将该模型接入到C++预测库或者Serving服务，我们需要通过[`pdseg/export_model.py`](../pdseg/export_model.py)来导出该模型。
 该脚本的使用方法和`train.py/eval.py/vis.py`完全一样。

--- a/docs/model_zoo.md
+++ b/docs/model_zoo.md
@@ -14,6 +14,7 @@ PaddleSeg对所有内置的分割模型都提供了公开数据集下的预训
 | MobileNetV2_0.5x  | ImageNet | 0.5x | [MobileNetV2_0.5x](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x0_5_pretrained.tar) | 65.03%/85.72% |
 | MobileNetV2_1.5x  | ImageNet | 1.5x | [MobileNetV2_1.5x](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x1_5_pretrained.tar) | 74.12%/91.67% |
 | MobileNetV2_2.0x  | ImageNet | 2.0x | [MobileNetV2_2.0x](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x2_0_pretrained.tar) | 75.23%/92.58% |
+| MobileNetV3_Large_ssld_1.0x  | ImageNet | 1.0x | [MobileNetV3_Large_ssld_1.0x](https://paddleseg.bj.bcebos.com/models/MobileNetV3_large_x1_0_ssld_pretrained.tar) | 79.00%/94.50% |
 用户可以结合实际场景的精度和预测性能要求，选取不同`Depth multiplier`参数的MobileNet模型。
@@ -37,6 +38,7 @@ PaddleSeg对所有内置的分割模型都提供了公开数据集下的预训
 |---|---|---|---|
 | ResNet50（适配PSPNet） | ImageNet | [resnet50_v2_pspnet](https://paddleseg.bj.bcebos.com/resnet50_v2_pspnet.tgz)| -- |
 | ResNet101（适配PSPNet） | ImageNet | [resnet101_v2_pspnet](https://paddleseg.bj.bcebos.com/resnet101_v2_pspnet.tgz)| -- |
+| ResNet50_vd | ImageNet | [ResNet50_vd_ssld_pretrained.tgz](https://paddleseg.bj.bcebos.com/models/ResNet50_vd_ssld_pretrained.tgz) | 83.0%/96.4% |
 ## COCO预训练模型
@@ -57,12 +59,15 @@ train数据集合为Cityscapes训练集合，测试为Cityscapes的验证集合
 | 模型 | 数据集合 | 下载地址 |Output Stride| mutli-scale test| mIoU on val|
 |---|---|---|---|---|---|
 | DeepLabv3+/MobileNetv2/bn | Cityscapes |[mobilenet_cityscapes.tgz](https://paddleseg.bj.bcebos.com/models/mobilenet_cityscapes.tgz) |16|false| 0.698|
+| DeepLabv3+/MobileNetv3_Large/bn | Cityscapes |[deeplabv3p_mobilenetv3_large_cityscapes.tar.gz](https://paddleseg.bj.bcebos.com/models/deeplabv3p_mobilenetv3_large_cityscapes.tar.gz) |32|false| 0.7328|
 | DeepLabv3+/Xception65/gn  | Cityscapes |[deeplabv3p_xception65_gn_cityscapes.tgz](https://paddleseg.bj.bcebos.com/models/deeplabv3p_xception65_cityscapes.tgz) |16|false| 0.7824 |
-| DeepLabv3+/Xception65/bn | Cityscapes |[deeplabv3p_xception65_bn_cityscapes_.tgz](https://paddleseg.bj.bcebos.com/models/xception65_bn_cityscapes.tgz) | 16 | false | 0.7930 |
+| DeepLabv3+/Xception65/bn | Cityscapes |[deeplabv3p_xception65_bn_cityscapes.tgz](https://paddleseg.bj.bcebos.com/models/xception65_bn_cityscapes.tgz) | 16 | false | 0.7930 |
+| DeepLabv3+/ResNet50_vd/bn | Cityscapes |[deeplabv3p_resnet50_vd_cityscapes.tgz](https://paddleseg.bj.bcebos.com/models/deeplabv3p_resnet50_vd_cityscapes.tgz) | 16 | false | 0.8006 |
 | ICNet/bn | Cityscapes |[icnet_cityscapes.tgz](https://paddleseg.bj.bcebos.com/models/icnet_cityscapes.tar.gz) |16|false| 0.6831 |
 | PSPNet/bn | Cityscapes |[pspnet50_cityscapes.tgz](https://paddleseg.bj.bcebos.com/models/pspnet50_cityscapes.tgz) |16|false| 0.7013 |
 | PSPNet/bn | Cityscapes |[pspnet101_cityscapes.tgz](https://paddleseg.bj.bcebos.com/models/pspnet101_cityscapes.tgz) |16|false| 0.7734 |
 | HRNet_W18/bn | Cityscapes |[hrnet_w18_bn_cityscapes.tgz](https://paddleseg.bj.bcebos.com/models/hrnet_w18_bn_cityscapes.tgz) | 4 | false | 0.7936 |
 | Fast-SCNN/bn | Cityscapes |[fast_scnn_cityscapes.tar](https://paddleseg.bj.bcebos.com/models/fast_scnn_cityscape.tar) | 32 | false | 0.6964 |
+| OCRNet/bn | Cityscapes |[ocrnet_w18_bn_cityscapes.tar.gz](https://paddleseg.bj.bcebos.com/models/ocrnet_w18_bn_cityscapes.tar.gz) | 4 | false | 0.8023 |
 测试环境为python 3.7.3，v100，cudnn 7.6.2。
--- a/docs/models.md
+++ b/docs/models.md
 # PaddleSeg 分割模型介绍
- [U-Net](#U-Net)	
+- [U-Net](#U-Net)
- [DeepLabv3+](#DeepLabv3)		
+- [DeepLabv3+](#DeepLabv3)
 - [PSPNet](#PSPNet)
 - [ICNet](#ICNet)
 - [HRNet](#HRNet)
@@ -75,12 +75,10 @@ Fast-SCNN [7] 是一个面向实时的语义分割网络。在双分支的结构
 [3] [Pyramid Scene Parsing Network](https://arxiv.org/abs/1612.01105)
-[4] [Fully Convolutional Networks for Semantic Segmentation](https://people.eecs.berkeley.edu/~jonlong/long_shelhamer_fcn.pdf)
+[4] [Fully Convolutional Networks for Semantic Segmentation](https://arxiv.org/abs/1411.4038)
 [5] [ICNet for Real-Time Semantic Segmentation on High-Resolution Images](https://arxiv.org/abs/1704.08545)
 [6] [Deep High-Resolution Representation Learning for Visual Recognition](https://arxiv.org/abs/1908.07919)
 [7] [Fast-SCNN: Fast Semantic Segmentation Network](https://arxiv.org/abs/1902.04502)
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -21,7 +21,7 @@
 ## 2.下载待训练数据
-![](../turtorial/imgs/optic.png)
+![](../tutorial/imgs/optic.png)
 我们提前准备好了一份眼底医疗分割数据集--视盘分割（optic disc segmentation），包含267张训练图片、76张验证图片、38张测试图片。通过以下命令进行下载：

--- a/dygraph/README.md
+++ b/dygraph/README.md
 # 动态图执行
-## 数据集设置
+## 下载及添加路径
 ```
-data_dir='data/path'
+git clone https://github.com/PaddlePaddle/PaddleSeg
-train_list='train/list/path'
+cd PaddleSeg
-val_list='val/list/path'
+export PYTHONPATH=$PYTHONPATH:`pwd`
-test_list='test/list/path'
+cd dygraph
-num_classes=number/of/dataset/classes
 ```
 ## 训练
 ```
-python3 train.py --model_name UNet \
+python3 train.py --model_name unet \
--data_dir $data_dir \
+--dataset OpticDiscSeg \
--train_list $train_list \
--val_list $val_list \
--num_classes $num_classes \
 --input_size 192 192 \
--num_epochs 4 \
+--iters 10 \
--save_interval_epochs 1 \
+--save_interval_iters 1 \
+--do_eval \
 --save_dir output
 ```
 ## 评估
 ```
-python3 val.py --model_name UNet \
+python3 val.py --model_name unet \
--data_dir $data_dir \
+--dataset OpticDiscSeg \
--val_list $val_list \
--num_classes $num_classes \
 --input_size 192 192 \
--model_dir output/epoch_1
+--model_dir output/best_model
 ```
 ## 预测
 ```
-python3 infer.py --model_name UNet \
+python3 infer.py --model_name unet \
--data_dir $data_dir \
+--dataset OpticDiscSeg \
--test_list $test_list \
+--model_dir output/best_model \
--num_classes $num_classes \
+--input_size 192 192
--input_size 192 192 \
--model_dir output/epoch_1
 ```
--- a/dygraph/__init__.py
+++ b/dygraph/__init__.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import dygraph.models
\ No newline at end of file
--- a/dygraph/benchmark/deeplabv3p.py
+++ b/dygraph/benchmark/deeplabv3p.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import argparse
+import paddle.fluid as fluid
+from paddle.fluid.dygraph.parallel import ParallelEnv
+from dygraph.datasets import DATASETS
+import dygraph.transforms as T
+#from dygraph.models import MODELS
+from dygraph.cvlibs import manager
+from dygraph.utils import get_environ_info
+from dygraph.utils import logger
+from dygraph.core import train
+def parse_args():
+    """Define the training command-line options and parse ``sys.argv``.
+
+    Returns:
+        argparse.Namespace: one attribute per option, named after the flag
+        without its leading dashes.
+    """
+    parser = argparse.ArgumentParser(description='Model training')
+    model_choices = str(list(manager.MODELS.components_dict.keys()))
+    dataset_choices = str(list(DATASETS.keys()))
+    # (flag, add_argument keyword arguments), in --help order.
+    option_specs = [
+        # params of model
+        ('--model_name', dict(
+            help='Model type for training, which is one of {}'.format(
+                model_choices),
+            type=str,
+            default='UNet')),
+        # params of dataset
+        ('--dataset', dict(
+            help="The dataset you want to train, which is one of {}".format(
+                dataset_choices),
+            type=str,
+            default='OpticDiscSeg')),
+        ('--dataset_root', dict(
+            help="dataset root directory", type=str, default=None)),
+        # params of training
+        ("--input_size", dict(
+            help="The image size for net inputs.",
+            nargs=2,
+            default=[512, 512],
+            type=int)),
+        ('--iters', dict(
+            help='iters for training', type=int, default=10000)),
+        ('--batch_size', dict(
+            help='Mini batch size of one gpu or cpu', type=int, default=2)),
+        ('--learning_rate', dict(
+            help='Learning rate', type=float, default=0.01)),
+        ('--pretrained_model', dict(
+            help='The path of pretrained model', type=str, default=None)),
+        ('--resume_model', dict(
+            help='The path of resume model', type=str, default=None)),
+        ('--save_interval_iters', dict(
+            help='The interval iters for save a model snapshot',
+            type=int,
+            default=5)),
+        ('--save_dir', dict(
+            help='The directory for saving the model snapshot',
+            type=str,
+            default='./output')),
+        ('--num_workers', dict(
+            help='Num workers for data loader', type=int, default=0)),
+        ('--do_eval', dict(
+            help='Eval while training', action='store_true')),
+        ('--log_iters', dict(
+            help='Display logging information at every log_iters',
+            default=10,
+            type=int)),
+        ('--use_vdl', dict(
+            help='Whether to record the data to VisualDL during training',
+            action='store_true')),
+    ]
+    for flag, options in option_specs:
+        parser.add_argument(flag, dest=flag.lstrip('-'), **options)
+    return parser.parse_args()
+def main(args):
+    """Train the model selected on the command line.
+
+    Logs the environment information, picks a CUDA or CPU place, builds the
+    training dataset (and the validation dataset when ``--do_eval`` is set),
+    instantiates the model registered under ``args.model_name`` and hands
+    everything to ``train`` together with a Momentum optimizer driven by a
+    polynomial learning-rate decay.
+
+    Args:
+        args: Namespace returned by ``parse_args``.
+
+    Raises:
+        Exception: If ``args.dataset`` is not a key of ``DATASETS``.
+    """
+    env_info = get_environ_info()
+    info = ['{}: {}'.format(k, v) for k, v in env_info.items()]
+    info = '\n'.join(['\n', format('Environment Information', '-^48s')] + info +
+                     ['-' * 48])
+    logger.info(info)
+    # Run on this process's GPU only when Paddle has CUDA support and GPUs
+    # are in use; otherwise fall back to the CPU.
+    places = fluid.CUDAPlace(ParallelEnv().dev_id) \
+        if env_info['Paddle compiled with cuda'] and env_info['GPUs used'] \
+        else fluid.CPUPlace()
+    if args.dataset not in DATASETS:
+        raise Exception('`--dataset` is invalid. it should be one of {}'.format(
+            str(list(DATASETS.keys()))))
+    dataset = DATASETS[args.dataset]
+    with fluid.dygraph.guard(places):
+        # Create dataset reader
+        train_transforms = T.Compose([
+            T.RandomHorizontalFlip(0.5),
+            T.ResizeStepScaling(0.5, 2.0, 0.25),
+            T.RandomPaddingCrop(args.input_size),
+            T.RandomDistort(),
+            T.Normalize(),
+        ])
+        train_dataset = dataset(
+            dataset_root=args.dataset_root,
+            transforms=train_transforms,
+            mode='train')
+        eval_dataset = None
+        if args.do_eval:
+            eval_transforms = T.Compose(
+                [T.Padding((2049, 1025)),
+                 T.Normalize()])
+            eval_dataset = dataset(
+                dataset_root=args.dataset_root,
+                transforms=eval_transforms,
+                mode='val')
+        # NOTE(review): args.pretrained_model is parsed but not used in this
+        # script - confirm whether it should be passed to the model.
+        model = manager.MODELS[args.model_name](
+            num_classes=train_dataset.num_classes)
+        # Create optimizer: the learning rate decays polynomially to 0 over
+        # args.iters steps.
+        lr_decay = fluid.layers.polynomial_decay(
+            args.learning_rate, args.iters, end_learning_rate=0, power=0.9)
+        optimizer = fluid.optimizer.Momentum(
+            lr_decay,
+            momentum=0.9,
+            parameter_list=model.parameters(),
+            regularization=fluid.regularizer.L2Decay(regularization_coeff=4e-5))
+        train(
+            model,
+            train_dataset,
+            places=places,
+            eval_dataset=eval_dataset,
+            optimizer=optimizer,
+            save_dir=args.save_dir,
+            iters=args.iters,
+            batch_size=args.batch_size,
+            resume_model=args.resume_model,
+            save_interval_iters=args.save_interval_iters,
+            log_iters=args.log_iters,
+            num_classes=train_dataset.num_classes,
+            num_workers=args.num_workers,
+            use_vdl=args.use_vdl)
+if __name__ == '__main__':
+    # Script entry point: parse the command line and start training.
+    main(parse_args())
--- a/dygraph/benchmark/hrnet.py
+++ b/dygraph/benchmark/hrnet.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import argparse
+import paddle.fluid as fluid
+from paddle.fluid.dygraph.parallel import ParallelEnv
+from dygraph.datasets import DATASETS
+import dygraph.transforms as T
+#from dygraph.models import MODELS
+from dygraph.cvlibs import manager
+from dygraph.utils import get_environ_info
+from dygraph.utils import logger
+from dygraph.core import train
+def parse_args():
+    """Define the training command-line options and parse ``sys.argv``.
+
+    Returns:
+        argparse.Namespace: one attribute per option, named after the flag
+        without its leading dashes.
+    """
+    parser = argparse.ArgumentParser(description='Model training')
+    model_choices = str(list(manager.MODELS.components_dict.keys()))
+    dataset_choices = str(list(DATASETS.keys()))
+    # (flag, add_argument keyword arguments), in --help order.
+    option_specs = [
+        # params of model
+        ('--model_name', dict(
+            help='Model type for training, which is one of {}'.format(
+                model_choices),
+            type=str,
+            default='UNet')),
+        # params of dataset
+        ('--dataset', dict(
+            help="The dataset you want to train, which is one of {}".format(
+                dataset_choices),
+            type=str,
+            default='OpticDiscSeg')),
+        ('--dataset_root', dict(
+            help="dataset root directory", type=str, default=None)),
+        # params of training
+        ("--input_size", dict(
+            help="The image size for net inputs.",
+            nargs=2,
+            default=[512, 512],
+            type=int)),
+        ('--iters', dict(
+            help='iters for training', type=int, default=10000)),
+        ('--batch_size', dict(
+            help='Mini batch size of one gpu or cpu', type=int, default=2)),
+        ('--learning_rate', dict(
+            help='Learning rate', type=float, default=0.01)),
+        ('--pretrained_model', dict(
+            help='The path of pretrained model', type=str, default=None)),
+        ('--resume_model', dict(
+            help='The path of resume model', type=str, default=None)),
+        ('--save_interval_iters', dict(
+            help='The interval iters for save a model snapshot',
+            type=int,
+            default=5)),
+        ('--save_dir', dict(
+            help='The directory for saving the model snapshot',
+            type=str,
+            default='./output')),
+        ('--num_workers', dict(
+            help='Num workers for data loader', type=int, default=0)),
+        ('--do_eval', dict(
+            help='Eval while training', action='store_true')),
+        ('--log_iters', dict(
+            help='Display logging information at every log_iters',
+            default=10,
+            type=int)),
+        ('--use_vdl', dict(
+            help='Whether to record the data to VisualDL during training',
+            action='store_true')),
+    ]
+    for flag, options in option_specs:
+        parser.add_argument(flag, dest=flag.lstrip('-'), **options)
+    return parser.parse_args()
+def main(args):
+    """Train the model selected on the command line.
+
+    Logs the environment information, picks a CUDA or CPU place, builds the
+    training dataset (and the validation dataset when ``--do_eval`` is set),
+    instantiates the model registered under ``args.model_name`` with the
+    optional pretrained weights and hands everything to ``train`` together
+    with a Momentum optimizer driven by a polynomial learning-rate decay.
+
+    Args:
+        args: Namespace returned by ``parse_args``.
+
+    Raises:
+        Exception: If ``args.dataset`` is not a key of ``DATASETS``.
+    """
+    env_info = get_environ_info()
+    info = ['{}: {}'.format(k, v) for k, v in env_info.items()]
+    info = '\n'.join(['\n', format('Environment Information', '-^48s')] + info +
+                     ['-' * 48])
+    logger.info(info)
+    # Run on this process's GPU only when Paddle has CUDA support and GPUs
+    # are in use; otherwise fall back to the CPU.
+    places = fluid.CUDAPlace(ParallelEnv().dev_id) \
+        if env_info['Paddle compiled with cuda'] and env_info['GPUs used'] \
+        else fluid.CPUPlace()
+    if args.dataset not in DATASETS:
+        raise Exception('`--dataset` is invalid. it should be one of {}'.format(
+            str(list(DATASETS.keys()))))
+    dataset = DATASETS[args.dataset]
+    with fluid.dygraph.guard(places):
+        # Create dataset reader
+        train_transforms = T.Compose([
+            T.RandomHorizontalFlip(0.5),
+            T.ResizeStepScaling(0.5, 2.0, 0.25),
+            T.RandomPaddingCrop(args.input_size),
+            T.RandomDistort(),
+            T.Normalize(),
+        ])
+        train_dataset = dataset(
+            dataset_root=args.dataset_root,
+            transforms=train_transforms,
+            mode='train')
+        eval_dataset = None
+        if args.do_eval:
+            eval_transforms = T.Compose([T.Normalize()])
+            eval_dataset = dataset(
+                dataset_root=args.dataset_root,
+                transforms=eval_transforms,
+                mode='val')
+        model = manager.MODELS[args.model_name](
+            num_classes=train_dataset.num_classes,
+            pretrained_model=args.pretrained_model)
+        # Create optimizer: the learning rate decays polynomially to 0 over
+        # args.iters steps.
+        lr_decay = fluid.layers.polynomial_decay(
+            args.learning_rate, args.iters, end_learning_rate=0, power=0.9)
+        optimizer = fluid.optimizer.Momentum(
+            lr_decay,
+            momentum=0.9,
+            parameter_list=model.parameters(),
+            regularization=fluid.regularizer.L2Decay(regularization_coeff=4e-5))
+        train(
+            model,
+            train_dataset,
+            places=places,
+            eval_dataset=eval_dataset,
+            optimizer=optimizer,
+            save_dir=args.save_dir,
+            iters=args.iters,
+            batch_size=args.batch_size,
+            resume_model=args.resume_model,
+            save_interval_iters=args.save_interval_iters,
+            log_iters=args.log_iters,
+            num_classes=train_dataset.num_classes,
+            num_workers=args.num_workers,
+            use_vdl=args.use_vdl)
+if __name__ == '__main__':
+    # Script entry point: parse the command line and start training.
+    main(parse_args())
--- a/dygraph/core/__init__.py
+++ b/dygraph/core/__init__.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from .train import train
+from .val import evaluate
+from .infer import infer
+__all__ = ['train', 'evaluate', 'infer']
--- a/dygraph/core/infer.py
+++ b/dygraph/core/infer.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+from paddle.fluid.dygraph.base import to_variable
+import numpy as np
+import paddle.fluid as fluid
+import cv2
+import tqdm
+from dygraph import utils
+import dygraph.utils.logger as logger
+def mkdir(path):
+    """Create the parent directory of ``path`` if it does not exist yet.
+    Args:
+        path (str): Path of a file that is about to be written. Only its
+            directory part is created; the file itself is not touched.
+    """
+    sub_dir = os.path.dirname(path)
+    # A bare file name has an empty dirname, and os.makedirs('') would raise.
+    if sub_dir:
+        # exist_ok avoids the race between an existence check and the creation.
+        os.makedirs(sub_dir, exist_ok=True)
+def infer(model, test_dataset=None, model_dir=None, save_dir='output'):
+    """Predict every sample of ``test_dataset`` and save the visualizations.
+    The parameters stored under ``<model_dir>/model`` are loaded into ``model``
+    before prediction. For each image two files are written, both produced by
+    ``utils.visualize``: one under ``<save_dir>/added`` (weight=0.6) and one
+    under ``<save_dir>/prediction`` (weight=0.0). The path of the image
+    relative to ``test_dataset.dataset_root`` is kept below both directories.
+    Args:
+        model: Network to run; in eval mode it is called as ``model(im)`` and
+            must return a pair whose first element is the prediction.
+        test_dataset: Iterable yielding ``(im, im_info, im_path)`` and exposing
+            a ``dataset_root`` attribute.
+        model_dir (str): Directory that contains the ``model`` checkpoint.
+        save_dir (str): Root directory for the output images. Default: 'output'.
+    Raises:
+        Exception: If ``im_info`` contains an operation other than 'resize'
+            or 'padding'.
+    """
+    ckpt_path = os.path.join(model_dir, 'model')
+    # Only the parameters are needed for inference; the optimizer state is dropped.
+    para_state_dict, _ = fluid.load_dygraph(ckpt_path)
+    model.set_dict(para_state_dict)
+    model.eval()
+    added_saved_dir = os.path.join(save_dir, 'added')
+    pred_saved_dir = os.path.join(save_dir, 'prediction')
+    logger.info("Start to predict...")
+    for im, im_info, im_path in tqdm.tqdm(test_dataset):
+        im = to_variable(im)
+        pred, _ = model(im)
+        pred = pred.numpy()
+        pred = np.squeeze(pred).astype('uint8')
+        # Undo the recorded preprocessing steps in reverse order.
+        for info in im_info[::-1]:
+            if info[0] == 'resize':
+                h, w = info[1][0], info[1][1]
+                # interpolation must be passed by keyword: the third positional
+                # argument of cv2.resize is `dst`, so a positional flag would be
+                # ignored and class ids would be blended by linear interpolation.
+                pred = cv2.resize(
+                    pred, (w, h), interpolation=cv2.INTER_NEAREST)
+            elif info[0] == 'padding':
+                h, w = info[1][0], info[1][1]
+                pred = pred[0:h, 0:w]
+            else:
+                raise Exception("Unexpected info '{}' in im_info".format(
+                    info[0]))
+        # Path of the image relative to the dataset root, without a leading '/'.
+        im_file = im_path.replace(test_dataset.dataset_root, '')
+        if im_file.startswith('/'):
+            im_file = im_file[1:]
+        # save added image
+        added_image = utils.visualize(im_path, pred, weight=0.6)
+        added_image_path = os.path.join(added_saved_dir, im_file)
+        mkdir(added_image_path)
+        cv2.imwrite(added_image_path, added_image)
+        # save prediction
+        pred_im = utils.visualize(im_path, pred, weight=0.0)
+        pred_saved_path = os.path.join(pred_saved_dir, im_file)
+        mkdir(pred_saved_path)
+        cv2.imwrite(pred_saved_path, pred_im)
--- a/dygraph/core/train.py
+++ b/dygraph/core/train.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import paddle.fluid as fluid
+from paddle.fluid.dygraph.parallel import ParallelEnv
+from paddle.fluid.io import DataLoader
+from paddle.incubate.hapi.distributed import DistributedBatchSampler
+import dygraph.utils.logger as logger
+from dygraph.utils import load_pretrained_model
+from dygraph.utils import resume
+from dygraph.utils import Timer, calculate_eta
+from .val import evaluate
+def train(model,
+          train_dataset,
+          places=None,
+          eval_dataset=None,
+          optimizer=None,
+          save_dir='output',
+          iters=10000,
+          batch_size=2,
+          resume_model=None,
+          save_interval_iters=1000,
+          log_iters=10,
+          num_classes=None,
+          num_workers=8,
+          use_vdl=False):
+    """Train ``model`` for ``iters`` iterations, with periodic logging,
+    checkpointing and optional evaluation.
+    Args:
+        model: Network to train. Called as ``model(images, labels)`` it must
+            return the loss; it must expose ``ignore_index``.
+        train_dataset: Dataset the batches are sampled from.
+        places: Passed through to the ``DataLoader``.
+        eval_dataset: If not None, the model is evaluated on it after every
+            checkpoint and the best model is saved to ``<save_dir>/best_model``.
+        optimizer: Optimizer used to update the parameters.
+        save_dir (str): Directory for checkpoints and VisualDL logs.
+        iters (int): Total number of training iterations.
+        batch_size (int): Batch size handed to the batch sampler.
+        resume_model: If not None, passed to ``resume`` to restore the model
+            and optimizer before training.
+        save_interval_iters (int): A checkpoint is saved every this many
+            iterations and at the last iteration.
+        log_iters (int): Training statistics are logged every this many
+            iterations.
+        num_classes (int): Forwarded to ``evaluate``.
+        num_workers (int): Number of ``DataLoader`` workers.
+        use_vdl (bool): Whether to write scalars with VisualDL.
+    Raises:
+        ValueError: If the batch sampler yields no batch at all.
+    """
+    ignore_index = model.ignore_index
+    nranks = ParallelEnv().nranks
+    start_iter = 0
+    if resume_model is not None:
+        start_iter = resume(model, optimizer, resume_model)
+    if not os.path.isdir(save_dir):
+        # A regular file with the same name is replaced by the directory.
+        if os.path.exists(save_dir):
+            os.remove(save_dir)
+        os.makedirs(save_dir)
+    if nranks > 1:
+        strategy = fluid.dygraph.prepare_context()
+        ddp_model = fluid.dygraph.DataParallel(model, strategy)
+    batch_sampler = DistributedBatchSampler(
+        train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
+    iters_per_epoch = len(batch_sampler)
+    if iters_per_epoch == 0:
+        # With drop_last=True a too-small dataset produces no batch, and the
+        # `while` loop below would then spin forever without progressing.
+        raise ValueError(
+            "The train dataset yields no batch with batch_size={}; it needs "
+            "at least one full batch.".format(batch_size))
+    loader = DataLoader(
+        train_dataset,
+        batch_sampler=batch_sampler,
+        places=places,
+        num_workers=num_workers,
+        return_list=True,
+    )
+    if use_vdl:
+        from visualdl import LogWriter
+        log_writer = LogWriter(save_dir)
+    timer = Timer()
+    avg_loss = 0.0
+    best_mean_iou = -1.0
+    best_model_iter = -1
+    train_reader_cost = 0.0
+    train_batch_cost = 0.0
+    timer.start()
+    # Continue from the iteration returned by resume() so that a resumed run
+    # does not repeat already-finished iterations (0 when not resuming).
+    # NOTE(review): assumes resume() returns an int iteration count - confirm.
+    iter = start_iter
+    while iter < iters:
+        for data in loader:
+            iter += 1
+            if iter > iters:
+                break
+            train_reader_cost += timer.elapsed_time()
+            images = data[0]
+            labels = data[1].astype('int64')
+            if nranks > 1:
+                loss = ddp_model(images, labels)
+                # apply_collective_grads sum grads over multiple gpus.
+                loss = ddp_model.scale_loss(loss)
+                loss.backward()
+                ddp_model.apply_collective_grads()
+            else:
+                loss = model(images, labels)
+                loss.backward()
+            optimizer.minimize(loss)
+            model.clear_gradients()
+            avg_loss += loss.numpy()[0]
+            lr = optimizer.current_step_lr()
+            train_batch_cost += timer.elapsed_time()
+            # Logging is done by the process with local rank 0 only.
+            if (iter) % log_iters == 0 and ParallelEnv().local_rank == 0:
+                avg_loss /= log_iters
+                avg_train_reader_cost = train_reader_cost / log_iters
+                avg_train_batch_cost = train_batch_cost / log_iters
+                train_reader_cost = 0.0
+                train_batch_cost = 0.0
+                remain_iters = iters - iter
+                eta = calculate_eta(remain_iters, avg_train_batch_cost)
+                # The loss was scaled by scale_loss in the multi-card branch,
+                # hence the multiplication by nranks for reporting.
+                logger.info(
+                    "[TRAIN] epoch={}, iter={}/{}, loss={:.4f}, lr={:.6f}, batch_cost={:.4f}, reader_cost={:.4f} | ETA {}"
+                    .format((iter - 1) // iters_per_epoch + 1, iter, iters,
+                            avg_loss * nranks, lr, avg_train_batch_cost,
+                            avg_train_reader_cost, eta))
+                if use_vdl:
+                    log_writer.add_scalar('Train/loss', avg_loss * nranks, iter)
+                    log_writer.add_scalar('Train/lr', lr, iter)
+                    log_writer.add_scalar('Train/batch_cost',
+                                          avg_train_batch_cost, iter)
+                    log_writer.add_scalar('Train/reader_cost',
+                                          avg_train_reader_cost, iter)
+                avg_loss = 0.0
+            # Checkpointing (and evaluation) is done by local rank 0 only.
+            if (iter % save_interval_iters == 0
+                    or iter == iters) and ParallelEnv().local_rank == 0:
+                current_save_dir = os.path.join(save_dir,
+                                                "iter_{}".format(iter))
+                if not os.path.isdir(current_save_dir):
+                    os.makedirs(current_save_dir)
+                # Model and optimizer states are saved under the same prefix.
+                fluid.save_dygraph(model.state_dict(),
+                                   os.path.join(current_save_dir, 'model'))
+                fluid.save_dygraph(optimizer.state_dict(),
+                                   os.path.join(current_save_dir, 'model'))
+                if eval_dataset is not None:
+                    mean_iou, avg_acc = evaluate(
+                        model,
+                        eval_dataset,
+                        model_dir=current_save_dir,
+                        num_classes=num_classes,
+                        ignore_index=ignore_index,
+                        iter_id=iter)
+                    if mean_iou > best_mean_iou:
+                        best_mean_iou = mean_iou
+                        best_model_iter = iter
+                        best_model_dir = os.path.join(save_dir, "best_model")
+                        fluid.save_dygraph(
+                            model.state_dict(),
+                            os.path.join(best_model_dir, 'model'))
+                    logger.info(
+                        'Current evaluated best model in eval_dataset is iter_{}, miou={:.4f}'
+                        .format(best_model_iter, best_mean_iou))
+                    if use_vdl:
+                        log_writer.add_scalar('Evaluate/mIoU', mean_iou, iter)
+                        log_writer.add_scalar('Evaluate/aAcc', avg_acc, iter)
+                    # evaluate() switched the model to eval mode; switch back.
+                    model.train()
+            timer.restart()
+    if use_vdl:
+        log_writer.close()
--- a/dygraph/core/val.py
+++ b/dygraph/core/val.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import numpy as np
+import tqdm
+import cv2
+from paddle.fluid.dygraph.base import to_variable
+import paddle.fluid as fluid
+import dygraph.utils.logger as logger
+from dygraph.utils import ConfusionMatrix
+from dygraph.utils import Timer, calculate_eta
+def evaluate(model,
+             eval_dataset=None,
+             model_dir=None,
+             num_classes=None,
+             ignore_index=255,
+             iter_id=None):
+    """Evaluate the checkpoint stored in ``model_dir`` on ``eval_dataset``.
+    The parameters under ``<model_dir>/model`` are loaded into ``model``, the
+    model is switched to eval mode, and a streaming confusion matrix is
+    accumulated over all samples.
+    Args:
+        model: Network to evaluate; in eval mode ``model(im)`` must return a
+            pair whose first element is the prediction.
+        eval_dataset: Dataset yielding ``(im, im_info, label)`` per sample.
+        model_dir (str): Directory that contains the ``model`` checkpoint.
+        num_classes (int): Number of classes of the confusion matrix.
+        ignore_index (int): Label value excluded from the metrics. Default: 255.
+        iter_id: Training iteration, only used in the debug log message.
+    Returns:
+        tuple: ``(miou, macc)`` as returned by the confusion matrix.
+    Raises:
+        Exception: If ``im_info`` contains an operation other than 'resize'
+            or 'padding'.
+    """
+    ckpt_path = os.path.join(model_dir, 'model')
+    # Only the parameters are needed for evaluation; the optimizer state is dropped.
+    para_state_dict, _ = fluid.load_dygraph(ckpt_path)
+    model.set_dict(para_state_dict)
+    model.eval()
+    total_iters = len(eval_dataset)
+    conf_mat = ConfusionMatrix(num_classes, streaming=True)
+    logger.info(
+        "Start to evaluating(total_samples={}, total_iters={})...".format(
+            len(eval_dataset), total_iters))
+    timer = Timer()
+    timer.start()
+    for iter, (im, im_info, label) in tqdm.tqdm(
+            enumerate(eval_dataset), total=total_iters):
+        im = to_variable(im)
+        pred, _ = model(im)
+        pred = pred.numpy().astype('float32')
+        pred = np.squeeze(pred)
+        # Undo the recorded preprocessing steps in reverse order.
+        for info in im_info[::-1]:
+            if info[0] == 'resize':
+                h, w = info[1][0], info[1][1]
+                # interpolation must be passed by keyword: the third positional
+                # argument of cv2.resize is `dst`, so a positional flag would be
+                # ignored and class ids would be blended by linear interpolation.
+                pred = cv2.resize(
+                    pred, (w, h), interpolation=cv2.INTER_NEAREST)
+            elif info[0] == 'padding':
+                h, w = info[1][0], info[1][1]
+                pred = pred[0:h, 0:w]
+            else:
+                raise Exception("Unexpected info '{}' in im_info".format(
+                    info[0]))
+        # Add leading and trailing singleton axes before the comparison.
+        pred = pred[np.newaxis, :, :, np.newaxis]
+        pred = pred.astype('int64')
+        # True where the label is NOT ignore_index, i.e. the pixels to score.
+        # NOTE(review): passed as `ignore=`; presumably ConfusionMatrix treats
+        # it as a validity mask - confirm against its implementation.
+        mask = label != ignore_index
+        conf_mat.calculate(pred=pred, label=label, ignore=mask)
+        _, iou = conf_mat.mean_iou()
+        time_iter = timer.elapsed_time()
+        remain_iter = total_iters - iter - 1
+        logger.debug(
+            "[EVAL] iter_id={}, iter={}/{}, iou={:.4f}, sec/iter={:.4f} | ETA {}"
+            .format(iter_id, iter + 1, total_iters, iou, time_iter,
+                    calculate_eta(remain_iter, time_iter)))
+        timer.restart()
+    category_iou, miou = conf_mat.mean_iou()
+    category_acc, macc = conf_mat.accuracy()
+    logger.info("[EVAL] #Images={} mAcc={:.4f} mIoU={:.4f}".format(
+        len(eval_dataset), macc, miou))
+    logger.info("[EVAL] Category IoU: " + str(category_iou))
+    logger.info("[EVAL] Category Acc: " + str(category_acc))
+    logger.info("[EVAL] Kappa:{:.4f} ".format(conf_mat.kappa()))
+    return miou, macc
--- a/dygraph/cvlibs/__init__.py
+++ b/dygraph/cvlibs/__init__.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
--- a/dygraph/cvlibs/manager.py
+++ b/dygraph/cvlibs/manager.py
+# -*- encoding: utf-8 -*-
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from collections.abc import Sequence
+import inspect
+class ComponentManager:
+    """
+    Registry that stores classes and functions under their ``__name__``.
+    Components can be registered one at a time:
+        >>> model_manager = ComponentManager()
+        >>> class AlexNet: ...
+        >>> class ResNet: ...
+        >>> model_manager.add_component(AlexNet)
+        >>> model_manager.add_component(ResNet)
+    or, alternatively, several at once by passing a sequence:
+        >>> model_manager.add_component([AlexNet, ResNet])
+        >>> print(model_manager.components_dict)
+    output: {'AlexNet': <class '__main__.AlexNet'>, 'ResNet': <class '__main__.ResNet'>}
+    Since ``add_component`` returns its argument, it also works as a decorator
+    placed directly above the definition:
+        >>> model_manager = ComponentManager()
+        >>> @model_manager.add_component
+        >>> class AlexNet: ...
+        >>> @model_manager.add_component
+        >>> class ResNet: ...
+        >>> print(model_manager.components_dict)
+    output: {'AlexNet': <class '__main__.AlexNet'>, 'ResNet': <class '__main__.ResNet'>}
+    """
+    def __init__(self):
+        # Maps component name -> registered class or function.
+        self._components_dict = {}
+    def __len__(self):
+        return len(self._components_dict)
+    def __repr__(self):
+        registered_names = list(self._components_dict)
+        return "{}:{}".format(self.__class__.__name__, registered_names)
+    def __getitem__(self, item):
+        if item in self._components_dict:
+            return self._components_dict[item]
+        raise KeyError("{} does not exist in the current {}".format(item, self))
+    @property
+    def components_dict(self):
+        return self._components_dict
+    def _add_single_component(self, component):
+        """
+        Register one component under its ``__name__``.
+        Args:
+        component (function | class): the component to register
+        Returns:
+        None
+        Raises:
+        TypeError: if the component is neither a class nor a function
+        KeyError: if a component with the same name is already registered
+        """
+        # Only classes and plain functions are accepted.
+        is_supported = inspect.isclass(component) or inspect.isfunction(component)
+        if not is_supported:
+            raise TypeError("Expect class/function type, but received {}".format(type(component)))
+        # The component's own name is the registry key.
+        name = component.__name__
+        # Names must be unique within one manager.
+        if name in self._components_dict:
+            raise KeyError("{} exists already!".format(name))
+        self._components_dict[name] = component
+    def add_component(self, components):
+        """
+        Register one component or a sequence of components.
+        Args:
+        components (function | class | list | tuple): what to register
+        Returns:
+        The ``components`` argument, unchanged, so the method can be used as a decorator.
+        """
+        # A single component is handled like a one-element sequence.
+        pending = components if isinstance(components, Sequence) else [components]
+        for entry in pending:
+            self._add_single_component(entry)
+        return components
+# Module-level ComponentManager instances; going by their names, one registry
+# for models and one for backbones.
+MODELS = ComponentManager()
+BACKBONES = ComponentManager()
\ No newline at end of file
--- a/dygraph/datasets/__init__.py
+++ b/dygraph/datasets/__init__.py
@@ -12,5 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from .dataset import Dataset
 from .optic_disc_seg import OpticDiscSeg
 from .cityscapes import Cityscapes
+from .voc import PascalVOC
+from .ade import ADE20K
+# Maps a dataset name to the dataset class imported above.
+DATASETS = {
+    "OpticDiscSeg": OpticDiscSeg,
+    "Cityscapes": Cityscapes,
+    "PascalVOC": PascalVOC,
+    "ADE20K": ADE20K
+}
--- a/dygraph/datasets/ade.py
+++ b/dygraph/datasets/ade.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import numpy as np
+from PIL import Image
+from .dataset import Dataset
+from dygraph.utils.download import download_file_and_uncompress
+# Directory the dataset archive is downloaded to and extracted in.
+DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset')
+URL = "http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip"
+class ADE20K(Dataset):
+    """ADE20K dataset `http://sceneparsing.csail.mit.edu/`.
+    Args:
+        dataset_root: The dataset directory. If None, the dataset is downloaded
+            (see `download`).
+        mode: Which part of dataset to use; one of ('train', 'val'),
+            case-insensitive. Default: 'train'.
+        transforms: Transforms for image. Required.
+        download: Whether to download dataset if `dataset_root` is None.
+    Raises:
+        Exception: If `mode` is invalid, `transforms` is None, `dataset_root`
+            is None while `download` is False, or `dataset_root` does not exist.
+    """
+    def __init__(self,
+                 dataset_root=None,
+                 mode='train',
+                 transforms=None,
+                 download=True):
+        self.dataset_root = dataset_root
+        self.transforms = transforms
+        # The validation below is case-insensitive, so store and compare the
+        # normalized value everywhere; otherwise e.g. 'Train' passes the check
+        # but matches no branch afterwards.
+        mode_key = mode.lower()
+        self.mode = mode_key
+        self.file_list = list()
+        self.num_classes = 150
+        if mode_key not in ['train', 'val']:
+            raise Exception(
+                "`mode` should be one of ('train', 'val') in ADE20K dataset, but got {}."
+                .format(mode))
+        if self.transforms is None:
+            raise Exception("`transforms` is necessary, but it is None.")
+        if self.dataset_root is None:
+            if not download:
+                raise Exception(
+                    "`dataset_root` not set and auto download disabled.")
+            self.dataset_root = download_file_and_uncompress(
+                url=URL,
+                savepath=DATA_HOME,
+                extrapath=DATA_HOME,
+                extraname='ADEChallengeData2016')
+        elif not os.path.exists(self.dataset_root):
+            raise Exception('there is not `dataset_root`: {}.'.format(
+                self.dataset_root))
+        if mode_key == 'train':
+            img_dir = os.path.join(self.dataset_root, 'images/training')
+            grt_dir = os.path.join(self.dataset_root, 'annotations/training')
+        else:
+            img_dir = os.path.join(self.dataset_root, 'images/validation')
+            grt_dir = os.path.join(self.dataset_root, 'annotations/validation')
+        # os.listdir returns entries in arbitrary order; sort so that the sample
+        # order is reproducible and identical in every process.
+        img_files = sorted(os.listdir(img_dir))
+        # The annotation shares the image's file name with a .png extension.
+        grt_files = [i.replace('.jpg', '.png') for i in img_files]
+        for img_file, grt_file in zip(img_files, grt_files):
+            img_path = os.path.join(img_dir, img_file)
+            grt_path = os.path.join(grt_dir, grt_file)
+            self.file_list.append([img_path, grt_path])
+    def __getitem__(self, idx):
+        image_path, grt_path = self.file_list[idx]
+        if self.mode == 'test':
+            # Currently unreachable: __init__ only accepts 'train' and 'val'.
+            im, im_info, _ = self.transforms(im=image_path)
+            im = im[np.newaxis, ...]
+            return im, im_info, image_path
+        elif self.mode == 'val':
+            # Only the image is transformed; the label is read untouched.
+            im, im_info, _ = self.transforms(im=image_path)
+            im = im[np.newaxis, ...]
+            label = np.asarray(Image.open(grt_path))
+            # Shift the class ids down by one.
+            # NOTE(review): presumably label 0 is meant to wrap to 255 on uint8
+            # data so that it is ignored - confirm.
+            label = label - 1
+            label = label[np.newaxis, np.newaxis, :, :]
+            return im, im_info, label
+        else:
+            im, im_info, label = self.transforms(im=image_path, label=grt_path)
+            label = label - 1
+            return im, label
--- a/dygraph/datasets/cityscapes.py
+++ b/dygraph/datasets/cityscapes.py
-#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -13,73 +13,62 @@
 # limitations under the License.
 import os
+import glob
-from paddle.fluid.io import Dataset
+from .dataset import Dataset
-from utils.download import download_file_and_uncompress
-DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset')
+class Cityscapes(Dataset):
-URL = "https://paddleseg.bj.bcebos.com/dataset/cityscapes.tar"
+    """Cityscapes dataset `https://www.cityscapes-dataset.com/`.
+    The folder structure is as follow:
+    cityscapes
+    |
+    |--leftImg8bit
+    |  |--train
+    |  |--val
+    |  |--test
+    |
+    |--gtFine
+    |  |--train
+    |  |--val
+    |  |--test
+    Make sure there are **labelTrainIds.png in gtFine directory. If not, please run the conver_cityscapes.py in tools.
+    Args:
+        dataset_root: Cityscapes dataset directory.
+        mode: Which part of dataset to use. it is one of ('train', 'val', 'test'). Default: 'train'.
+        transforms: Transforms for image.
+    """
-class Cityscapes(Dataset):
+    def __init__(self, dataset_root, transforms=None, mode='train'):
-    def __init__(self,
+        self.dataset_root = dataset_root
-                 data_dir=None,
-                 transforms=None,
-                 mode='train',
-                 download=True):
-        self.data_dir = data_dir
        self.transforms = transforms
        self.file_list = list()
        self.mode = mode
        self.num_classes = 19
-        if mode.lower() not in ['train', 'eval', 'test']:
+        if mode.lower() not in ['train', 'val', 'test']:
            raise Exception(
-                "mode should be 'train', 'eval' or 'test', but got {}.".format(
+                "mode should be 'train', 'val' or 'test', but got {}.".format(
                    mode))
        if self.transforms is None:
-            raise Exception("transform is necessary, but it is None.")
+            raise Exception("`transforms` is necessary, but it is None.")
-        self.data_dir = data_dir
-        if self.data_dir is None:
-            if not download:
-                raise Exception("data_file not set and auto download disabled.")
-            self.data_dir = download_file_and_uncompress(
-                url=URL, savepath=DATA_HOME, extrapath=DATA_HOME)
-        if mode == 'train':
+        img_dir = os.path.join(self.dataset_root, 'leftImg8bit')
-            file_list = os.path.join(self.data_dir, 'train.list')
+        grt_dir = os.path.join(self.dataset_root, 'gtFine')
-        elif mode == 'eval':
+        if self.dataset_root is None or not os.path.isdir(
-            file_list = os.path.join(self.data_dir, 'val.list')
+                self.dataset_root) or not os.path.isdir(
-        else:
+                    img_dir) or not os.path.isdir(grt_dir):
-            file_list = os.path.join(self.data_dir, 'test.list')
+            raise Exception(
+                "The dataset is not Found or the folder structure is nonconfoumance."
-        with open(file_list, 'r') as f:
+            )
-            for line in f:
-                items = line.strip().split()
-                if len(items) != 2:
-                    if mode == 'train' or mode == 'eval':
-                        raise Exception(
-                            "File list format incorrect! It should be"
-                            " image_name label_name\\n")
-                    image_path = os.path.join(self.data_dir, items[0])
-                    grt_path = None
-                else:
-                    image_path = os.path.join(self.data_dir, items[0])
-                    grt_path = os.path.join(self.data_dir, items[1])
-                self.file_list.append([image_path, grt_path])
-    def __getitem__(self, idx):
+        grt_files = sorted(
-        image_path, grt_path = self.file_list[idx]
+            glob.glob(
-        im, im_info, label = self.transforms(im=image_path, label=grt_path)
+                os.path.join(grt_dir, mode, '*', '*_gtFine_labelTrainIds.png')))
-        if self.mode == 'train':
+        img_files = sorted(
-            return im, label
+            glob.glob(os.path.join(img_dir, mode, '*', '*_leftImg8bit.png')))
-        elif self.mode == 'eval':
-            return im, label
-        if self.mode == 'test':
-            return im, im_info, image_path
-    def __len__(self):
+        self.file_list = [[img_path, grt_path]
-        return len(self.file_list)
+                          for img_path, grt_path in zip(img_files, grt_files)]
--- a/dygraph/datasets/dataset.py
+++ b/dygraph/datasets/dataset.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import paddle.fluid as fluid
+import numpy as np
+from PIL import Image
+class Dataset(fluid.io.Dataset):
+    """Pass in a custom dataset that conforms to the format.
+    Args:
+        dataset_root: The dataset directory.
+        num_classes: Number of classes.
+        mode: Which part of dataset to use; one of ('train', 'val', 'test'),
+            case-insensitive. Default: 'train'.
+        train_list: The train dataset file. When mode is 'train', train_list is necessary.
+            The contents of train_list file are as follow:
+            image1.jpg ground_truth1.png
+            image2.jpg ground_truth2.png
+        val_list: The evaluation dataset file. When mode is 'val', val_list is necessary.
+            The contents is the same as train_list
+        test_list: The test dataset file. When mode is 'test', test_list is necessary.
+            The annotation file is not necessary in test_list file.
+        separator: The separator of dataset list. Default: ' '.
+        transforms: Transforms for image. Required.
+    Raises:
+        Exception: If `mode` is invalid, `transforms` is None, `dataset_root`
+            does not exist, the list file for `mode` is missing, or a line of a
+            train/val list does not hold exactly two items.
+    """
+    def __init__(self,
+                 dataset_root,
+                 num_classes,
+                 mode='train',
+                 train_list=None,
+                 val_list=None,
+                 test_list=None,
+                 separator=' ',
+                 transforms=None):
+        self.dataset_root = dataset_root
+        self.transforms = transforms
+        self.file_list = list()
+        # The validation below is case-insensitive, so store and compare the
+        # normalized value everywhere; otherwise e.g. 'Train' passes the check
+        # and is then silently handled like 'test'.
+        mode_key = mode.lower()
+        self.mode = mode_key
+        self.num_classes = num_classes
+        if mode_key not in ['train', 'val', 'test']:
+            raise Exception(
+                "mode should be 'train', 'val' or 'test', but got {}.".format(
+                    mode))
+        if self.transforms is None:
+            raise Exception("`transforms` is necessary, but it is None.")
+        if not os.path.exists(self.dataset_root):
+            raise Exception('there is not `dataset_root`: {}.'.format(
+                self.dataset_root))
+        # Pick the list file that belongs to the requested mode.
+        list_files = {
+            'train': ('train_list', train_list),
+            'val': ('val_list', val_list),
+            'test': ('test_list', test_list),
+        }
+        list_name, file_list = list_files[mode_key]
+        if file_list is None:
+            raise Exception(
+                'When `mode` is "{}", `{}` is necessary, but it is None.'.
+                format(mode_key, list_name))
+        elif not os.path.exists(file_list):
+            raise Exception('`{}` is not found: {}'.format(
+                list_name, file_list))
+        with open(file_list, 'r') as f:
+            for line in f:
+                line = line.strip()
+                # Blank lines (e.g. a trailing newline) carry no sample.
+                if not line:
+                    continue
+                items = line.split(separator)
+                if len(items) != 2:
+                    if mode_key == 'train' or mode_key == 'val':
+                        raise Exception(
+                            "File list format incorrect! In training or evaluation task it should be"
+                            " image_name{}label_name\\n".format(separator))
+                    # In test mode the annotation is optional.
+                    image_path = os.path.join(self.dataset_root, items[0])
+                    grt_path = None
+                else:
+                    image_path = os.path.join(self.dataset_root, items[0])
+                    grt_path = os.path.join(self.dataset_root, items[1])
+                self.file_list.append([image_path, grt_path])
+    def __getitem__(self, idx):
+        image_path, grt_path = self.file_list[idx]
+        if self.mode == 'test':
+            # No label in test mode; the image path is returned instead.
+            im, im_info, _ = self.transforms(im=image_path)
+            im = im[np.newaxis, ...]
+            return im, im_info, image_path
+        elif self.mode == 'val':
+            # Only the image is transformed; the label is read untouched.
+            im, im_info, _ = self.transforms(im=image_path)
+            im = im[np.newaxis, ...]
+            label = np.asarray(Image.open(grt_path))
+            label = label[np.newaxis, np.newaxis, :, :]
+            return im, im_info, label
+        else:
+            im, im_info, label = self.transforms(im=image_path, label=grt_path)
+            return im, label
+    def __len__(self):
+        return len(self.file_list)
--- a/dygraph/datasets/optic_disc_seg.py
+++ b/dygraph/datasets/optic_disc_seg.py
-#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,9 +14,8 @@
 import os
-from paddle.fluid.io import Dataset
+from .dataset import Dataset
+from dygraph.utils.download import download_file_and_uncompress
-from utils.download import download_file_and_uncompress
+# Directory passed to download_file_and_uncompress as both `savepath` and `extrapath`.
 DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset')
+# Archive requested when no `dataset_root` is given and `download` is True.
 URL = "https://paddleseg.bj.bcebos.com/dataset/optic_disc_seg.zip"
@@ -24,62 +23,52 @@ URL = "https://paddleseg.bj.bcebos.com/dataset/optic_disc_seg.zip"
 class OpticDiscSeg(Dataset):
+    """Optic disc segmentation dataset (`num_classes` is 2).
+    Args:
+        dataset_root: The dataset directory. If None and `download` is True, URL is
+            passed to download_file_and_uncompress with DATA_HOME as target.
+        transforms: Transforms for image. Must not be None.
+        mode: Which list file to read, one of ('train', 'val', 'test'). Default: 'train'.
+        download: Whether to download dataset if dataset_root is None.
+    """
    def __init__(self,
-                 data_dir=None,
+                 dataset_root=None,
                 transforms=None,
                 mode='train',
                 download=True):
-        self.data_dir = data_dir
+        self.dataset_root = dataset_root
        self.transforms = transforms
        self.file_list = list()
        self.mode = mode
        self.num_classes = 2
-        if mode.lower() not in ['train', 'eval', 'test']:
+        if mode.lower() not in ['train', 'val', 'test']:
            raise Exception(
-                "mode should be 'train', 'eval' or 'test', but got {}.".format(
+                "`mode` should be 'train', 'val' or 'test', but got {}.".format(
                    mode))
        if self.transforms is None:
-            raise Exception("transform is necessary, but it is None.")
+            raise Exception("`transforms` is necessary, but it is None.")
-        self.data_dir = data_dir
+        if self.dataset_root is None:
-        if self.data_dir is None:
            if not download:
-                raise Exception("data_file not set and auto download disabled.")
+                raise Exception(
-            self.data_dir = download_file_and_uncompress(
+                    "`dataset_root` not set and auto download disabled.")
+            self.dataset_root = download_file_and_uncompress(
                url=URL, savepath=DATA_HOME, extrapath=DATA_HOME)
+        elif not os.path.exists(self.dataset_root):
+            raise Exception('there is not `dataset_root`: {}.'.format(
+                self.dataset_root))
+        # NOTE(review): the check above accepts any casing via mode.lower(), but the
+        # comparisons below use `mode` as given, so e.g. 'Train' reads test_list.txt.
        if mode == 'train':
-            file_list = os.path.join(self.data_dir, 'train_list.txt')
+            file_list = os.path.join(self.dataset_root, 'train_list.txt')
-        elif mode == 'eval':
+        elif mode == 'val':
-            file_list = os.path.join(self.data_dir, 'val_list.txt')
+            file_list = os.path.join(self.dataset_root, 'val_list.txt')
        else:
-            file_list = os.path.join(self.data_dir, 'test_list.txt')
+            file_list = os.path.join(self.dataset_root, 'test_list.txt')
        with open(file_list, 'r') as f:
            for line in f:
                items = line.strip().split()
                if len(items) != 2:
-                    if mode == 'train' or mode == 'eval':
+                    if mode == 'train' or mode == 'val':
                        raise Exception(
                            "File list format incorrect! It should be"
                            " image_name label_name\\n")
-                    image_path = os.path.join(self.data_dir, items[0])
+                    image_path = os.path.join(self.dataset_root, items[0])
                    grt_path = None
                else:
-                    image_path = os.path.join(self.data_dir, items[0])
+                    image_path = os.path.join(self.dataset_root, items[0])
-                    grt_path = os.path.join(self.data_dir, items[1])
+                    grt_path = os.path.join(self.dataset_root, items[1])
                self.file_list.append([image_path, grt_path])
-    def __getitem__(self, idx):
-        image_path, grt_path = self.file_list[idx]
-        im, im_info, label = self.transforms(im=image_path, label=grt_path)
-        if self.mode == 'train':
-            return im, label
-        elif self.mode == 'eval':
-            return im, label
-        if self.mode == 'test':
-            return im, im_info, image_path
-    def __len__(self):
-        return len(self.file_list)
--- a/dygraph/datasets/voc.py
+++ b/dygraph/datasets/voc.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+from .dataset import Dataset
+from dygraph.utils.download import download_file_and_uncompress
+# Directory passed to download_file_and_uncompress as both `savepath` and `extrapath`.
+DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset')
+# Archive requested when no `dataset_root` is given and `download` is True.
+URL = "http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar"
+class PascalVOC(Dataset):
+    """Pascal VOC dataset `http://host.robots.ox.ac.uk/pascal/VOC/`. If you want to augment the dataset,
+    please run the voc_augment.py in tools.
+    Args:
+        dataset_root: The dataset directory; it must contain the `VOC2012` folder.
+        mode: Which part of dataset to use. It is one of ('train', 'trainval', 'trainaug', 'val'),
+            matched case-insensitively. Default: 'train'.
+        transforms: Transforms for image. Must not be None.
+        download: Whether to download dataset if dataset_root is None.
+    Raises:
+        Exception: If `mode` is not supported, `transforms` is None, `dataset_root` does
+            not exist (or is None with `download` disabled), or 'trainaug' is requested
+            while `aug.txt` is missing.
+    """
+    def __init__(self,
+                 dataset_root=None,
+                 mode='train',
+                 transforms=None,
+                 download=True):
+        # Normalise once: validation used `mode.lower()` while the split lookup
+        # compared the raw value, so e.g. 'Train' passed the check and then
+        # left `file_list` unbound.
+        split = mode.lower()
+        self.dataset_root = dataset_root
+        self.transforms = transforms
+        self.mode = split
+        self.file_list = list()
+        self.num_classes = 21
+        # Image-set file read for each supported mode ('trainaug' adds aug.txt below).
+        list_names = {
+            'train': 'train.txt',
+            'val': 'val.txt',
+            'trainval': 'trainval.txt',
+            'trainaug': 'train.txt',
+        }
+        if split not in list_names:
+            raise Exception(
+                "`mode` should be one of ('train', 'trainval', 'trainaug', 'val') in PascalVOC dataset, but got {}."
+                .format(mode))
+        if self.transforms is None:
+            raise Exception("`transforms` is necessary, but it is None.")
+        if self.dataset_root is None:
+            if not download:
+                raise Exception(
+                    "`dataset_root` not set and auto download disabled.")
+            self.dataset_root = download_file_and_uncompress(
+                url=URL,
+                savepath=DATA_HOME,
+                extrapath=DATA_HOME,
+                extraname='VOCdevkit')
+        elif not os.path.exists(self.dataset_root):
+            raise Exception('there is not `dataset_root`: {}.'.format(
+                self.dataset_root))
+        voc_dir = os.path.join(self.dataset_root, 'VOC2012')
+        image_set_dir = os.path.join(voc_dir, 'ImageSets', 'Segmentation')
+        img_dir = os.path.join(voc_dir, 'JPEGImages')
+        # Each entry pairs a list file with the directory holding its labels.
+        sources = [(os.path.join(image_set_dir, list_names[split]),
+                    os.path.join(voc_dir, 'SegmentationClass'))]
+        if split == 'trainaug':
+            file_list_aug = os.path.join(image_set_dir, 'aug.txt')
+            if not os.path.exists(file_list_aug):
+                raise Exception(
+                    "When `mode` is 'trainaug', Pascal Voc dataset should be augmented, "
+                    "Please make sure voc_augment.py has been properly run when using this mode."
+                )
+            sources.append((file_list_aug,
+                            os.path.join(voc_dir, 'SegmentationClassAug')))
+        for list_path, grt_dir in sources:
+            with open(list_path, 'r') as f:
+                for line in f:
+                    stem = line.strip()
+                    if not stem:
+                        # A blank line would otherwise yield a bare '.jpg' path.
+                        continue
+                    image_path = os.path.join(img_dir, stem + '.jpg')
+                    grt_path = os.path.join(grt_dir, stem + '.png')
+                    self.file_list.append([image_path, grt_path])
--- a/dygraph/infer.py
+++ b/dygraph/infer.py
@@ -13,21 +13,15 @@
 # limitations under the License.
 import argparse
-import os
-from paddle.fluid.dygraph.base import to_variable
-import numpy as np
 import paddle.fluid as fluid
 from paddle.fluid.dygraph.parallel import ParallelEnv
-import cv2
-import tqdm
-from datasets import OpticDiscSeg, Cityscapes
+from dygraph.datasets import DATASETS
-import transforms as T
+import dygraph.transforms as T
-import models
+from dygraph.cvlibs import manager
-import utils
+from dygraph.utils import get_environ_info
-import utils.logging as logging
+from dygraph.core import infer
-from utils import get_environ_info
 def parse_args():
@@ -37,18 +31,25 @@ def parse_args():
    parser.add_argument(
        '--model_name',
        dest='model_name',
-        help="Model type for traing, which is one of ('UNet')",
+        help='Model type for testing, which is one of {}'.format(
+            str(list(manager.MODELS.components_dict.keys()))),
        type=str,
        default='UNet')
-    # params of dataset
+    # params of infer
    parser.add_argument(
        '--dataset',
        dest='dataset',
-        help=
+        help="The dataset you want to test, which is one of {}".format(
-        "The dataset you want to train, which is one of ('OpticDiscSeg', 'Cityscapes')",
+            str(list(DATASETS.keys()))),
        type=str,
        default='OpticDiscSeg')
+    parser.add_argument(
+        '--dataset_root',
+        dest='dataset_root',
+        help="dataset root directory",
+        type=str,
+        default=None)
    # params of prediction
    parser.add_argument(
@@ -80,74 +81,26 @@ def parse_args():
    return parser.parse_args()
-def mkdir(path):
-    sub_dir = os.path.dirname(path)
-    if not os.path.exists(sub_dir):
-        os.makedirs(sub_dir)
-def infer(model, test_dataset=None, model_dir=None, save_dir='output'):
-    ckpt_path = os.path.join(model_dir, 'model')
-    para_state_dict, opti_state_dict = fluid.load_dygraph(ckpt_path)
-    model.set_dict(para_state_dict)
-    model.eval()
-    added_saved_dir = os.path.join(save_dir, 'added')
-    pred_saved_dir = os.path.join(save_dir, 'prediction')
-    logging.info("Start to predict...")
-    for im, im_info, im_path in tqdm.tqdm(test_dataset):
-        im = im[np.newaxis, ...]
-        im = to_variable(im)
-        pred, _ = model(im, mode='test')
-        pred = pred.numpy()
-        pred = np.squeeze(pred).astype('uint8')
-        keys = list(im_info.keys())
-        for k in keys[::-1]:
-            if k == 'shape_before_resize':
-                h, w = im_info[k][0], im_info[k][1]
-                pred = cv2.resize(pred, (w, h), cv2.INTER_NEAREST)
-            elif k == 'shape_before_padding':
-                h, w = im_info[k][0], im_info[k][1]
-                pred = pred[0:h, 0:w]
-        im_file = im_path.replace(test_dataset.data_dir, '')
-        if im_file[0] == '/':
-            im_file = im_file[1:]
-        # save added image
-        added_image = utils.visualize(im_path, pred, weight=0.6)
-        added_image_path = os.path.join(added_saved_dir, im_file)
-        mkdir(added_image_path)
-        cv2.imwrite(added_image_path, added_image)
-        # save prediction
-        pred_im = utils.visualize(im_path, pred, weight=0.0)
-        pred_saved_path = os.path.join(pred_saved_dir, im_file)
-        mkdir(pred_saved_path)
-        cv2.imwrite(pred_saved_path, pred_im)
 def main(args):
    env_info = get_environ_info()
    places = fluid.CUDAPlace(ParallelEnv().dev_id) \
-        if env_info['place'] == 'cuda' and fluid.is_compiled_with_cuda() \
+        if env_info['Paddle compiled with cuda'] and env_info['GPUs used'] \
        else fluid.CPUPlace()
-    if args.dataset.lower() == 'opticdiscseg':
+    if args.dataset not in DATASETS:
-        dataset = OpticDiscSeg
+        raise Exception('`--dataset` is invalid. it should be one of {}'.format(
-    elif args.dataset.lower() == 'cityscapes':
+            str(list(DATASETS.keys()))))
-        dataset = Cityscapes
+    dataset = DATASETS[args.dataset]
-    else:
-        raise Exception(
-            "The --dataset set wrong. It should be one of ('OpticDiscSeg', 'Cityscapes')"
-        )
    with fluid.dygraph.guard(places):
        test_transforms = T.Compose([T.Resize(args.input_size), T.Normalize()])
-        test_dataset = dataset(transforms=test_transforms, mode='test')
+        test_dataset = dataset(
+            dataset_root=args.dataset_root,
+            transforms=test_transforms,
+            mode='test')
-        if args.model_name == 'UNet':
+        model = manager.MODELS[args.model_name](
-            model = models.UNet(num_classes=test_dataset.num_classes)
+            num_classes=test_dataset.num_classes)
        infer(
            model,

--- a/dygraph/models/__init__.py
+++ b/dygraph/models/__init__.py
@@ -12,4 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from .architectures import *
 from .unet import UNet
+from .deeplab import *
+from .fcn import *
+from .pspnet import *
--- a/dygraph/models/architectures/__init__.py
+++ b/dygraph/models/architectures/__init__.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from . import layer_utils
+from .hrnet import *
+from .resnet_vd import *
+from .xception_deeplab import *
+from .mobilenetv3 import *
--- a/dygraph/models/architectures/hrnet.py
+++ b/dygraph/models/architectures/hrnet.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import math
+import paddle
+import paddle.fluid as fluid
+from paddle.fluid.param_attr import ParamAttr
+from paddle.fluid.layer_helper import LayerHelper
+from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
+from paddle.fluid.initializer import Normal
+from paddle.nn import SyncBatchNorm as BatchNorm
+from dygraph.cvlibs import manager
+# Names exported by `from .hrnet import *`. The listed objects are presumably
+# defined further down in this module (not visible in this chunk) - confirm.
+__all__ = [
+    "HRNet_W18_Small_V1", "HRNet_W18_Small_V2", "HRNet_W18", "HRNet_W30",
+    "HRNet_W32", "HRNet_W40", "HRNet_W44", "HRNet_W48", "HRNet_W60", "HRNet_W64"
+]
+class HRNet(fluid.dygraph.Layer):
+    """
+    HRNet: Deep High-Resolution Representation Learning for Visual Recognition
+    https://arxiv.org/pdf/1908.07919.pdf.
+    Args:
+        stage1_num_modules (int): number of modules for stage1. Default 1.
+        stage1_num_blocks (list): number of blocks per module for stage1. Default [4].
+        stage1_num_channels (list): number of channels per branch for stage1. Default [64].
+        stage2_num_modules (int): number of modules for stage2. Default 1.
+        stage2_num_blocks (list): number of blocks per module for stage2. Default [4, 4]
+        stage2_num_channels (list): number of channels per branch for stage2. Default [18, 36].
+        stage3_num_modules (int): number of modules for stage3. Default 4.
+        stage3_num_blocks (list): number of blocks per module for stage3. Default [4, 4, 4]
+        stage3_num_channels (list): number of channels per branch for stage3. Default [18, 36, 72].
+        stage4_num_modules (int): number of modules for stage4. Default 3.
+        stage4_num_blocks (list): number of blocks per module for stage4. Default [4, 4, 4, 4]
+        stage4_num_channels (list): number of channels per branch for stage4. Default [18, 36, 72, 144].
+        has_se (bool): whether to use Squeeze-and-Excitation module. Default False.
+    """
+    # The list defaults below are only read, never mutated, by this class.
+    def __init__(self,
+                 stage1_num_modules=1,
+                 stage1_num_blocks=[4],
+                 stage1_num_channels=[64],
+                 stage2_num_modules=1,
+                 stage2_num_blocks=[4, 4],
+                 stage2_num_channels=[18, 36],
+                 stage3_num_modules=4,
+                 stage3_num_blocks=[4, 4, 4],
+                 stage3_num_channels=[18, 36, 72],
+                 stage4_num_modules=3,
+                 stage4_num_blocks=[4, 4, 4, 4],
+                 stage4_num_channels=[18, 36, 72, 144],
+                 has_se=False):
+        super(HRNet, self).__init__()
+        self.stage1_num_modules = stage1_num_modules
+        self.stage1_num_blocks = stage1_num_blocks
+        self.stage1_num_channels = stage1_num_channels
+        self.stage2_num_modules = stage2_num_modules
+        self.stage2_num_blocks = stage2_num_blocks
+        self.stage2_num_channels = stage2_num_channels
+        self.stage3_num_modules = stage3_num_modules
+        self.stage3_num_blocks = stage3_num_blocks
+        self.stage3_num_channels = stage3_num_channels
+        self.stage4_num_modules = stage4_num_modules
+        self.stage4_num_blocks = stage4_num_blocks
+        self.stage4_num_channels = stage4_num_channels
+        self.has_se = has_se
+        # Stem: two stride-2 3x3 conv+BN+ReLU layers.
+        self.conv_layer1_1 = ConvBNLayer(
+            num_channels=3,
+            num_filters=64,
+            filter_size=3,
+            stride=2,
+            act='relu',
+            name="layer1_1")
+        self.conv_layer1_2 = ConvBNLayer(
+            num_channels=64,
+            num_filters=64,
+            filter_size=3,
+            stride=2,
+            act='relu',
+            name="layer1_2")
+        self.la1 = Layer1(
+            num_channels=64,
+            num_blocks=self.stage1_num_blocks[0],
+            num_filters=self.stage1_num_channels[0],
+            has_se=has_se,
+            name="layer2")
+        # Layer1 is built from bottleneck blocks, whose output width is 4x num_filters.
+        self.tr1 = TransitionLayer(
+            in_channels=[self.stage1_num_channels[0] * 4],
+            out_channels=self.stage2_num_channels,
+            name="tr1")
+        self.st2 = Stage(
+            num_channels=self.stage2_num_channels,
+            num_modules=self.stage2_num_modules,
+            num_blocks=self.stage2_num_blocks,
+            num_filters=self.stage2_num_channels,
+            has_se=self.has_se,
+            name="st2")
+        self.tr2 = TransitionLayer(
+            in_channels=self.stage2_num_channels,
+            out_channels=self.stage3_num_channels,
+            name="tr2")
+        self.st3 = Stage(
+            num_channels=self.stage3_num_channels,
+            num_modules=self.stage3_num_modules,
+            num_blocks=self.stage3_num_blocks,
+            num_filters=self.stage3_num_channels,
+            has_se=self.has_se,
+            name="st3")
+        self.tr3 = TransitionLayer(
+            in_channels=self.stage3_num_channels,
+            out_channels=self.stage4_num_channels,
+            name="tr3")
+        self.st4 = Stage(
+            num_channels=self.stage4_num_channels,
+            num_modules=self.stage4_num_modules,
+            num_blocks=self.stage4_num_blocks,
+            num_filters=self.stage4_num_channels,
+            has_se=self.has_se,
+            name="st4")
+    def forward(self, x, label=None, mode='train'):
+        """Run the backbone on `x` and return one fused feature map.
+        `label` and `mode` are accepted but not used by this method.
+        Returns the stage-4 branch outputs concatenated on axis 1, after every
+        branch other than the first is bilinearly resized to the first branch's size.
+        """
+        conv1 = self.conv_layer1_1(x)
+        conv2 = self.conv_layer1_2(conv1)
+        la1 = self.la1(conv2)
+        tr1 = self.tr1([la1])
+        st2 = self.st2(tr1)
+        tr2 = self.tr2(st2)
+        st3 = self.st3(tr2)
+        tr3 = self.tr3(st3)
+        st4 = self.st4(tr3)
+        x0_h, x0_w = st4[0].shape[2:]
+        # Resize whatever number of extra branches stage 4 produced.
+        resized = [
+            fluid.layers.resize_bilinear(branch, out_shape=(x0_h, x0_w))
+            for branch in st4[1:]
+        ]
+        return fluid.layers.concat([st4[0]] + resized, axis=1)
+class ConvBNLayer(fluid.dygraph.Layer):
+    """Bias-free Conv2D followed by batch norm and an optional ReLU.
+    Args:
+        num_channels: input channel count for the convolution.
+        num_filters: output channel count for the convolution and batch norm.
+        filter_size: kernel size; padding is `(filter_size - 1) // 2`.
+        stride: convolution stride. Default 1.
+        groups: convolution groups. Default 1.
+        act: ReLU is applied only when this equals 'relu'; any other value
+            leaves the batch-norm output unchanged.
+        name: prefix for the parameter names. When None, the parameter names
+            are left unset instead of failing on `None + str`.
+    """
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 filter_size,
+                 stride=1,
+                 groups=1,
+                 act="relu",
+                 name=None):
+        super(ConvBNLayer, self).__init__()
+
+        def _param_name(suffix):
+            # Keep the explicit naming scheme when a prefix is supplied.
+            return None if name is None else name + suffix
+
+        self._conv = Conv2D(
+            num_channels=num_channels,
+            num_filters=num_filters,
+            filter_size=filter_size,
+            stride=stride,
+            padding=(filter_size - 1) // 2,
+            groups=groups,
+            param_attr=ParamAttr(
+                initializer=Normal(scale=0.001),
+                name=_param_name("_weights")),
+            bias_attr=False)
+        self._batch_norm = BatchNorm(
+            num_filters,
+            weight_attr=ParamAttr(
+                name=_param_name('_bn_scale'),
+                initializer=fluid.initializer.Constant(1.0)),
+            bias_attr=ParamAttr(
+                name=_param_name('_bn_offset'),
+                initializer=fluid.initializer.Constant(0.0)))
+        self.act = act
+    def forward(self, input):
+        """Apply convolution, batch norm and (if `act == 'relu'`) ReLU to `input`."""
+        y = self._conv(input)
+        y = self._batch_norm(y)
+        if self.act == 'relu':
+            y = fluid.layers.relu(y)
+        return y
+class Layer1(fluid.dygraph.Layer):
+    """Sequence of `num_blocks` BottleneckBlock sublayers applied one after another.
+    Only the first block takes `num_channels` inputs and uses a downsample
+    shortcut; later blocks take the `num_filters * 4` output of their predecessor.
+    """
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 num_blocks,
+                 has_se=False,
+                 name=None):
+        super(Layer1, self).__init__()
+        self.bottleneck_block_list = []
+        expanded_channels = num_filters * 4
+        for index in range(num_blocks):
+            is_first = index == 0
+            position = index + 1
+            block = BottleneckBlock(
+                num_channels=num_channels if is_first else expanded_channels,
+                num_filters=num_filters,
+                has_se=has_se,
+                stride=1,
+                downsample=is_first,
+                name=name + '_' + str(position))
+            registered = self.add_sublayer("bb_{}_{}".format(name, position),
+                                           block)
+            self.bottleneck_block_list.append(registered)
+    def forward(self, input):
+        """Feed `input` through every bottleneck block in registration order."""
+        out = input
+        for block in self.bottleneck_block_list:
+            out = block(out)
+        return out
+class TransitionLayer(fluid.dygraph.Layer):
+    """Adapt a list of branch tensors to the channel layout of the next stage.
+    For each output branch: an existing branch whose channel count already
+    matches is passed through (slot is None); an existing branch with a different
+    channel count gets a 3x3 ConvBNLayer; a branch beyond the inputs gets a
+    stride-2 3x3 ConvBNLayer fed from the last input branch.
+    """
+    def __init__(self, in_channels, out_channels, name=None):
+        super(TransitionLayer, self).__init__()
+        existing = len(in_channels)
+        self.conv_bn_func_list = []
+        for i, out_ch in enumerate(out_channels):
+            if i >= existing:
+                # New branch: derived from the last input branch with stride 2.
+                conv_args = dict(num_channels=in_channels[-1], stride=2)
+            elif in_channels[i] != out_ch:
+                conv_args = dict(num_channels=in_channels[i])
+            else:
+                conv_args = None
+            if conv_args is None:
+                self.conv_bn_func_list.append(None)
+                continue
+            layer = self.add_sublayer(
+                "transition_{}_layer_{}".format(name, i + 1),
+                ConvBNLayer(
+                    num_filters=out_ch,
+                    filter_size=3,
+                    name=name + '_layer_' + str(i + 1),
+                    **conv_args))
+            self.conv_bn_func_list.append(layer)
+    def forward(self, input):
+        """Return one tensor per output branch, built from the list `input`."""
+        outs = []
+        for idx, conv_bn_func in enumerate(self.conv_bn_func_list):
+            if conv_bn_func is None:
+                outs.append(input[idx])
+                continue
+            source = input[idx] if idx < len(input) else input[-1]
+            outs.append(conv_bn_func(source))
+        return outs
+class Branches(fluid.dygraph.Layer):
+    """Parallel branches, each a chain of `num_blocks[i]` BasicBlock sublayers.
+    The first block of branch `i` takes `in_channels[i]` inputs; every later
+    block takes `out_channels[i]`.
+    """
+    def __init__(self,
+                 num_blocks,
+                 in_channels,
+                 out_channels,
+                 has_se=False,
+                 name=None):
+        super(Branches, self).__init__()
+        self.basic_block_list = []
+        for i, out_ch in enumerate(out_channels):
+            branch_blocks = []
+            self.basic_block_list.append(branch_blocks)
+            for j in range(num_blocks[i]):
+                suffix = '_branch_layer_' + str(i + 1) + '_' + str(j + 1)
+                block = self.add_sublayer(
+                    "bb_{}{}".format(name, suffix),
+                    BasicBlock(
+                        num_channels=in_channels[i] if j == 0 else out_ch,
+                        num_filters=out_ch,
+                        has_se=has_se,
+                        name=name + suffix))
+                branch_blocks.append(block)
+    def forward(self, inputs):
+        """Run each tensor in `inputs` through the block chain of its branch."""
+        outs = []
+        for branch_idx, feature in enumerate(inputs):
+            for block in self.basic_block_list[branch_idx]:
+                feature = block(feature)
+            outs.append(feature)
+        return outs
+class BottleneckBlock(fluid.dygraph.Layer):
+    """Residual bottleneck: 1x1 -> 3x3 -> 1x1 ConvBNLayer, output width `num_filters * 4`.
+    With `downsample` the shortcut goes through a 1x1 ConvBNLayer to the same
+    width; with `has_se` an SELayer rescales the main path before the addition.
+    """
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 has_se,
+                 stride=1,
+                 downsample=False,
+                 name=None):
+        super(BottleneckBlock, self).__init__()
+        self.has_se = has_se
+        self.downsample = downsample
+        out_channels = num_filters * 4
+        self.conv1 = ConvBNLayer(
+            num_channels=num_channels,
+            num_filters=num_filters,
+            filter_size=1,
+            act="relu",
+            name=name + "_conv1")
+        self.conv2 = ConvBNLayer(
+            num_channels=num_filters,
+            num_filters=num_filters,
+            filter_size=3,
+            stride=stride,
+            act="relu",
+            name=name + "_conv2")
+        self.conv3 = ConvBNLayer(
+            num_channels=num_filters,
+            num_filters=out_channels,
+            filter_size=1,
+            act=None,
+            name=name + "_conv3")
+        if self.downsample:
+            self.conv_down = ConvBNLayer(
+                num_channels=num_channels,
+                num_filters=out_channels,
+                filter_size=1,
+                act=None,
+                name=name + "_downsample")
+        if self.has_se:
+            self.se = SELayer(
+                num_channels=out_channels,
+                num_filters=out_channels,
+                reduction_ratio=16,
+                name=name + '_fc')
+    def forward(self, input):
+        """Return relu(main_path(input) + shortcut(input))."""
+        main = self.conv3(self.conv2(self.conv1(input)))
+        shortcut = self.conv_down(input) if self.downsample else input
+        if self.has_se:
+            main = self.se(main)
+        return fluid.layers.elementwise_add(x=main, y=shortcut, act="relu")
+class BasicBlock(fluid.dygraph.Layer):
+    """Residual block of two 3x3 ConvBNLayer sublayers, output width `num_filters`.
+    Args:
+        num_channels: input channel count.
+        num_filters: output channel count of both convolutions.
+        stride: stride of the first convolution. Default 1.
+        has_se: whether an SELayer rescales the main path before the addition.
+        downsample: whether the shortcut goes through a 1x1 ConvBNLayer.
+        name: prefix for the sublayer parameter names.
+    """
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 stride=1,
+                 has_se=False,
+                 downsample=False,
+                 name=None):
+        super(BasicBlock, self).__init__()
+        self.has_se = has_se
+        self.downsample = downsample
+        self.conv1 = ConvBNLayer(
+            num_channels=num_channels,
+            num_filters=num_filters,
+            filter_size=3,
+            stride=stride,
+            act="relu",
+            name=name + "_conv1")
+        self.conv2 = ConvBNLayer(
+            num_channels=num_filters,
+            num_filters=num_filters,
+            filter_size=3,
+            stride=1,
+            act=None,
+            name=name + "_conv2")
+        if self.downsample:
+            # The shortcut is added to conv2's `num_filters`-channel output, so it
+            # must have the same width (the 4x expansion belongs to
+            # BottleneckBlock) and stay linear until the ReLU after the addition.
+            self.conv_down = ConvBNLayer(
+                num_channels=num_channels,
+                num_filters=num_filters,
+                filter_size=1,
+                act=None,
+                name=name + "_downsample")
+        if self.has_se:
+            self.se = SELayer(
+                num_channels=num_filters,
+                num_filters=num_filters,
+                reduction_ratio=16,
+                name=name + '_fc')
+    def forward(self, input):
+        """Return relu(main_path(input) + shortcut(input))."""
+        residual = input
+        conv1 = self.conv1(input)
+        conv2 = self.conv2(conv1)
+        if self.downsample:
+            residual = self.conv_down(input)
+        if self.has_se:
+            conv2 = self.se(conv2)
+        y = fluid.layers.elementwise_add(x=conv2, y=residual, act="relu")
+        return y
+class SELayer(fluid.dygraph.Layer):
+    """Squeeze-and-Excitation gate: global average pool, two Linear layers
+    (ReLU then sigmoid), and a per-channel multiplication with the input.
+    Both Linear weights use a Uniform initializer bounded by 1/sqrt(fan_in).
+    """
+    def __init__(self, num_channels, num_filters, reduction_ratio, name=None):
+        super(SELayer, self).__init__()
+        self.pool2d_gap = Pool2D(pool_type='avg', global_pooling=True)
+        self._num_channels = num_channels
+        reduced_channels = int(num_channels / reduction_ratio)
+        squeeze_bound = 1.0 / math.sqrt(num_channels * 1.0)
+        squeeze_weights = ParamAttr(
+            initializer=fluid.initializer.Uniform(-squeeze_bound,
+                                                  squeeze_bound),
+            name=name + "_sqz_weights")
+        self.squeeze = Linear(
+            num_channels,
+            reduced_channels,
+            act="relu",
+            param_attr=squeeze_weights,
+            bias_attr=ParamAttr(name=name + '_sqz_offset'))
+        excite_bound = 1.0 / math.sqrt(reduced_channels * 1.0)
+        excite_weights = ParamAttr(
+            initializer=fluid.initializer.Uniform(-excite_bound, excite_bound),
+            name=name + "_exc_weights")
+        self.excitation = Linear(
+            reduced_channels,
+            num_filters,
+            act="sigmoid",
+            param_attr=excite_weights,
+            bias_attr=ParamAttr(name=name + '_exc_offset'))
+    def forward(self, input):
+        """Scale `input` by the gate computed from its globally pooled features."""
+        pooled = fluid.layers.reshape(
+            self.pool2d_gap(input), shape=[-1, self._num_channels])
+        gate = self.excitation(self.squeeze(pooled))
+        gate = fluid.layers.reshape(
+            gate, shape=[-1, self._num_channels, 1, 1])
+        return input * gate
+class Stage(fluid.dygraph.Layer):
+    """Chain of `num_modules` HighResolutionModule sublayers.
+    Every module keeps multi-scale output except the last one, which is built
+    with `multi_scale_output=False` when this stage's `multi_scale_output` is falsy.
+    """
+    def __init__(self,
+                 num_channels,
+                 num_modules,
+                 num_blocks,
+                 num_filters,
+                 has_se=False,
+                 multi_scale_output=True,
+                 name=None):
+        super(Stage, self).__init__()
+        self._num_modules = num_modules
+        self.stage_func_list = []
+        last_index = num_modules - 1
+        for i in range(num_modules):
+            single_scale = i == last_index and not multi_scale_output
+            module = self.add_sublayer(
+                "stage_{}_{}".format(name, i + 1),
+                HighResolutionModule(
+                    num_channels=num_channels,
+                    num_blocks=num_blocks,
+                    num_filters=num_filters,
+                    has_se=has_se,
+                    multi_scale_output=not single_scale,
+                    name=name + '_' + str(i + 1)))
+            self.stage_func_list.append(module)
+    def forward(self, input):
+        """Feed `input` through every module in registration order."""
+        out = input
+        for stage_func in self.stage_func_list:
+            out = stage_func(out)
+        return out
+class HighResolutionModule(fluid.dygraph.Layer):
+    """A Branches layer followed by a FuseLayers layer.
+    Args:
+        num_channels: passed to Branches as in_channels.
+        num_blocks: passed to Branches.
+        num_filters: passed to Branches as out_channels and to FuseLayers as
+            both in_channels and out_channels.
+        has_se (bool): passed to Branches.
+        multi_scale_output (bool): passed to FuseLayers.
+        name (str): name prefix handed to both sublayers.
+    """
+    def __init__(self,
+                 num_channels,
+                 num_blocks,
+                 num_filters,
+                 has_se=False,
+                 multi_scale_output=True,
+                 name=None):
+        super(HighResolutionModule, self).__init__()
+        branch_kwargs = dict(
+            num_blocks=num_blocks,
+            in_channels=num_channels,
+            out_channels=num_filters,
+            has_se=has_se,
+            name=name)
+        self.branches_func = Branches(**branch_kwargs)
+        fuse_kwargs = dict(
+            in_channels=num_filters,
+            out_channels=num_filters,
+            multi_scale_output=multi_scale_output,
+            name=name)
+        self.fuse_func = FuseLayers(**fuse_kwargs)
+    def forward(self, input):
+        """Run the branches, then fuse their outputs."""
+        return self.fuse_func(self.branches_func(input))
+class FuseLayers(fluid.dygraph.Layer):
+    """Fuse a list of feature maps by summing every input into every output.
+    For output index i and input index j the constructor registers:
+        * j > i: one 1x1 ConvBNLayer without activation; forward() resizes its
+          result bilinearly to the last two dimensions of input[i].
+        * j < i: a chain of (i - j) 3x3, stride-2 ConvBNLayers. Only the last
+          one maps to out_channels[i] and has no activation; the earlier ones
+          map to out_channels[j] and use relu.
+        * j == i: nothing; input[i] itself is the starting value of the sum.
+    forward() walks residual_func_list with a running index in exactly the
+    order in which __init__ filled it, so both loop nests must stay in sync.
+    Args:
+        in_channels (list): per-input values passed to ConvBNLayer as
+            num_channels; its length is the number of inputs.
+        out_channels (list): per-output values passed to ConvBNLayer as
+            num_filters, indexed like in_channels.
+        multi_scale_output (bool): when False only output 0 is built and
+            returned.
+        name (str): prefix for sublayer and parameter names. It is
+            concatenated with str, so a str must be given despite the None
+            default.
+    """
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 multi_scale_output=True,
+                 name=None):
+        super(FuseLayers, self).__init__()
+        # Number of outputs that are produced.
+        self._actual_ch = len(in_channels) if multi_scale_output else 1
+        self._in_channels = in_channels
+        # Flat list of all fusion layers, in (i, j, k) construction order.
+        self.residual_func_list = []
+        for i in range(self._actual_ch):
+            for j in range(len(in_channels)):
+                residual_func = None
+                if j > i:
+                    # Input j comes after output i: a single 1x1 conv; the
+                    # resize happens in forward().
+                    residual_func = self.add_sublayer(
+                        "residual_{}_layer_{}_{}".format(name, i + 1, j + 1),
+                        ConvBNLayer(
+                            num_channels=in_channels[j],
+                            num_filters=out_channels[i],
+                            filter_size=1,
+                            stride=1,
+                            act=None,
+                            name=name + '_layer_' + str(i + 1) + '_' +
+                            str(j + 1)))
+                    self.residual_func_list.append(residual_func)
+                elif j < i:
+                    # Input j comes before output i: (i - j) stride-2 convs.
+                    pre_num_filters = in_channels[j]
+                    for k in range(i - j):
+                        if k == i - j - 1:
+                            # Last conv of the chain: switch to the output
+                            # width and leave the activation to forward().
+                            residual_func = self.add_sublayer(
+                                "residual_{}_layer_{}_{}_{}".format(
+                                    name, i + 1, j + 1, k + 1),
+                                ConvBNLayer(
+                                    num_channels=pre_num_filters,
+                                    num_filters=out_channels[i],
+                                    filter_size=3,
+                                    stride=2,
+                                    act=None,
+                                    name=name + '_layer_' + str(i + 1) + '_' +
+                                    str(j + 1) + '_' + str(k + 1)))
+                            pre_num_filters = out_channels[i]
+                        else:
+                            # Intermediate conv: keeps out_channels[j], relu.
+                            residual_func = self.add_sublayer(
+                                "residual_{}_layer_{}_{}_{}".format(
+                                    name, i + 1, j + 1, k + 1),
+                                ConvBNLayer(
+                                    num_channels=pre_num_filters,
+                                    num_filters=out_channels[j],
+                                    filter_size=3,
+                                    stride=2,
+                                    act="relu",
+                                    name=name + '_layer_' + str(i + 1) + '_' +
+                                    str(j + 1) + '_' + str(k + 1)))
+                            pre_num_filters = out_channels[j]
+                        self.residual_func_list.append(residual_func)
+    def forward(self, input):
+        """Return the list of fused outputs (length ``self._actual_ch``).
+        Args:
+            input (list): feature maps, one per entry of in_channels.
+        """
+        outs = []
+        # Position in residual_func_list; advances in construction order.
+        residual_func_idx = 0
+        for i in range(self._actual_ch):
+            residual = input[i]
+            # Target size used when resizing the inputs with j > i.
+            residual_shape = residual.shape[-2:]
+            for j in range(len(self._in_channels)):
+                if j > i:
+                    y = self.residual_func_list[residual_func_idx](input[j])
+                    residual_func_idx += 1
+                    y = fluid.layers.resize_bilinear(
+                        input=y, out_shape=residual_shape)
+                    residual = fluid.layers.elementwise_add(
+                        x=residual, y=y, act=None)
+                elif j < i:
+                    y = input[j]
+                    # Apply the whole chain of (i - j) convs built for (i, j).
+                    for k in range(i - j):
+                        y = self.residual_func_list[residual_func_idx](y)
+                        residual_func_idx += 1
+                    residual = fluid.layers.elementwise_add(
+                        x=residual, y=y, act=None)
+            # relu over the accumulated sum, applied through LayerHelper.
+            layer_helper = LayerHelper(self.full_name(), act='relu')
+            residual = layer_helper.append_activation(residual)
+            outs.append(residual)
+        return outs
+class LastClsOut(fluid.dygraph.Layer):
+    """One BottleneckBlock (downsample=True) per input feature map.
+    Args:
+        num_channel_list (list): num_channels of each block; its length sets
+            the number of blocks.
+        has_se (bool): forwarded to every BottleneckBlock.
+        num_filters_list (list): num_filters of each block, indexed like
+            num_channel_list.
+        name (str): prefix for sublayer and parameter names. It is
+            concatenated with str, so a str must be given despite the None
+            default.
+    """
+    def __init__(self,
+                 num_channel_list,
+                 has_se,
+                 num_filters_list=[32, 64, 128, 256],
+                 name=None):
+        super(LastClsOut, self).__init__()
+        self.func_list = []
+        for idx, in_ch in enumerate(num_channel_list):
+            block = BottleneckBlock(
+                num_channels=in_ch,
+                num_filters=num_filters_list[idx],
+                has_se=has_se,
+                downsample=True,
+                name=name + 'conv_' + str(idx + 1))
+            registered = self.add_sublayer(
+                "conv_{}_conv_{}".format(name, idx + 1), block)
+            self.func_list.append(registered)
+    def forward(self, inputs):
+        """Apply the idx-th block to the idx-th input and return the results."""
+        return [self.func_list[idx](feat) for idx, feat in enumerate(inputs)]
+@manager.BACKBONES.add_component
+def HRNet_W18_Small_V1(**kwargs):
+    """Build an HRNet with one module per stage and channels 16/32/64/128.
+    Stage 1 uses a single block with 32 channels; stages 2-4 use two blocks
+    per branch. Extra keyword arguments are forwarded to ``HRNet``.
+    """
+    return HRNet(
+        stage1_num_modules=1,
+        stage1_num_blocks=[1],
+        stage1_num_channels=[32],
+        stage2_num_modules=1,
+        stage2_num_blocks=[2, 2],
+        stage2_num_channels=[16, 32],
+        stage3_num_modules=1,
+        stage3_num_blocks=[2, 2, 2],
+        stage3_num_channels=[16, 32, 64],
+        stage4_num_modules=1,
+        stage4_num_blocks=[2, 2, 2, 2],
+        stage4_num_channels=[16, 32, 64, 128],
+        **kwargs)
+@manager.BACKBONES.add_component
+def HRNet_W18_Small_V2(**kwargs):
+    """Build an HRNet with one module per stage and channels 18/36/72/144.
+    Every stage uses two blocks per branch; stage 1 has 64 channels. Extra
+    keyword arguments are forwarded to ``HRNet``.
+    """
+    return HRNet(
+        stage1_num_modules=1,
+        stage1_num_blocks=[2],
+        stage1_num_channels=[64],
+        stage2_num_modules=1,
+        stage2_num_blocks=[2, 2],
+        stage2_num_channels=[18, 36],
+        stage3_num_modules=1,
+        stage3_num_blocks=[2, 2, 2],
+        stage3_num_channels=[18, 36, 72],
+        stage4_num_modules=1,
+        stage4_num_blocks=[2, 2, 2, 2],
+        stage4_num_channels=[18, 36, 72, 144],
+        **kwargs)
+@manager.BACKBONES.add_component
+def HRNet_W18(**kwargs):
+    """Build an HRNet with branch channels 18/36/72/144.
+    Stages 1-4 use 1/1/4/3 modules with four blocks per branch. Extra
+    keyword arguments are forwarded to ``HRNet``.
+    """
+    return HRNet(
+        stage1_num_modules=1,
+        stage1_num_blocks=[4],
+        stage1_num_channels=[64],
+        stage2_num_modules=1,
+        stage2_num_blocks=[4, 4],
+        stage2_num_channels=[18, 36],
+        stage3_num_modules=4,
+        stage3_num_blocks=[4, 4, 4],
+        stage3_num_channels=[18, 36, 72],
+        stage4_num_modules=3,
+        stage4_num_blocks=[4, 4, 4, 4],
+        stage4_num_channels=[18, 36, 72, 144],
+        **kwargs)
+@manager.BACKBONES.add_component
+def HRNet_W30(**kwargs):
+    """Build an HRNet with branch channels 30/60/120/240.
+    Stages 1-4 use 1/1/4/3 modules with four blocks per branch. Extra
+    keyword arguments are forwarded to ``HRNet``.
+    """
+    return HRNet(
+        stage1_num_modules=1,
+        stage1_num_blocks=[4],
+        stage1_num_channels=[64],
+        stage2_num_modules=1,
+        stage2_num_blocks=[4, 4],
+        stage2_num_channels=[30, 60],
+        stage3_num_modules=4,
+        stage3_num_blocks=[4, 4, 4],
+        stage3_num_channels=[30, 60, 120],
+        stage4_num_modules=3,
+        stage4_num_blocks=[4, 4, 4, 4],
+        stage4_num_channels=[30, 60, 120, 240],
+        **kwargs)
+@manager.BACKBONES.add_component
+def HRNet_W32(**kwargs):
+    """Build an HRNet with branch channels 32/64/128/256.
+    Stages 1-4 use 1/1/4/3 modules with four blocks per branch. Extra
+    keyword arguments are forwarded to ``HRNet``.
+    """
+    return HRNet(
+        stage1_num_modules=1,
+        stage1_num_blocks=[4],
+        stage1_num_channels=[64],
+        stage2_num_modules=1,
+        stage2_num_blocks=[4, 4],
+        stage2_num_channels=[32, 64],
+        stage3_num_modules=4,
+        stage3_num_blocks=[4, 4, 4],
+        stage3_num_channels=[32, 64, 128],
+        stage4_num_modules=3,
+        stage4_num_blocks=[4, 4, 4, 4],
+        stage4_num_channels=[32, 64, 128, 256],
+        **kwargs)
+@manager.BACKBONES.add_component
+def HRNet_W40(**kwargs):
+    """Build an HRNet with branch channels 40/80/160/320.
+    Stages 1-4 use 1/1/4/3 modules with four blocks per branch. Extra
+    keyword arguments are forwarded to ``HRNet``.
+    """
+    return HRNet(
+        stage1_num_modules=1,
+        stage1_num_blocks=[4],
+        stage1_num_channels=[64],
+        stage2_num_modules=1,
+        stage2_num_blocks=[4, 4],
+        stage2_num_channels=[40, 80],
+        stage3_num_modules=4,
+        stage3_num_blocks=[4, 4, 4],
+        stage3_num_channels=[40, 80, 160],
+        stage4_num_modules=3,
+        stage4_num_blocks=[4, 4, 4, 4],
+        stage4_num_channels=[40, 80, 160, 320],
+        **kwargs)
+@manager.BACKBONES.add_component
+def HRNet_W44(**kwargs):
+    """Build an HRNet with branch channels 44/88/176/352.
+    Stages 1-4 use 1/1/4/3 modules with four blocks per branch. Extra
+    keyword arguments are forwarded to ``HRNet``.
+    """
+    return HRNet(
+        stage1_num_modules=1,
+        stage1_num_blocks=[4],
+        stage1_num_channels=[64],
+        stage2_num_modules=1,
+        stage2_num_blocks=[4, 4],
+        stage2_num_channels=[44, 88],
+        stage3_num_modules=4,
+        stage3_num_blocks=[4, 4, 4],
+        stage3_num_channels=[44, 88, 176],
+        stage4_num_modules=3,
+        stage4_num_blocks=[4, 4, 4, 4],
+        stage4_num_channels=[44, 88, 176, 352],
+        **kwargs)
+@manager.BACKBONES.add_component
+def HRNet_W48(**kwargs):
+    """Build an HRNet with branch channels 48/96/192/384.
+    Stages 1-4 use 1/1/4/3 modules with four blocks per branch. Extra
+    keyword arguments are forwarded to ``HRNet``.
+    """
+    return HRNet(
+        stage1_num_modules=1,
+        stage1_num_blocks=[4],
+        stage1_num_channels=[64],
+        stage2_num_modules=1,
+        stage2_num_blocks=[4, 4],
+        stage2_num_channels=[48, 96],
+        stage3_num_modules=4,
+        stage3_num_blocks=[4, 4, 4],
+        stage3_num_channels=[48, 96, 192],
+        stage4_num_modules=3,
+        stage4_num_blocks=[4, 4, 4, 4],
+        stage4_num_channels=[48, 96, 192, 384],
+        **kwargs)
+@manager.BACKBONES.add_component
+def HRNet_W60(**kwargs):
+    """Build an HRNet with branch channels 60/120/240/480.
+    Stages 1-4 use 1/1/4/3 modules with four blocks per branch. Extra
+    keyword arguments are forwarded to ``HRNet``.
+    """
+    return HRNet(
+        stage1_num_modules=1,
+        stage1_num_blocks=[4],
+        stage1_num_channels=[64],
+        stage2_num_modules=1,
+        stage2_num_blocks=[4, 4],
+        stage2_num_channels=[60, 120],
+        stage3_num_modules=4,
+        stage3_num_blocks=[4, 4, 4],
+        stage3_num_channels=[60, 120, 240],
+        stage4_num_modules=3,
+        stage4_num_blocks=[4, 4, 4, 4],
+        stage4_num_channels=[60, 120, 240, 480],
+        **kwargs)
+@manager.BACKBONES.add_component
+def HRNet_W64(**kwargs):
+    """Build an HRNet with branch channels 64/128/256/512.
+    Stages 1-4 use 1/1/4/3 modules with four blocks per branch. Extra
+    keyword arguments are forwarded to ``HRNet``.
+    """
+    return HRNet(
+        stage1_num_modules=1,
+        stage1_num_blocks=[4],
+        stage1_num_channels=[64],
+        stage2_num_modules=1,
+        stage2_num_blocks=[4, 4],
+        stage2_num_channels=[64, 128],
+        stage3_num_modules=4,
+        stage3_num_blocks=[4, 4, 4],
+        stage3_num_channels=[64, 128, 256],
+        stage4_num_modules=3,
+        stage4_num_blocks=[4, 4, 4, 4],
+        stage4_num_channels=[64, 128, 256, 512],
+        **kwargs)
--- a/dygraph/models/architectures/layer_utils.py
+++ b/dygraph/models/architectures/layer_utils.py
+# -*- encoding: utf-8 -*-
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import paddle.nn.functional as F
+from paddle import fluid
+from paddle.fluid import dygraph
+from paddle.fluid.dygraph import Conv2D
+from paddle.nn import SyncBatchNorm as BatchNorm
+from paddle.nn.layer import activation
+class ConvBnRelu(dygraph.Layer):
+    """Convolution followed by batch normalization and relu.
+    Args:
+        num_channels (int): first positional argument of the convolution.
+        num_filters (int): second positional argument of the convolution and
+            the feature count of the batch norm.
+        filter_size (int): third positional argument of the convolution.
+        using_sep_conv (bool): use DepthwiseConvBnRelu instead of Conv2D as
+            the convolution. That layer already ends with its own batch norm
+            and relu, so both are then applied a second time here.
+        **kwargs: forwarded to the selected convolution layer.
+    """
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 filter_size,
+                 using_sep_conv=False,
+                 **kwargs):
+        super(ConvBnRelu, self).__init__()
+        conv_cls = DepthwiseConvBnRelu if using_sep_conv else Conv2D
+        self.conv = conv_cls(num_channels, num_filters, filter_size, **kwargs)
+        self.batch_norm = BatchNorm(num_filters)
+    def forward(self, x):
+        """Return relu(batch_norm(conv(x)))."""
+        return F.relu(self.batch_norm(self.conv(x)))
+class ConvBn(dygraph.Layer):
+    """Conv2D followed by batch normalization, without activation.
+    Args:
+        num_channels (int): first positional argument of Conv2D.
+        num_filters (int): second positional argument of Conv2D and the
+            feature count of the batch norm.
+        filter_size (int): third positional argument of Conv2D.
+        **kwargs: forwarded to Conv2D.
+    """
+    def __init__(self, num_channels, num_filters, filter_size, **kwargs):
+        super(ConvBn, self).__init__()
+        conv_args = (num_channels, num_filters, filter_size)
+        self.conv = Conv2D(*conv_args, **kwargs)
+        self.batch_norm = BatchNorm(num_filters)
+    def forward(self, x):
+        """Return batch_norm(conv(x))."""
+        return self.batch_norm(self.conv(x))
+class ConvReluPool(dygraph.Layer):
+    """3x3 convolution, relu, then 2x2 max pooling with stride 2.
+    Args:
+        num_channels (int): first positional argument of Conv2D.
+        num_filters (int): second positional argument of Conv2D.
+    """
+    def __init__(self, num_channels, num_filters):
+        super(ConvReluPool, self).__init__()
+        conv_kwargs = dict(filter_size=3, stride=1, padding=1, dilation=1)
+        self.conv = Conv2D(num_channels, num_filters, **conv_kwargs)
+    def forward(self, x):
+        """Return max_pool(relu(conv(x)))."""
+        activated = F.relu(self.conv(x))
+        return fluid.layers.pool2d(
+            activated, pool_size=2, pool_type="max", pool_stride=2)
+class ConvBnReluUpsample(dygraph.Layer):
+    """ConvBnRelu followed by bilinear upsampling.
+    The previous constructor called ConvBnRelu without its required
+    filter_size argument and therefore always raised TypeError. The kernel
+    size is now an optional argument.
+    NOTE(review): filter_size=3 with padding=1 is an assumed default; the
+    originally intended kernel size is not recoverable from this file.
+    Args:
+        num_channels (int): forwarded to ConvBnRelu.
+        num_filters (int): forwarded to ConvBnRelu.
+        filter_size (int): kernel size of the convolution. Default: 3.
+        padding (int): padding of the convolution. Default: 1.
+        **kwargs: further keyword arguments forwarded to ConvBnRelu.
+    """
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 filter_size=3,
+                 padding=1,
+                 **kwargs):
+        super(ConvBnReluUpsample, self).__init__()
+        self.conv_bn_relu = ConvBnRelu(
+            num_channels,
+            num_filters,
+            filter_size,
+            padding=padding,
+            **kwargs)
+    def forward(self, x, upsample_scale=2):
+        """Apply the conv block, then enlarge dims 2 and 3 by upsample_scale."""
+        x = self.conv_bn_relu(x)
+        new_shape = [x.shape[2] * upsample_scale, x.shape[3] * upsample_scale]
+        x = fluid.layers.resize_bilinear(x, new_shape)
+        return x
+class DepthwiseConvBnRelu(dygraph.Layer):
+    """Grouped ConvBn (groups == num_channels) followed by a 1x1 ConvBnRelu.
+    Args:
+        num_channels (int): input and output width of the grouped convolution.
+        num_filters (int): output width of the 1x1 convolution.
+        filter_size (int): kernel size of the grouped convolution.
+        **kwargs: forwarded to the grouped ConvBn only.
+    """
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 filter_size,
+                 **kwargs):
+        super(DepthwiseConvBnRelu, self).__init__()
+        # One filter group per channel, with cuDNN switched off for this conv.
+        self.depthwise_conv = ConvBn(
+            num_channels,
+            num_filters=num_channels,
+            filter_size=filter_size,
+            groups=num_channels,
+            use_cudnn=False,
+            **kwargs)
+        # The attribute name is misspelled ("piontwise") but kept on purpose:
+        # renaming it would presumably change the parameter names of saved
+        # models -- verify before touching it.
+        self.piontwise_conv = ConvBnRelu(
+            num_channels, num_filters, filter_size=1, groups=1)
+    def forward(self, x):
+        """Return piontwise_conv(depthwise_conv(x))."""
+        return self.piontwise_conv(self.depthwise_conv(x))
+class Activation(fluid.dygraph.Layer):
+    """
+    The wrapper of activations.
+    The lowercase name is matched against the lower-cased entries of
+    ``paddle.nn.layer.activation.__all__`` and the matching class is
+    instantiated without arguments. With ``act=None`` the layer returns its
+    input unchanged.
+    For example:
+        >>> relu = Activation("relu")
+        >>> print(type(relu.act_func))
+        <class 'paddle.nn.layer.activation.ReLU'>
+        >>> sigmoid = Activation("sigmoid")
+        >>> print(type(sigmoid.act_func))
+        <class 'paddle.nn.layer.activation.Sigmoid'>
+        >>> not_exit_one = Activation("not_exit_one")
+        KeyError: "not_exit_one does not exist in the current dict_keys(['elu', 'gelu', 'hardshrink',
+        'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', 'softmax',
+        'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', 'hsigmoid'])"
+    Args:
+        act (str): the activation name in lowercase, or None for identity
+    Raises:
+        KeyError: if ``act`` is not None and matches no exported activation.
+    """
+    def __init__(self, act=None):
+        super(Activation, self).__init__()
+        self._act = act
+        upper_act_names = activation.__all__
+        # The loop variable must not be called `act`: on Python 2 a list
+        # comprehension variable leaks into the enclosing scope and would
+        # overwrite the argument.
+        lower_act_names = [cls_name.lower() for cls_name in upper_act_names]
+        act_dict = dict(zip(lower_act_names, upper_act_names))
+        if act is not None:
+            if act in act_dict:
+                # Plain attribute lookup instead of eval() on a built string.
+                self.act_func = getattr(activation, act_dict[act])()
+            else:
+                raise KeyError("{} does not exist in the current {}".format(act, act_dict.keys()))
+    def forward(self, x):
+        """Apply the selected activation, or return ``x`` when act is None."""
+        if self._act is not None:
+            return self.act_func(x)
+        else:
+            return x
\ No newline at end of file
--- a/dygraph/models/architectures/mobilenetv3.py
+++ b/dygraph/models/architectures/mobilenetv3.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import math
+import numpy as np
+import paddle
+import paddle.fluid as fluid
+from paddle.fluid.param_attr import ParamAttr
+from paddle.fluid.layer_helper import LayerHelper
+from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear, Dropout
+from paddle.nn import SyncBatchNorm as BatchNorm
+from dygraph.models.architectures import layer_utils
+from dygraph.cvlibs import manager
+# Names of the MobileNetV3 factory functions exported by this module.
+__all__ = [
+    "MobileNetV3_small_x0_35", "MobileNetV3_small_x0_5",
+    "MobileNetV3_small_x0_75", "MobileNetV3_small_x1_0",
+    "MobileNetV3_small_x1_25", "MobileNetV3_large_x0_35",
+    "MobileNetV3_large_x0_5", "MobileNetV3_large_x0_75",
+    "MobileNetV3_large_x1_0", "MobileNetV3_large_x1_25"
+]
+def make_divisible(v, divisor=8, min_value=None):
+    """Round ``v`` to a nearby multiple of ``divisor``.
+    The result is at least ``min_value`` (``divisor`` when min_value is None).
+    If rounding would leave less than 90% of ``v``, one more ``divisor`` is
+    added.
+    Args:
+        v (int|float): value to round.
+        divisor (int): the result is built from multiples of this value.
+        min_value (int): lower bound of the result. Default: None.
+    Returns:
+        The rounded value.
+    """
+    lower_bound = divisor if min_value is None else min_value
+    rounded = int(v + divisor / 2) // divisor * divisor
+    result = max(lower_bound, rounded)
+    if result < 0.9 * v:
+        return result + divisor
+    return result
+def get_padding_same(kernel_size, dilation_rate):
+    """
+    Compute the SAME padding for a given kernel size and dilation rate.
+    It follows from the output-size formula
+        (F-(k+(k -1)*(r-1))+2*p)/s + 1 = F_new
+        where F: a feature map
+              k: kernel size, r: dilation rate, p: padding value, s: stride
+              F_new: new feature map
+    Args:
+        kernel_size (int)
+        dilation_rate (int)
+    Returns:
+        padding_same (int): padding value
+    """
+    # Extent covered by the dilated kernel: k + (k - 1) * (r - 1).
+    dilated_extent = kernel_size + (kernel_size - 1) * (dilation_rate - 1)
+    return (dilated_extent - 1) // 2
+class MobileNetV3(fluid.dygraph.Layer):
+    """MobileNetV3 network that also returns intermediate feature maps.
+    Args:
+        scale (float): multiplier applied to the channel counts of the block
+            table; results are rounded with make_divisible.
+        model_name (str): "small" or "large"; selects the block table.
+        class_dim (int): output size of the final Linear layer.
+        output_stride (int): None keeps the strides of the block table.
+            Otherwise it must be even; blocks that would push the cumulative
+            stride past it get stride 1 and a dilation rate instead.
+        **kwargs: accepted but not used by this class.
+    Raises:
+        NotImplementedError: if model_name is neither "small" nor "large".
+        Exception: if output_stride is not None and is odd.
+    """
+    def __init__(self, scale=1.0, model_name="small", class_dim=1000, output_stride=None, **kwargs):
+        super(MobileNetV3, self).__init__()
+        inplanes = 16
+        if model_name == "large":
+            self.cfg = [
+                # k, exp, c,  se,     nl,  s,
+                [3, 16, 16, False, "relu", 1],
+                [3, 64, 24, False, "relu", 2],
+                [3, 72, 24, False, "relu", 1], # output 1 -> out_index=2
+                [5, 72, 40, True, "relu", 2],
+                [5, 120, 40, True, "relu", 1],
+                [5, 120, 40, True, "relu", 1], # output 2 -> out_index=5
+                [3, 240, 80, False, "hard_swish", 2],
+                [3, 200, 80, False, "hard_swish", 1],
+                [3, 184, 80, False, "hard_swish", 1],
+                [3, 184, 80, False, "hard_swish", 1],
+                [3, 480, 112, True, "hard_swish", 1],
+                [3, 672, 112, True, "hard_swish", 1], # output 3 -> out_index=11
+                [5, 672, 160, True, "hard_swish", 2],
+                [5, 960, 160, True, "hard_swish", 1],
+                [5, 960, 160, True, "hard_swish", 1], # output 4 -> out_index=14
+            ]
+            # Indices of the blocks whose outputs forward() collects.
+            self.out_indices = [2, 5, 11, 14]
+            self.cls_ch_squeeze = 960
+            self.cls_ch_expand = 1280
+        elif model_name == "small":
+            self.cfg = [
+                # k, exp, c,  se,     nl,  s,
+                [3, 16, 16, True, "relu", 2], # output 1 -> out_index=0
+                [3, 72, 24, False, "relu", 2],
+                [3, 88, 24, False, "relu", 1], # output 2 -> out_index=3
+                [5, 96, 40, True, "hard_swish", 2],
+                [5, 240, 40, True, "hard_swish", 1],
+                [5, 240, 40, True, "hard_swish", 1],
+                [5, 120, 48, True, "hard_swish", 1],
+                [5, 144, 48, True, "hard_swish", 1], # output 3 -> out_index=7
+                [5, 288, 96, True, "hard_swish", 2],
+                [5, 576, 96, True, "hard_swish", 1],
+                [5, 576, 96, True, "hard_swish", 1], # output 4 -> out_index=10
+            ]
+            # NOTE(review): the inline comment marks block 2 as "output 2" but
+            # the list below collects block 3 -- confirm which is intended.
+            self.out_indices = [0, 3, 7, 10]
+            self.cls_ch_squeeze = 576
+            self.cls_ch_expand = 1280
+        else:
+            raise NotImplementedError(
+                "mode[{}_model] is not implemented!".format(model_name))
+        ###################################################
+        # modify stride and dilation based on output_stride
+        # (rewrites the stride column of self.cfg in place)
+        self.dilation_cfg = [1] * len(self.cfg)
+        self.modify_bottle_params(output_stride=output_stride)
+        ###################################################
+        # Stem convolution; it already downsamples by 2.
+        self.conv1 = ConvBNLayer(
+            in_c=3,
+            out_c=make_divisible(inplanes * scale),
+            filter_size=3,
+            stride=2,
+            padding=1,
+            num_groups=1,
+            if_act=True,
+            act="hard_swish",
+            name="conv1")
+        self.block_list = []
+        inplanes = make_divisible(inplanes * scale)
+        for i, (k, exp, c, se, nl, s) in enumerate(self.cfg):
+            ######################################
+            # add dilation rate
+            dilation_rate = self.dilation_cfg[i]
+            ######################################
+            self.block_list.append(
+                ResidualUnit(
+                    in_c=inplanes,
+                    mid_c=make_divisible(scale * exp),
+                    out_c=make_divisible(scale * c),
+                    filter_size=k,
+                    stride=s,
+                    dilation=dilation_rate,
+                    use_se=se,
+                    act=nl,
+                    name="conv" + str(i + 2)))
+            # block_list is a plain list, so each block is registered explicitly.
+            self.add_sublayer(
+                sublayer=self.block_list[-1], name="conv" + str(i + 2))
+            inplanes = make_divisible(scale * c)
+        self.last_second_conv = ConvBNLayer(
+            in_c=inplanes,
+            out_c=make_divisible(scale * self.cls_ch_squeeze),
+            filter_size=1,
+            stride=1,
+            padding=0,
+            num_groups=1,
+            if_act=True,
+            act="hard_swish",
+            name="conv_last")
+        self.pool = Pool2D(
+            pool_type="avg", global_pooling=True, use_cudnn=False)
+        self.last_conv = Conv2D(
+            num_channels=make_divisible(scale * self.cls_ch_squeeze),
+            num_filters=self.cls_ch_expand,
+            filter_size=1,
+            stride=1,
+            padding=0,
+            act=None,
+            param_attr=ParamAttr(name="last_1x1_conv_weights"),
+            bias_attr=False)
+        self.out = Linear(
+            input_dim=self.cls_ch_expand,
+            output_dim=class_dim,
+            param_attr=ParamAttr("fc_weights"),
+            bias_attr=ParamAttr(name="fc_offset"))
+    def modify_bottle_params(self, output_stride=None):
+        """Replace strides by dilation once ``output_stride`` is reached.
+        Walks self.cfg in order while tracking the cumulative stride. A block
+        that would exceed ``output_stride`` gets its stride set to 1 and its
+        stride is multiplied into the running dilation rate, which is stored
+        in self.dilation_cfg. Does nothing when output_stride is None.
+        Raises:
+            Exception: if output_stride is not None and is odd.
+        """
+        if output_stride is not None and output_stride % 2 != 0:
+            raise Exception("output stride must to be even number")
+        if output_stride is not None:
+            # Starts at 2 because conv1 is built with stride=2.
+            stride = 2
+            rate = 1
+            for i, _cfg in enumerate(self.cfg):
+                stride = stride * _cfg[-1]
+                if stride > output_stride:
+                    rate = rate * _cfg[-1]
+                    self.cfg[i][-1] = 1
+                self.dilation_cfg[i] = rate
+    def forward(self, inputs, label=None, dropout_prob=0.2):
+        """Run the network.
+        Args:
+            inputs: input passed to the stem convolution.
+            label: not used.
+            dropout_prob (float): dropout probability before the final Linear.
+        Returns:
+            tuple: (output of the final Linear layer, list with the outputs
+            of the blocks listed in self.out_indices).
+        """
+        x = self.conv1(inputs)
+        # A feature list saves each downsampling feature.
+        feat_list = []
+        for i, block in enumerate(self.block_list):
+            x = block(x)
+            if i in self.out_indices:
+                feat_list.append(x)
+        x = self.last_second_conv(x)
+        x = self.pool(x)
+        x = self.last_conv(x)
+        x = fluid.layers.hard_swish(x)
+        x = fluid.layers.dropout(x=x, dropout_prob=dropout_prob)
+        # Drop the trailing dimensions left by global pooling.
+        x = fluid.layers.reshape(x, shape=[x.shape[0], x.shape[1]])
+        x = self.out(x)
+        return x, feat_list
+class ConvBNLayer(fluid.dygraph.Layer):
+    """Conv2D without bias, batch normalization and an optional activation.
+    Args:
+        in_c (int): num_channels of the convolution.
+        out_c (int): num_filters of the convolution and feature count of the
+            batch norm.
+        filter_size (int): kernel size of the convolution.
+        stride (int): stride of the convolution.
+        padding (int): padding of the convolution.
+        dilation (int): dilation of the convolution. Default: 1.
+        num_groups (int): groups of the convolution. Default: 1.
+        if_act (bool): apply an activation after the batch norm. Default: True.
+        act (str): "relu" or "hard_swish"; only read when if_act is True.
+        use_cudnn (bool): forwarded to the convolution. Default: True.
+        name (str): prefix of the parameter names.
+    """
+    def __init__(self,
+                 in_c,
+                 out_c,
+                 filter_size,
+                 stride,
+                 padding,
+                 dilation=1,
+                 num_groups=1,
+                 if_act=True,
+                 act=None,
+                 use_cudnn=True,
+                 name=""):
+        super(ConvBNLayer, self).__init__()
+        self.if_act = if_act
+        self.act = act
+        self.conv = fluid.dygraph.Conv2D(
+            num_channels=in_c,
+            num_filters=out_c,
+            filter_size=filter_size,
+            stride=stride,
+            padding=padding,
+            dilation=dilation,
+            groups=num_groups,
+            param_attr=ParamAttr(name=name + "_weights"),
+            bias_attr=False,
+            use_cudnn=use_cudnn,
+            act=None)
+        # Scale and offset of the batch norm use an L2 coefficient of 0.0.
+        self.bn = BatchNorm(
+            num_features=out_c,
+            weight_attr=ParamAttr(
+                name=name + "_bn_scale",
+                regularizer=fluid.regularizer.L2DecayRegularizer(
+                    regularization_coeff=0.0)),
+            bias_attr=ParamAttr(
+                name=name + "_bn_offset",
+                regularizer=fluid.regularizer.L2DecayRegularizer(
+                    regularization_coeff=0.0)))
+        # Not used by forward(); kept so the registered sublayers stay the same.
+        self._act_op = layer_utils.Activation(act=None)
+    def forward(self, x):
+        """Return act(bn(conv(x))), or bn(conv(x)) when if_act is False.
+        Raises:
+            ValueError: if if_act is True and act is neither "relu" nor
+                "hard_swish". The previous code printed a message and called
+                exit(), which terminated the whole process.
+        """
+        x = self.conv(x)
+        x = self.bn(x)
+        if self.if_act:
+            if self.act == "relu":
+                x = fluid.layers.relu(x)
+            elif self.act == "hard_swish":
+                x = fluid.layers.hard_swish(x)
+            else:
+                raise ValueError(
+                    "The activation function is selected incorrectly: "
+                    "got {!r}, expected 'relu' or 'hard_swish'.".format(
+                        self.act))
+        return x
+class ResidualUnit(fluid.dygraph.Layer):
+    """1x1 expand conv, grouped conv, optional SE module and 1x1 linear conv.
+    The input is added to the result when stride == 1 and in_c == out_c.
+    Args:
+        in_c (int): input channels of the expand conv.
+        mid_c (int): channels of the expand and grouped convs.
+        out_c (int): output channels of the linear conv.
+        filter_size (int): kernel size of the grouped conv.
+        stride (int): stride of the grouped conv.
+        use_se (bool): insert an SEModule after the grouped conv.
+        dilation (int): dilation of the grouped conv. Default: 1.
+        act (str): activation of the expand and grouped convs.
+        name (str): prefix of the parameter names.
+    """
+    def __init__(self,
+                 in_c,
+                 mid_c,
+                 out_c,
+                 filter_size,
+                 stride,
+                 use_se,
+                 dilation=1,
+                 act=None,
+                 name=''):
+        super(ResidualUnit, self).__init__()
+        self.if_shortcut = stride == 1 and in_c == out_c
+        self.if_se = use_se
+        self.expand_conv = ConvBNLayer(
+            in_c=in_c,
+            out_c=mid_c,
+            filter_size=1,
+            stride=1,
+            padding=0,
+            if_act=True,
+            act=act,
+            name=name + "_expand")
+        # Padding comes from get_padding_same for this kernel size and dilation.
+        same_padding = get_padding_same(filter_size, dilation)
+        self.bottleneck_conv = ConvBNLayer(
+            in_c=mid_c,
+            out_c=mid_c,
+            filter_size=filter_size,
+            stride=stride,
+            padding=same_padding,
+            dilation=dilation,
+            num_groups=mid_c,
+            if_act=True,
+            act=act,
+            name=name + "_depthwise")
+        if use_se:
+            self.mid_se = SEModule(mid_c, name=name + "_se")
+        self.linear_conv = ConvBNLayer(
+            in_c=mid_c,
+            out_c=out_c,
+            filter_size=1,
+            stride=1,
+            padding=0,
+            if_act=False,
+            act=None,
+            name=name + "_linear")
+        self.dilation = dilation
+    def forward(self, inputs):
+        """Run the unit and add ``inputs`` back when the shortcut is enabled."""
+        feat = self.bottleneck_conv(self.expand_conv(inputs))
+        if self.if_se:
+            feat = self.mid_se(feat)
+        feat = self.linear_conv(feat)
+        if not self.if_shortcut:
+            return feat
+        return fluid.layers.elementwise_add(inputs, feat)
+class SEModule(fluid.dygraph.Layer):
+    """Global average pooling, two 1x1 convs and a hard_sigmoid gate.
+    Args:
+        channel (int): channels of the input and of the gate.
+        reduction (int): the first conv reduces to channel // reduction.
+            Default: 4.
+        name (str): prefix of the parameter names.
+    """
+    def __init__(self, channel, reduction=4, name=""):
+        super(SEModule, self).__init__()
+        reduced_ch = channel // reduction
+        self.avg_pool = fluid.dygraph.Pool2D(
+            pool_type="avg", global_pooling=True, use_cudnn=False)
+        self.conv1 = fluid.dygraph.Conv2D(
+            num_channels=channel,
+            num_filters=reduced_ch,
+            filter_size=1,
+            stride=1,
+            padding=0,
+            act="relu",
+            param_attr=ParamAttr(name=name + "_1_weights"),
+            bias_attr=ParamAttr(name=name + "_1_offset"))
+        self.conv2 = fluid.dygraph.Conv2D(
+            num_channels=reduced_ch,
+            num_filters=channel,
+            filter_size=1,
+            stride=1,
+            padding=0,
+            act=None,
+            param_attr=ParamAttr(name + "_2_weights"),
+            bias_attr=ParamAttr(name=name + "_2_offset"))
+    def forward(self, inputs):
+        """Multiply ``inputs`` by the gate computed from its pooled features."""
+        gate = self.avg_pool(inputs)
+        gate = self.conv2(self.conv1(gate))
+        gate = fluid.layers.hard_sigmoid(gate)
+        return fluid.layers.elementwise_mul(x=inputs, y=gate, axis=0)
+def MobileNetV3_small_x0_35(**kwargs):
+    """Build the "small" MobileNetV3 with scale=0.35; kwargs are forwarded."""
+    return MobileNetV3(model_name="small", scale=0.35, **kwargs)
+def MobileNetV3_small_x0_5(**kwargs):
+    """Build the "small" MobileNetV3 with scale=0.5; kwargs are forwarded."""
+    return MobileNetV3(model_name="small", scale=0.5, **kwargs)
+def MobileNetV3_small_x0_75(**kwargs):
+    """Build the "small" MobileNetV3 with scale=0.75; kwargs are forwarded."""
+    return MobileNetV3(model_name="small", scale=0.75, **kwargs)
+@manager.BACKBONES.add_component
+def MobileNetV3_small_x1_0(**kwargs):
+    """Build the "small" MobileNetV3 with scale=1.0; kwargs are forwarded."""
+    return MobileNetV3(model_name="small", scale=1.0, **kwargs)
+def MobileNetV3_small_x1_25(**kwargs):
+    model = MobileNetV3(model_name="small", scale=1.25, **kwargs)
+    return model
+def MobileNetV3_large_x0_35(**kwargs):
+    model = MobileNetV3(model_name="large", scale=0.35, **kwargs)
+    return model
+def MobileNetV3_large_x0_5(**kwargs):
+    model = MobileNetV3(model_name="large", scale=0.5, **kwargs)
+    return model
+def MobileNetV3_large_x0_75(**kwargs):
+    model = MobileNetV3(model_name="large", scale=0.75, **kwargs)
+    return model
+@manager.BACKBONES.add_component
+def MobileNetV3_large_x1_0(**kwargs):
+    model = MobileNetV3(model_name="large", scale=1.0, **kwargs)
+    return model
+def MobileNetV3_large_x1_25(**kwargs):
+    model = MobileNetV3(model_name="large", scale=1.25, **kwargs)
+    return model
--- a/dygraph/models/architectures/resnet_vd.py
+++ b/dygraph/models/architectures/resnet_vd.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import os
+import math
+import numpy as np
+import paddle
+import paddle.fluid as fluid
+from paddle.fluid.param_attr import ParamAttr
+from paddle.fluid.layer_helper import LayerHelper
+from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear, Dropout
+from paddle.nn import SyncBatchNorm as BatchNorm
+from dygraph.utils import utils
+from dygraph.models.architectures import layer_utils
+from dygraph.cvlibs import manager
+__all__ = [
+    "ResNet18_vd", "ResNet34_vd", "ResNet50_vd", "ResNet101_vd", "ResNet152_vd"
+]
+class ConvBNLayer(fluid.dygraph.Layer):
+    def __init__(
+            self,
+            num_channels,
+            num_filters,
+            filter_size,
+            stride=1,
+            dilation=1,
+            groups=1,
+            is_vd_mode=False,
+            act=None,
+            name=None, ):
+        super(ConvBNLayer, self).__init__()
+        self.is_vd_mode = is_vd_mode
+        self._pool2d_avg = Pool2D(
+            pool_size=2, pool_stride=2, pool_padding=0, pool_type='avg', ceil_mode=True)
+        self._conv = Conv2D(
+            num_channels=num_channels,
+            num_filters=num_filters,
+            filter_size=filter_size,
+            stride=stride,
+            padding=(filter_size - 1) // 2 if dilation ==1  else 0,
+            dilation=dilation,
+            groups=groups,
+            act=None,
+            param_attr=ParamAttr(name=name + "_weights"),
+            bias_attr=False)
+        if name == "conv1":
+            bn_name = "bn_" + name
+        else:
+            bn_name = "bn" + name[3:]
+        self._batch_norm = BatchNorm(
+            num_filters,
+            weight_attr=ParamAttr(name=bn_name + '_scale'),
+            bias_attr=ParamAttr(bn_name + '_offset'))
+        self._act_op = layer_utils.Activation(act=act)
+    def forward(self, inputs):
+        if self.is_vd_mode:
+            inputs = self._pool2d_avg(inputs)
+        y = self._conv(inputs)
+        y = self._batch_norm(y)
+        y = self._act_op(y)
+        return y
+class BottleneckBlock(fluid.dygraph.Layer):
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 stride,
+                 shortcut=True,
+                 if_first=False,
+                 dilation=1,
+                 name=None):
+        super(BottleneckBlock, self).__init__()
+        self.conv0 = ConvBNLayer(
+            num_channels=num_channels,
+            num_filters=num_filters,
+            filter_size=1,
+            act='relu',
+            name=name + "_branch2a")
+        self.dilation = dilation
+        self.conv1 = ConvBNLayer(
+            num_channels=num_filters,
+            num_filters=num_filters,
+            filter_size=3,
+            stride=stride,
+            act='relu',
+            dilation=dilation,
+            name=name + "_branch2b")
+        self.conv2 = ConvBNLayer(
+            num_channels=num_filters,
+            num_filters=num_filters * 4,
+            filter_size=1,
+            act=None,
+            name=name + "_branch2c")
+        if not shortcut:
+            self.short = ConvBNLayer(
+                num_channels=num_channels,
+                num_filters=num_filters * 4,
+                filter_size=1,
+                stride=1,
+                is_vd_mode=False if if_first or stride==1 else True,
+                name=name + "_branch1")
+        self.shortcut = shortcut
+    def forward(self, inputs):
+        y = self.conv0(inputs)
+        ####################################################################
+        # If given dilation rate > 1, using corresponding padding
+        if self.dilation > 1:
+            padding = self.dilation
+            y = fluid.layers.pad(y, [0,0,0,0,padding,padding,padding,padding])
+        #####################################################################
+        conv1 = self.conv1(y)
+        conv2 = self.conv2(conv1)
+        if self.shortcut:
+            short = inputs
+        else:
+            short = self.short(inputs)
+        y = fluid.layers.elementwise_add(x=short, y=conv2)
+        layer_helper = LayerHelper(self.full_name(), act='relu')
+        return layer_helper.append_activation(y)
+class BasicBlock(fluid.dygraph.Layer):
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 stride,
+                 shortcut=True,
+                 if_first=False,
+                 name=None):
+        super(BasicBlock, self).__init__()
+        self.stride = stride
+        self.conv0 = ConvBNLayer(
+            num_channels=num_channels,
+            num_filters=num_filters,
+            filter_size=3,
+            stride=stride,
+            act='relu',
+            name=name + "_branch2a")
+        self.conv1 = ConvBNLayer(
+            num_channels=num_filters,
+            num_filters=num_filters,
+            filter_size=3,
+            act=None,
+            name=name + "_branch2b")
+        if not shortcut:
+            self.short = ConvBNLayer(
+                num_channels=num_channels,
+                num_filters=num_filters,
+                filter_size=1,
+                stride=1,
+                is_vd_mode=False if if_first else True,
+                name=name + "_branch1")
+        self.shortcut = shortcut
+    def forward(self, inputs):
+        y = self.conv0(inputs)
+        conv1 = self.conv1(y)
+        if self.shortcut:
+            short = inputs
+        else:
+            short = self.short(inputs)
+        y = fluid.layers.elementwise_add(x=short, y=conv1)
+        layer_helper = LayerHelper(self.full_name(), act='relu')
+        return layer_helper.append_activation(y)
+class ResNet_vd(fluid.dygraph.Layer):
+    """
+    ResNet-vd network returning classification output and per-stage features.
+    Args:
+        layers (int): depth, one of 18, 34, 50, 101, 152, 200. Default: 50.
+        class_dim (int): output size of the final Linear layer. Default: 1000.
+        output_stride (int|None): 8 sets dilation 2 and 4 on stage indices 2
+            and 3 (0-based), 16 sets dilation 2 on stage index 3; any other
+            value adds no per-stage dilation. Only read by the bottleneck
+            (layers >= 50) branch. Default: None.
+        multi_grid (tuple): per-block multipliers of the dilation rate in
+            stage index 3 of the bottleneck branch. Default: (1, 2, 4).
+        **kwargs: accepted but not used by this constructor.
+    """
+    def __init__(self, layers=50, class_dim=1000, output_stride=None, multi_grid=(1, 2, 4), **kwargs):
+        super(ResNet_vd, self).__init__()
+        self.layers = layers
+        supported_layers = [18, 34, 50, 101, 152, 200]
+        assert layers in supported_layers, \
+            "supported layers are {} but input layer is {}".format(
+                supported_layers, layers)
+        # Number of residual blocks in each of the four stages.
+        if layers == 18:
+            depth = [2, 2, 2, 2]
+        elif layers == 34 or layers == 50:
+            depth = [3, 4, 6, 3]
+        elif layers == 101:
+            depth = [3, 4, 23, 3]
+        elif layers == 152:
+            depth = [3, 8, 36, 3]
+        elif layers == 200:
+            depth = [3, 12, 48, 3]
+        # Input channels of the first block of each stage.
+        num_channels = [64, 256, 512,
+                        1024] if layers >= 50 else [64, 64, 128, 256]
+        num_filters = [64, 128, 256, 512]
+        # Maps stage index -> dilation rate; stays None for other output strides.
+        dilation_dict=None
+        if output_stride == 8:
+            dilation_dict = {2: 2, 3: 4}
+        elif output_stride == 16:
+            dilation_dict = {3: 2}
+        # Stem: three 3x3 convs (the first with stride 2) followed by a max pool.
+        self.conv1_1 = ConvBNLayer(
+            num_channels=3,
+            num_filters=32,
+            filter_size=3,
+            stride=2,
+            act='relu',
+            name="conv1_1")
+        self.conv1_2 = ConvBNLayer(
+            num_channels=32,
+            num_filters=32,
+            filter_size=3,
+            stride=1,
+            act='relu',
+            name="conv1_2")
+        self.conv1_3 = ConvBNLayer(
+            num_channels=32,
+            num_filters=64,
+            filter_size=3,
+            stride=1,
+            act='relu',
+            name="conv1_3")
+        self.pool2d_max = Pool2D(
+            pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
+        # self.block_list = []
+        # Plain Python list of per-stage block lists; the blocks themselves
+        # are registered on the layer through add_sublayer below.
+        self.stage_list = []
+        if layers >= 50:
+            for block in range(len(depth)):
+                # Only the first block of a stage uses a projection shortcut.
+                shortcut = False
+                block_list=[]
+                for i in range(depth[block]):
+                    # NOTE(review): layers == 200 passes the assert above but
+                    # is not listed here, so its stage index 2 (48 blocks)
+                    # falls through to chr(97 + i), which runs past 'z' -
+                    # confirm whether 200 belongs in this list.
+                    if layers in [101, 152] and block == 2:
+                        if i == 0:
+                            conv_name = "res" + str(block + 2) + "a"
+                        else:
+                            conv_name = "res" + str(block + 2) + "b" + str(i)
+                    else:
+                        conv_name = "res" + str(block + 2) + chr(97 + i)
+                    ###############################################################################
+                    # Add dilation rate for some segmentation tasks, if dilation_dict is not None.
+                    dilation_rate = dilation_dict[block] if dilation_dict and block in dilation_dict else 1
+                    # Actually block here is 'stage', and i is 'block' in 'stage'
+                    # At stage 4, expand the dilation_rate using multi_grid, default (1, 2, 4).
+                    # This is applied whenever block == 3, also when dilation_dict is None.
+                    if block == 3:
+                        dilation_rate = dilation_rate * multi_grid[i]
+                    #print("stage {}, block {}: dilation rate".format(block, i), dilation_rate)
+                    ###############################################################################
+                    # Stride 2 only for the first block of stages 1..3, and only
+                    # when that block is not dilated.
+                    bottleneck_block = self.add_sublayer(
+                        'bb_%d_%d' % (block, i),
+                        BottleneckBlock(
+                            num_channels=num_channels[block] if i == 0 else num_filters[block] * 4,
+                            num_filters=num_filters[block],
+                            stride=2 if i == 0 and block != 0 and dilation_rate == 1 else 1,
+                            shortcut=shortcut,
+                            if_first=block == i == 0,
+                            name=conv_name,
+                            dilation=dilation_rate))
+                    block_list.append(bottleneck_block)
+                    shortcut = True
+                self.stage_list.append(block_list)
+        else:
+            # Basic-block variant (layers < 50): no dilation handling here.
+            for block in range(len(depth)):
+                shortcut = False
+                block_list=[]
+                for i in range(depth[block]):
+                    conv_name = "res" + str(block + 2) + chr(97 + i)
+                    basic_block = self.add_sublayer(
+                        'bb_%d_%d' % (block, i),
+                        BasicBlock(
+                            num_channels=num_channels[block]
+                            if i == 0 else num_filters[block],
+                            num_filters=num_filters[block],
+                            stride=2 if i == 0 and block != 0 else 1,
+                            shortcut=shortcut,
+                            if_first=block == i == 0,
+                            name=conv_name))
+                    block_list.append(basic_block)
+                    shortcut = True
+                self.stage_list.append(block_list)
+        self.pool2d_avg = Pool2D(
+            pool_size=7, pool_type='avg', global_pooling=True)
+        # 1024 * 2 = 2048 for the bottleneck nets, 256 * 2 = 512 otherwise.
+        self.pool2d_avg_channels = num_channels[-1] * 2
+        stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0)
+        self.out = Linear(
+            self.pool2d_avg_channels,
+            class_dim,
+            param_attr=ParamAttr(
+                initializer=fluid.initializer.Uniform(-stdv, stdv),
+                name="fc_0.w_0"),
+            bias_attr=ParamAttr(name="fc_0.b_0"))
+    def forward(self, inputs):
+        """
+        Run the stem, all stages, global pooling and the final Linear layer.
+        Returns:
+            tuple: (y, feat_list) where y is the output of self.out and
+            feat_list holds the output of the last block of every stage.
+        """
+        y = self.conv1_1(inputs)
+        y = self.conv1_2(y)
+        y = self.conv1_3(y)
+        y = self.pool2d_max(y)
+        # A feature list saves the output feature map of each stage.
+        feat_list = []
+        for i, stage in enumerate(self.stage_list):
+            for j, block in enumerate(stage):
+                y = block(y)
+                #print("stage {} block {}".format(i+1, j+1), y.shape)
+            feat_list.append(y)
+        y = self.pool2d_avg(y)
+        y = fluid.layers.reshape(y, shape=[-1, self.pool2d_avg_channels])
+        y = self.out(y)
+        return y, feat_list
+    # def init_weight(self, pretrained_model=None):
+    #     if pretrained_model is not None:
+    #         if os.path.exists(pretrained_model):
+    #             utils.load_pretrained_model(self, pretrained_model)
+def ResNet18_vd(**args):
+    model = ResNet_vd(layers=18, **args)
+    return model
+def ResNet34_vd(**args):
+    model = ResNet_vd(layers=34, **args)
+    return model
+@manager.BACKBONES.add_component
+def ResNet50_vd(**args):
+    model = ResNet_vd(layers=50, **args)
+    return model
+@manager.BACKBONES.add_component
+def ResNet101_vd(**args):
+    model = ResNet_vd(layers=101, **args)
+    return model
+def ResNet152_vd(**args):
+    model = ResNet_vd(layers=152, **args)
+    return model
+def ResNet200_vd(**args):
+    model = ResNet_vd(layers=200, **args)
+    return model
\ No newline at end of file
--- a/dygraph/models/architectures/xception_deeplab.py
+++ b/dygraph/models/architectures/xception_deeplab.py
+import paddle
+import paddle.fluid as fluid
+from paddle.fluid.param_attr import ParamAttr
+from paddle.fluid.layer_helper import LayerHelper
+from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear, Dropout
+from paddle.nn import SyncBatchNorm as BatchNorm
+from dygraph.models.architectures import layer_utils
+from dygraph.cvlibs import manager
+__all__ = ["Xception41_deeplab", "Xception65_deeplab", "Xception71_deeplab"]
+def check_data(data, number):
+    if type(data) == int:
+        return [data] * number
+    assert len(data) == number
+    return data
+def check_stride(s, os):
+    if s <= os:
+        return True
+    else:
+        return False
+def check_points(count, points):
+    if points is None:
+        return False
+    else:
+        if isinstance(points, list):
+            return (True if count in points else False)
+        else:
+            return (True if count == points else False)
+def gen_bottleneck_params(backbone='xception_65'):
+    if backbone == 'xception_65':
+        bottleneck_params = {
+            "entry_flow": (3, [2, 2, 2], [128, 256, 728]),
+            "middle_flow": (16, 1, 728),
+            "exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536, 2048]])
+        }
+    elif backbone == 'xception_41':
+        bottleneck_params = {
+            "entry_flow": (3, [2, 2, 2], [128, 256, 728]),
+            "middle_flow": (8, 1, 728),
+            "exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536, 2048]])
+        }
+    elif backbone == 'xception_71':
+        bottleneck_params = {
+            "entry_flow": (5, [2, 1, 2, 1, 2], [128, 256, 256, 728, 728]),
+            "middle_flow": (16, 1, 728),
+            "exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536, 2048]])
+        }
+    else:
+        raise Exception(
+            "xception backbont only support xception_41/xception_65/xception_71"
+        )
+    return bottleneck_params
+class ConvBNLayer(fluid.dygraph.Layer):
+    def __init__(self,
+                 input_channels,
+                 output_channels,
+                 filter_size,
+                 stride=1,
+                 padding=0,
+                 act=None,
+                 name=None):
+        super(ConvBNLayer, self).__init__()
+        self._conv = Conv2D(
+            num_channels=input_channels,
+            num_filters=output_channels,
+            filter_size=filter_size,
+            stride=stride,
+            padding=padding,
+            param_attr=ParamAttr(name=name + "/weights"),
+            bias_attr=False)
+        self._bn = BatchNorm(
+            num_features=output_channels,
+            epsilon=1e-3,
+            momentum=0.99,
+            weight_attr=ParamAttr(name=name + "/BatchNorm/gamma"),
+            bias_attr=ParamAttr(name=name + "/BatchNorm/beta"))
+        self._act_op = layer_utils.Activation(act=act)
+    def forward(self, inputs):
+        return self._act_op(self._bn(self._conv(inputs)))
+class Seperate_Conv(fluid.dygraph.Layer):
+    def __init__(self,
+                 input_channels,
+                 output_channels,
+                 stride,
+                 filter,
+                 dilation=1,
+                 act=None,
+                 name=None):
+        super(Seperate_Conv, self).__init__()
+        self._conv1 = Conv2D(
+            num_channels=input_channels,
+            num_filters=input_channels,
+            filter_size=filter,
+            stride=stride,
+            groups=input_channels,
+            padding=(filter) // 2 * dilation,
+            dilation=dilation,
+            param_attr=ParamAttr(name=name + "/depthwise/weights"),
+            bias_attr=False)
+        self._bn1 = BatchNorm(
+            input_channels,
+            epsilon=1e-3,
+            momentum=0.99,
+            weight_attr=ParamAttr(name=name + "/depthwise/BatchNorm/gamma"),
+            bias_attr=ParamAttr(name=name + "/depthwise/BatchNorm/beta"))
+        self._act_op1 = layer_utils.Activation(act=act)
+        self._conv2 = Conv2D(
+            input_channels,
+            output_channels,
+            1,
+            stride=1,
+            groups=1,
+            padding=0,
+            param_attr=ParamAttr(name=name + "/pointwise/weights"),
+            bias_attr=False)
+        self._bn2 = BatchNorm(
+            output_channels,
+            epsilon=1e-3,
+            momentum=0.99,
+            weight_attr=ParamAttr(name=name + "/pointwise/BatchNorm/gamma"),
+            bias_attr=ParamAttr(name=name + "/pointwise/BatchNorm/beta"))
+        self._act_op2 = layer_utils.Activation(act=act)
+    def forward(self, inputs):
+        x = self._conv1(inputs)
+        x = self._bn1(x)
+        x = self._act_op1(x)
+        x = self._conv2(x)
+        x = self._bn2(x)
+        x = self._act_op2(x)
+        return x
+class Xception_Block(fluid.dygraph.Layer):
+    def __init__(self,
+                 input_channels,
+                 output_channels,
+                 strides=1,
+                 filter_size=3,
+                 dilation=1,
+                 skip_conv=True,
+                 has_skip=True,
+                 activation_fn_in_separable_conv=False,
+                 name=None):
+        super(Xception_Block, self).__init__()
+        repeat_number = 3
+        output_channels = check_data(output_channels, repeat_number)
+        filter_size = check_data(filter_size, repeat_number)
+        strides = check_data(strides, repeat_number)
+        self.has_skip = has_skip
+        self.skip_conv = skip_conv
+        self.activation_fn_in_separable_conv = activation_fn_in_separable_conv
+        if not activation_fn_in_separable_conv:
+            self._conv1 = Seperate_Conv(
+                input_channels,
+                output_channels[0],
+                stride=strides[0],
+                filter=filter_size[0],
+                dilation=dilation,
+                name=name + "/separable_conv1")
+            self._conv2 = Seperate_Conv(
+                output_channels[0],
+                output_channels[1],
+                stride=strides[1],
+                filter=filter_size[1],
+                dilation=dilation,
+                name=name + "/separable_conv2")
+            self._conv3 = Seperate_Conv(
+                output_channels[1],
+                output_channels[2],
+                stride=strides[2],
+                filter=filter_size[2],
+                dilation=dilation,
+                name=name + "/separable_conv3")
+        else:
+            self._conv1 = Seperate_Conv(
+                input_channels,
+                output_channels[0],
+                stride=strides[0],
+                filter=filter_size[0],
+                act="relu",
+                dilation=dilation,
+                name=name + "/separable_conv1")
+            self._conv2 = Seperate_Conv(
+                output_channels[0],
+                output_channels[1],
+                stride=strides[1],
+                filter=filter_size[1],
+                act="relu",
+                dilation=dilation,
+                name=name + "/separable_conv2")
+            self._conv3 = Seperate_Conv(
+                output_channels[1],
+                output_channels[2],
+                stride=strides[2],
+                filter=filter_size[2],
+                act="relu",
+                dilation=dilation,
+                name=name + "/separable_conv3")
+        if has_skip and skip_conv:
+            self._short = ConvBNLayer(
+                input_channels,
+                output_channels[-1],
+                1,
+                stride=strides[-1],
+                padding=0,
+                name=name + "/shortcut")
+    def forward(self, inputs):
+        layer_helper = LayerHelper(self.full_name(), act='relu')
+        if not self.activation_fn_in_separable_conv:
+            x = layer_helper.append_activation(inputs)
+            x = self._conv1(x)
+            x = layer_helper.append_activation(x)
+            x = self._conv2(x)
+            x = layer_helper.append_activation(x)
+            x = self._conv3(x)
+        else:
+            x = self._conv1(inputs)
+            x = self._conv2(x)
+            x = self._conv3(x)
+        if self.has_skip is False:
+            return x
+        if self.skip_conv:
+            skip = self._short(inputs)
+        else:
+            skip = inputs
+        return fluid.layers.elementwise_add(x, skip)
+class XceptionDeeplab(fluid.dygraph.Layer):
+    #def __init__(self, backbone, class_dim=1000):
+    # add output_stride
+    def __init__(self, backbone, output_stride=16, class_dim=1000, **kwargs):
+        super(XceptionDeeplab, self).__init__()
+        bottleneck_params = gen_bottleneck_params(backbone)
+        self.backbone = backbone
+        self._conv1 = ConvBNLayer(
+            3,
+            32,
+            3,
+            stride=2,
+            padding=1,
+            act="relu",
+            name=self.backbone + "/entry_flow/conv1")
+        self._conv2 = ConvBNLayer(
+            32,
+            64,
+            3,
+            stride=1,
+            padding=1,
+            act="relu",
+            name=self.backbone + "/entry_flow/conv2")
+        """
+            bottleneck_params = {
+            "entry_flow": (3, [2, 2, 2], [128, 256, 728]),
+            "middle_flow": (16, 1, 728),
+            "exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536, 2048]])
+        }
+        if output_stride == 16:
+            entry_block3_stride = 2
+            middle_block_dilation = 1
+            exit_block_dilations = (1, 2)
+        elif output_stride == 8:
+            entry_block3_stride = 1
+            middle_block_dilation = 2
+            exit_block_dilations = (2, 4)
+        """
+        self.block_num = bottleneck_params["entry_flow"][0]
+        self.strides = bottleneck_params["entry_flow"][1]
+        self.chns = bottleneck_params["entry_flow"][2]
+        self.strides = check_data(self.strides, self.block_num)
+        self.chns = check_data(self.chns, self.block_num)
+        self.entry_flow = []
+        self.middle_flow = []
+        self.stride = 2
+        self.output_stride = output_stride
+        s = self.stride
+        for i in range(self.block_num):
+            stride = self.strides[i] if check_stride(s * self.strides[i],
+                                                     self.output_stride) else 1
+            xception_block = self.add_sublayer(
+                self.backbone + "/entry_flow/block" + str(i + 1),
+                Xception_Block(
+                    input_channels=64 if i == 0 else self.chns[i - 1],
+                    output_channels=self.chns[i],
+                    strides=[1, 1, self.stride],
+                    name=self.backbone + "/entry_flow/block" + str(i + 1)))
+            self.entry_flow.append(xception_block)
+            s = s * stride
+        self.stride = s
+        self.block_num = bottleneck_params["middle_flow"][0]
+        self.strides = bottleneck_params["middle_flow"][1]
+        self.chns = bottleneck_params["middle_flow"][2]
+        self.strides = check_data(self.strides, self.block_num)
+        self.chns = check_data(self.chns, self.block_num)
+        s = self.stride
+        for i in range(self.block_num):
+            stride = self.strides[i] if check_stride(s * self.strides[i],
+                                                     self.output_stride) else 1
+            xception_block = self.add_sublayer(
+                self.backbone + "/middle_flow/block" + str(i + 1),
+                Xception_Block(
+                    input_channels=728,
+                    output_channels=728,
+                    strides=[1, 1, self.strides[i]],
+                    skip_conv=False,
+                    name=self.backbone + "/middle_flow/block" + str(i + 1)))
+            self.middle_flow.append(xception_block)
+            s = s * stride
+        self.stride = s
+        self.block_num = bottleneck_params["exit_flow"][0]
+        self.strides = bottleneck_params["exit_flow"][1]
+        self.chns = bottleneck_params["exit_flow"][2]
+        self.strides = check_data(self.strides, self.block_num)
+        self.chns = check_data(self.chns, self.block_num)
+        s = self.stride
+        stride = self.strides[0] if check_stride(s * self.strides[0],
+                                                 self.output_stride) else 1
+        self._exit_flow_1 = Xception_Block(
+            728,
+            self.chns[0], [1, 1, stride],
+            name=self.backbone + "/exit_flow/block1")
+        s = s * stride
+        stride = self.strides[1] if check_stride(s * self.strides[1],
+                                                 self.output_stride) else 1
+        self._exit_flow_2 = Xception_Block(
+            self.chns[0][-1],
+            self.chns[1], [1, 1, stride],
+            dilation=2,
+            has_skip=False,
+            activation_fn_in_separable_conv=True,
+            name=self.backbone + "/exit_flow/block2")
+        s = s * stride
+        self.stride = s
+        self._drop = Dropout(p=0.5)
+        self._pool = Pool2D(pool_type="avg", global_pooling=True)
+        self._fc = Linear(
+            self.chns[1][-1],
+            class_dim,
+            param_attr=ParamAttr(name="fc_weights"),
+            bias_attr=ParamAttr(name="fc_bias"))
+    def forward(self, inputs):
+        x = self._conv1(inputs)
+        x = self._conv2(x)
+        feat_list = []
+        for i, ef in enumerate(self.entry_flow):
+            x = ef(x)
+            if i == 0:
+                feat_list.append(x)
+        for mf in self.middle_flow:
+            x = mf(x)
+        x = self._exit_flow_1(x)
+        x = self._exit_flow_2(x)
+        feat_list.append(x)
+        x = self._drop(x)
+        x = self._pool(x)
+        x = fluid.layers.squeeze(x, axes=[2, 3])
+        x = self._fc(x)
+        return x, feat_list
+def Xception41_deeplab(**args):
+    model = XceptionDeeplab('xception_41', **args)
+    return model
+@manager.BACKBONES.add_component
+def Xception65_deeplab(**args):
+    model = XceptionDeeplab("xception_65", **args)
+    return model
+def Xception71_deeplab(**args):
+    model = XceptionDeeplab("xception_71", **args)
+    return model
\ No newline at end of file
--- a/dygraph/models/deeplab.py
+++ b/dygraph/models/deeplab.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+from dygraph.cvlibs import manager
+from dygraph.models.architectures import layer_utils
+from paddle import fluid
+from paddle.fluid import dygraph
+from paddle.fluid.dygraph import Conv2D
+from dygraph.utils import utils
+__all__ = ['DeepLabV3P', "deeplabv3p_resnet101_vd", "deeplabv3p_resnet101_vd_os8",
+           "deeplabv3p_resnet50_vd", "deeplabv3p_resnet50_vd_os8",
+           "deeplabv3p_xception65_deeplab",
+           "deeplabv3p_mobilenetv3_large", "deeplabv3p_mobilenetv3_small"]
+class ImageAverage(dygraph.Layer):
+    """
+    Global average pooling
+    Args:
+        num_channels (int): the number of input channels.
+    """
+    def __init__(self, num_channels):
+        super(ImageAverage, self).__init__()
+        self.conv_bn_relu = layer_utils.ConvBnRelu(num_channels,
+                                                   num_filters=256,
+                                                   filter_size=1)
+    def forward(self, input):
+        x = fluid.layers.reduce_mean(input, dim=[2, 3], keep_dim=True)
+        x = self.conv_bn_relu(x)
+        x = fluid.layers.resize_bilinear(x, out_shape=input.shape[2:])
+        return x
+class ASPP(dygraph.Layer):
+    """
+    Atrous Spatial Pyramid Pooling (ASPP) module of DeepLabV3P model
+    Five parallel branches (image-level average, one 1*1 conv and three dilated 3*3 convs)
+    are concatenated along the channel axis and fused by a 1*1 conv followed by dropout.
+    Args:
+        output_stride (int): the ratio of input size and final feature size. Support 16 or 8.
+        in_channels (int): the number of input channels in ASPP module.
+        using_sep_conv (bool): whether use separable conv or not. Default to True.
+    Raises:
+        NotImplementedError: if output_stride is neither 8 nor 16.
+    """
+    # every branch (including ImageAverage) outputs this many channels
+    BRANCH_CHANNELS = 256
+    # image average + 1*1 conv + three dilated convs
+    NUM_BRANCHES = 5
+    def __init__(self, output_stride, in_channels, using_sep_conv=True):
+        super(ASPP, self).__init__()
+        if output_stride == 16:
+            aspp_ratios = (6, 12, 18)
+        elif output_stride == 8:
+            aspp_ratios = (12, 24, 36)
+        else:
+            raise NotImplementedError("Only support output_stride is 8 or 16, but received {}".format(output_stride))
+        self.image_average = ImageAverage(num_channels=in_channels)
+        # The first aspp using 1*1 conv
+        self.aspp1 = layer_utils.ConvBnRelu(num_channels=in_channels,
+                                            num_filters=self.BRANCH_CHANNELS,
+                                            filter_size=1,
+                                            using_sep_conv=False)
+        # The second, third and fourth aspp using 3*3 (separable) conv at dilated rates
+        # aspp_ratios[0], aspp_ratios[1] and aspp_ratios[2]; padding equals the dilation
+        # so the spatial size is preserved.
+        self.aspp2 = self._make_dilated_branch(in_channels, aspp_ratios[0], using_sep_conv)
+        self.aspp3 = self._make_dilated_branch(in_channels, aspp_ratios[1], using_sep_conv)
+        self.aspp4 = self._make_dilated_branch(in_channels, aspp_ratios[2], using_sep_conv)
+        # After concat op, using 1*1 conv over all concatenated branches (5 * 256 = 1280 channels)
+        self.conv_bn_relu = layer_utils.ConvBnRelu(num_channels=self.NUM_BRANCHES * self.BRANCH_CHANNELS,
+                                                   num_filters=256,
+                                                   filter_size=1)
+    def _make_dilated_branch(self, in_channels, rate, using_sep_conv):
+        """Create one 3*3 conv-bn-relu branch with dilation `rate`."""
+        return layer_utils.ConvBnRelu(num_channels=in_channels,
+                                      num_filters=self.BRANCH_CHANNELS,
+                                      filter_size=3,
+                                      using_sep_conv=using_sep_conv,
+                                      dilation=rate,
+                                      padding=rate)
+    def forward(self, x):
+        """Run all five branches on x, concatenate them on the channel axis and fuse them."""
+        x1 = self.image_average(x)
+        x2 = self.aspp1(x)
+        x3 = self.aspp2(x)
+        x4 = self.aspp3(x)
+        x5 = self.aspp4(x)
+        x = fluid.layers.concat([x1, x2, x3, x4, x5], axis=1)
+        x = self.conv_bn_relu(x)
+        x = fluid.layers.dropout(x, dropout_prob=0.1)
+        return x
+class Decoder(dygraph.Layer):
+    """
+    Decoder head of the DeepLabV3P model.
+    The low-level feature is projected to 48 channels, concatenated with the upsampled
+    input feature, refined by two 3*3 convs and mapped to per-class logits by a 1*1 conv.
+    Args:
+        num_classes (int): the number of classes.
+        in_channels (int): the number of channels of the low-level feature fed to the decoder.
+        using_sep_conv (bool): whether use separable conv or not. Default to True.
+    """
+    def __init__(self, num_classes, in_channels, using_sep_conv=True):
+        super(Decoder, self).__init__()
+        # 1*1 projection of the low-level feature down to 48 channels
+        self.conv_bn_relu1 = layer_utils.ConvBnRelu(
+            num_channels=in_channels, num_filters=48, filter_size=1)
+        # 304 = 48 projected low-level channels + 256 channels expected on the other input
+        self.conv_bn_relu2 = layer_utils.ConvBnRelu(
+            num_channels=304,
+            num_filters=256,
+            filter_size=3,
+            using_sep_conv=using_sep_conv,
+            padding=1)
+        self.conv_bn_relu3 = layer_utils.ConvBnRelu(
+            num_channels=256,
+            num_filters=256,
+            filter_size=3,
+            using_sep_conv=using_sep_conv,
+            padding=1)
+        # final 1*1 classifier
+        self.conv = Conv2D(
+            num_channels=256, num_filters=num_classes, filter_size=1)
+    def forward(self, x, low_level_feat):
+        """Fuse x with low_level_feat at the low-level resolution and return the class logits."""
+        skip = self.conv_bn_relu1(low_level_feat)
+        upsampled = fluid.layers.resize_bilinear(x, skip.shape[2:])
+        fused = fluid.layers.concat([upsampled, skip], axis=1)
+        refined = self.conv_bn_relu3(self.conv_bn_relu2(fused))
+        return self.conv(refined)
+class DeepLabV3P(dygraph.Layer):
+    """
+    The DeepLabV3P consists of three main components, Backbone, ASPP and Decoder
+    The original article refers to
+    "Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation"
+     Liang-Chieh Chen, Yukun Zhu, George Papandreou, Florian Schroff, Hartwig Adam.
+     (https://arxiv.org/abs/1802.02611)
+    Args:
+        backbone (str): backbone name, looked up in manager.BACKBONES (required, no default).
+        num_classes (int): the unique number of target classes. Default 2.
+        output_stride (int): the ratio of input size and final feature size. Default 16.
+        backbone_indices (tuple): two values in the tuple indicate the indices of output of backbone.
+                        the first index will be taken as a low-level feature in Decoder component;
+                        the second one will be taken as input of ASPP component.
+                        Usually backbone consists of four downsampling stage, and return an output of
+                        each stage, so we set default (0, 3), which means taking feature map of the first
+                        stage in backbone as low-level feature used in Decoder, and feature map of the fourth
+                        stage as input of ASPP.
+        backbone_channels (tuple): the same length with "backbone_indices". It indicates the channels of corresponding index.
+        ignore_index (int): the value of ground-truth mask would be ignored while computing loss or doing evaluation. Default 255.
+        using_sep_conv (bool): a bool value indicates whether using separable convolutions
+                        in ASPP and Decoder components. Default True.
+        pretrained_model (str): the pretrained_model path of backbone.
+    """
+    def __init__(self,
+                 backbone,
+                 num_classes=2,
+                 output_stride=16,
+                 backbone_indices=(0, 3),
+                 backbone_channels=(256, 2048),
+                 ignore_index=255,
+                 using_sep_conv=True,
+                 pretrained_model=None):
+        super(DeepLabV3P, self).__init__()
+        self.backbone = manager.BACKBONES[backbone](output_stride=output_stride)
+        self.aspp = ASPP(output_stride, backbone_channels[1], using_sep_conv)
+        self.decoder = Decoder(num_classes, backbone_channels[0], using_sep_conv)
+        self.ignore_index = ignore_index
+        self.EPS = 1e-5
+        self.backbone_indices = backbone_indices
+        self.init_weight(pretrained_model)
+    def forward(self, input, label=None):
+        """
+        Run the network.
+        Args:
+            input (tensor): the input batch; logits are resized to input.shape[2:].
+            label (tensor, optional): ground truth, required when the layer is in training mode.
+        Returns:
+            the loss in training mode, otherwise a (pred, score_map) tuple.
+        Raises:
+            Exception: if the layer is in training mode and label is None.
+        """
+        _, feat_list = self.backbone(input)
+        low_level_feat = feat_list[self.backbone_indices[0]]
+        x = feat_list[self.backbone_indices[1]]
+        x = self.aspp(x)
+        logit = self.decoder(x, low_level_feat)
+        logit = fluid.layers.resize_bilinear(logit, input.shape[2:])
+        if self.training:
+            # fail early with a clear message instead of an obscure error inside the loss
+            if label is None:
+                raise Exception('Label is needed during training')
+            return self._get_loss(logit, label)
+        else:
+            score_map = fluid.layers.softmax(logit, axis=1)
+            score_map = fluid.layers.transpose(score_map, [0, 2, 3, 1])
+            pred = fluid.layers.argmax(score_map, axis=3)
+            pred = fluid.layers.unsqueeze(pred, axes=[3])
+            return pred, score_map
+    def init_weight(self, pretrained_model=None):
+        """
+        Initialize the parameters of model parts.
+        Args:
+            pretrained_model ([str], optional): the pretrained_model path of backbone. Defaults to None.
+        Raises:
+            Exception: if pretrained_model is given but the path does not exist.
+        """
+        if pretrained_model is not None:
+            if os.path.exists(pretrained_model):
+                utils.load_pretrained_model(self.backbone, pretrained_model)
+            else:
+                # a wrong path must not silently fall back to random initialization
+                raise Exception('Pretrained model is not found: {}'.format(
+                    pretrained_model))
+    def _get_loss(self, logit, label):
+        """
+        compute forward loss of the model
+        Args:
+            logit (tensor): the logit of model output
+            label (tensor): ground truth
+        Returns:
+            avg_loss (tensor): forward loss
+        """
+        # move the class axis last so that softmax_with_cross_entropy works on axis=-1
+        logit = fluid.layers.transpose(logit, [0, 2, 3, 1])
+        label = fluid.layers.transpose(label, [0, 2, 3, 1])
+        mask = label != self.ignore_index
+        mask = fluid.layers.cast(mask, 'float32')
+        loss, _ = fluid.layers.softmax_with_cross_entropy(
+            logit,
+            label,
+            ignore_index=self.ignore_index,
+            return_softmax=True,
+            axis=-1)
+        loss = loss * mask
+        # normalize by the fraction of non-ignored positions; EPS avoids division by zero
+        avg_loss = fluid.layers.mean(loss) / (
+                fluid.layers.mean(mask) + self.EPS)
+        label.stop_gradient = True
+        mask.stop_gradient = True
+        return avg_loss
+def build_aspp(output_stride, using_sep_conv, in_channels=2048):
+    """
+    Build an ASPP module.
+    Args:
+        output_stride (int): the ratio of input size and final feature size. Support 16 or 8.
+        using_sep_conv (bool): whether use separable conv or not.
+        in_channels (int): the number of input channels of ASPP. ASPP requires it; the default 2048
+                        matches the default backbone_channels[1] of DeepLabV3P.
+    """
+    return ASPP(output_stride=output_stride, in_channels=in_channels, using_sep_conv=using_sep_conv)
+def build_decoder(num_classes, using_sep_conv, in_channels=256):
+    """
+    Build a Decoder module.
+    Args:
+        num_classes (int): the number of classes.
+        using_sep_conv (bool): whether use separable conv or not.
+        in_channels (int): the number of channels of the low-level feature. Decoder requires it; the
+                        default 256 matches the default backbone_channels[0] of DeepLabV3P.
+    """
+    return Decoder(num_classes, in_channels=in_channels, using_sep_conv=using_sep_conv)
+@manager.MODELS.add_component
+def deeplabv3p_resnet101_vd(*args, **kwargs):
+    """Build DeepLabV3P on the ResNet101_vd backbone. Positional arguments are accepted but ignored."""
+    # pop it so a caller-supplied pretrained_model is honored instead of raising a duplicate-keyword TypeError
+    pretrained_model = kwargs.pop('pretrained_model', None)
+    return DeepLabV3P(backbone='ResNet101_vd', pretrained_model=pretrained_model, **kwargs)
+@manager.MODELS.add_component
+def deeplabv3p_resnet101_vd_os8(*args, **kwargs):
+    """Build DeepLabV3P on the ResNet101_vd backbone with output_stride=8. Positional arguments are accepted but ignored."""
+    # pop it so a caller-supplied pretrained_model is honored instead of raising a duplicate-keyword TypeError
+    pretrained_model = kwargs.pop('pretrained_model', None)
+    return DeepLabV3P(backbone='ResNet101_vd', output_stride=8, pretrained_model=pretrained_model, **kwargs)
+@manager.MODELS.add_component
+def deeplabv3p_resnet50_vd(*args, **kwargs):
+    """Build DeepLabV3P on the ResNet50_vd backbone. Positional arguments are accepted but ignored."""
+    # pop it so a caller-supplied pretrained_model is honored instead of raising a duplicate-keyword TypeError
+    pretrained_model = kwargs.pop('pretrained_model', None)
+    return DeepLabV3P(backbone='ResNet50_vd', pretrained_model=pretrained_model, **kwargs)
+@manager.MODELS.add_component
+def deeplabv3p_resnet50_vd_os8(*args, **kwargs):
+    """Build DeepLabV3P on the ResNet50_vd backbone with output_stride=8. Positional arguments are accepted but ignored."""
+    # pop it so a caller-supplied pretrained_model is honored instead of raising a duplicate-keyword TypeError
+    pretrained_model = kwargs.pop('pretrained_model', None)
+    return DeepLabV3P(backbone='ResNet50_vd', output_stride=8, pretrained_model=pretrained_model, **kwargs)
+@manager.MODELS.add_component
+def deeplabv3p_xception65_deeplab(*args, **kwargs):
+    """
+    Build DeepLabV3P on the Xception65_deeplab backbone, using backbone outputs 0 and 1
+    with 128 and 2048 channels. Positional arguments are accepted but ignored.
+    """
+    # pop it so a caller-supplied pretrained_model is honored instead of raising a duplicate-keyword TypeError
+    pretrained_model = kwargs.pop('pretrained_model', None)
+    return DeepLabV3P(backbone='Xception65_deeplab',
+                      pretrained_model=pretrained_model,
+                      backbone_indices=(0, 1),
+                      backbone_channels=(128, 2048),
+                      **kwargs)
+@manager.MODELS.add_component
+def deeplabv3p_mobilenetv3_large(*args, **kwargs):
+    """
+    Build DeepLabV3P on the MobileNetV3_large_x1_0 backbone, using backbone outputs 0 and 3
+    with 24 and 160 channels. Positional arguments are accepted but ignored.
+    """
+    # pop it so a caller-supplied pretrained_model is honored instead of raising a duplicate-keyword TypeError
+    pretrained_model = kwargs.pop('pretrained_model', None)
+    return DeepLabV3P(backbone='MobileNetV3_large_x1_0',
+                      pretrained_model=pretrained_model,
+                      backbone_indices=(0, 3),
+                      backbone_channels=(24, 160),
+                      **kwargs)
+@manager.MODELS.add_component
+def deeplabv3p_mobilenetv3_small(*args, **kwargs):
+    """
+    Build DeepLabV3P on the MobileNetV3_small_x1_0 backbone, using backbone outputs 0 and 3
+    with 16 and 96 channels. Positional arguments are accepted but ignored.
+    """
+    # pop it so a caller-supplied pretrained_model is honored instead of raising a duplicate-keyword TypeError
+    pretrained_model = kwargs.pop('pretrained_model', None)
+    return DeepLabV3P(backbone='MobileNetV3_small_x1_0',
+                      pretrained_model=pretrained_model,
+                      backbone_indices=(0, 3),
+                      backbone_channels=(16, 96),
+                      **kwargs)
--- a/dygraph/models/fcn.py
+++ b/dygraph/models/fcn.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import math
+import os
+import paddle
+import paddle.fluid as fluid
+from paddle.fluid.param_attr import ParamAttr
+from paddle.fluid.layer_helper import LayerHelper
+from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
+from paddle.fluid.initializer import Normal
+from paddle.nn import SyncBatchNorm as BatchNorm
+from dygraph.cvlibs import manager
+from dygraph import utils
+__all__ = [
+    "fcn_hrnet_w18_small_v1", "fcn_hrnet_w18_small_v2", "fcn_hrnet_w18",
+    "fcn_hrnet_w30", "fcn_hrnet_w32", "fcn_hrnet_w40", "fcn_hrnet_w44",
+    "fcn_hrnet_w48", "fcn_hrnet_w60", "fcn_hrnet_w64"
+]
+class FCN(fluid.dygraph.Layer):
+    """
+    Fully Convolutional Networks for Semantic Segmentation.
+    https://arxiv.org/abs/1411.4038
+    The backbone output goes through a 1*1 conv-bn-relu and a 1*1 classifier, and the
+    logits are resized to the spatial size of the input.
+    Args:
+        backbone (str): backbone name, looked up in manager.BACKBONES.
+        num_classes (int): the unique number of target classes.
+        in_channels (int): the channels of input feature maps.
+        channels (int): channels after conv layer before the last one. Defaults to in_channels.
+        pretrained_model (str): the path of pretrained model.
+        ignore_index (int): the value of ground-truth mask would be ignored while computing loss or doing evaluation. Default 255.
+        **kwargs: forwarded unchanged to the backbone constructor.
+    """
+    def __init__(self,
+                 backbone,
+                 num_classes,
+                 in_channels,
+                 channels=None,
+                 pretrained_model=None,
+                 ignore_index=255,
+                 **kwargs):
+        super(FCN, self).__init__()
+        self.num_classes = num_classes
+        self.ignore_index = ignore_index
+        self.EPS = 1e-5
+        # keep the feature width when no intermediate width is requested
+        channels = in_channels if channels is None else channels
+        self.backbone = manager.BACKBONES[backbone](**kwargs)
+        self.conv_last_2 = ConvBNLayer(
+            num_channels=in_channels,
+            num_filters=channels,
+            filter_size=1,
+            stride=1,
+            name='conv-2')
+        classifier_attr = ParamAttr(
+            initializer=Normal(scale=0.001), name='conv-1_weights')
+        self.conv_last_1 = Conv2D(
+            num_channels=channels,
+            num_filters=self.num_classes,
+            filter_size=1,
+            stride=1,
+            padding=0,
+            param_attr=classifier_attr)
+        self.init_weight(pretrained_model)
+    def forward(self, x, label=None, mode='train'):
+        """
+        Return the loss in training mode, otherwise a (pred, score_map) tuple.
+        The `mode` argument is not read; the branch is chosen by self.training.
+        """
+        out_size = x.shape[2:]
+        feat = self.conv_last_2(self.backbone(x))
+        logit = fluid.layers.resize_bilinear(self.conv_last_1(feat), out_size)
+        if not self.training:
+            score_map = fluid.layers.transpose(
+                fluid.layers.softmax(logit, axis=1), [0, 2, 3, 1])
+            pred = fluid.layers.unsqueeze(
+                fluid.layers.argmax(score_map, axis=3), axes=[3])
+            return pred, score_map
+        if label is None:
+            raise Exception('Label is need during training')
+        return self._get_loss(logit, label)
+    def init_weight(self, pretrained_model=None):
+        """
+        Load pretrained parameters, first into the backbone and then into the whole model.
+        Args:
+            pretrained_model ([str], optional): the path of the pretrained model. Defaults to None.
+        Raises:
+            Exception: if pretrained_model is given but the path does not exist.
+        """
+        if pretrained_model is None:
+            return
+        if not os.path.exists(pretrained_model):
+            raise Exception('Pretrained model is not found: {}'.format(
+                pretrained_model))
+        utils.load_pretrained_model(self.backbone, pretrained_model)
+        utils.load_pretrained_model(self, pretrained_model)
+    def _get_loss(self, logit, label):
+        """
+        Masked softmax cross entropy averaged over the non-ignored positions.
+        Args:
+            logit (tensor): the logit of model output
+            label (tensor): ground truth
+        Returns:
+            avg_loss (tensor): forward loss
+        """
+        channel_last = [0, 2, 3, 1]
+        logit = fluid.layers.transpose(logit, channel_last)
+        label = fluid.layers.transpose(label, channel_last)
+        mask = fluid.layers.cast(label != self.ignore_index, 'float32')
+        loss, probs = fluid.layers.softmax_with_cross_entropy(
+            logit,
+            label,
+            ignore_index=self.ignore_index,
+            return_softmax=True,
+            axis=-1)
+        masked_loss = loss * mask
+        valid_ratio = fluid.layers.mean(mask) + self.EPS
+        avg_loss = fluid.layers.mean(masked_loss) / valid_ratio
+        label.stop_gradient = True
+        mask.stop_gradient = True
+        return avg_loss
+class ConvBNLayer(fluid.dygraph.Layer):
+    """
+    Conv2D (without bias) followed by batch norm and an optional ReLU.
+    Args:
+        num_channels (int): the number of input channels.
+        num_filters (int): the number of output channels.
+        filter_size (int): the kernel size; padding is (filter_size - 1) // 2.
+        stride (int): the conv stride. Default 1.
+        groups (int): the conv groups. Default 1.
+        act (str): 'relu' applies ReLU after batch norm, any other value applies none. Default 'relu'.
+        name (str, optional): prefix of the explicit parameter names. When None, no explicit
+                        names are passed and the framework names the parameters. Default None.
+    """
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 filter_size,
+                 stride=1,
+                 groups=1,
+                 act="relu",
+                 name=None):
+        super(ConvBNLayer, self).__init__()
+        # name defaults to None, so only build suffixed names when a prefix is given
+        # (concatenating None with a str would raise TypeError)
+        has_name = name is not None
+        conv_name = name + "_weights" if has_name else None
+        bn_name = name + '_bn' if has_name else None
+        self._conv = Conv2D(
+            num_channels=num_channels,
+            num_filters=num_filters,
+            filter_size=filter_size,
+            stride=stride,
+            padding=(filter_size - 1) // 2,
+            groups=groups,
+            param_attr=ParamAttr(
+                initializer=Normal(scale=0.001), name=conv_name),
+            bias_attr=False)
+        self._batch_norm = BatchNorm(
+            num_filters,
+            weight_attr=ParamAttr(
+                name=bn_name + '_scale' if has_name else None,
+                initializer=fluid.initializer.Constant(1.0)),
+            bias_attr=ParamAttr(
+                name=bn_name + '_offset' if has_name else None,
+                initializer=fluid.initializer.Constant(0.0)))
+        self.act = act
+    def forward(self, input):
+        """Apply conv, batch norm and, when act == 'relu', ReLU."""
+        y = self._conv(input)
+        y = self._batch_norm(y)
+        if self.act == 'relu':
+            y = fluid.layers.relu(y)
+        return y
+@manager.MODELS.add_component
+def fcn_hrnet_w18_small_v1(*args, **kwargs):
+    """Build FCN on the HRNet_W18_Small_V1 backbone (in_channels=240); positional arguments are ignored."""
+    model = FCN(backbone='HRNet_W18_Small_V1', in_channels=240, **kwargs)
+    return model
+@manager.MODELS.add_component
+def fcn_hrnet_w18_small_v2(*args, **kwargs):
+    """Build FCN on the HRNet_W18_Small_V2 backbone (in_channels=270); positional arguments are ignored."""
+    model = FCN(backbone='HRNet_W18_Small_V2', in_channels=270, **kwargs)
+    return model
+@manager.MODELS.add_component
+def fcn_hrnet_w18(*args, **kwargs):
+    """Build FCN on the HRNet_W18 backbone (in_channels=270); positional arguments are ignored."""
+    model = FCN(backbone='HRNet_W18', in_channels=270, **kwargs)
+    return model
+@manager.MODELS.add_component
+def fcn_hrnet_w30(*args, **kwargs):
+    """Build FCN on the HRNet_W30 backbone (in_channels=450); positional arguments are ignored."""
+    model = FCN(backbone='HRNet_W30', in_channels=450, **kwargs)
+    return model
+@manager.MODELS.add_component
+def fcn_hrnet_w32(*args, **kwargs):
+    """Build FCN on the HRNet_W32 backbone (in_channels=480); positional arguments are ignored."""
+    model = FCN(backbone='HRNet_W32', in_channels=480, **kwargs)
+    return model
+@manager.MODELS.add_component
+def fcn_hrnet_w40(*args, **kwargs):
+    """Build FCN on the HRNet_W40 backbone (in_channels=600); positional arguments are ignored."""
+    model = FCN(backbone='HRNet_W40', in_channels=600, **kwargs)
+    return model
+@manager.MODELS.add_component
+def fcn_hrnet_w44(*args, **kwargs):
+    """Build FCN on the HRNet_W44 backbone (in_channels=660); positional arguments are ignored."""
+    model = FCN(backbone='HRNet_W44', in_channels=660, **kwargs)
+    return model
+@manager.MODELS.add_component
+def fcn_hrnet_w48(*args, **kwargs):
+    """Build FCN on the HRNet_W48 backbone (in_channels=720); positional arguments are ignored."""
+    model = FCN(backbone='HRNet_W48', in_channels=720, **kwargs)
+    return model
+@manager.MODELS.add_component
+def fcn_hrnet_w60(*args, **kwargs):
+    """Build FCN on the HRNet_W60 backbone (in_channels=900); positional arguments are ignored."""
+    model = FCN(backbone='HRNet_W60', in_channels=900, **kwargs)
+    return model
+@manager.MODELS.add_component
+def fcn_hrnet_w64(*args, **kwargs):
+    """Build FCN on the HRNet_W64 backbone (in_channels=960); positional arguments are ignored."""
+    model = FCN(backbone='HRNet_W64', in_channels=960, **kwargs)
+    return model
--- a/dygraph/models/model_utils.py
+++ b/dygraph/models/model_utils.py
+# -*- encoding: utf-8 -*-
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import paddle
+import paddle.nn.functional as F
+from paddle import fluid
+from paddle.fluid import dygraph
+from paddle.fluid.dygraph import Conv2D
+from paddle.nn import SyncBatchNorm as BatchNorm
+from dygraph.models.architectures import layer_utils
+class FCNHead(fluid.dygraph.Layer):
+    """
+    The FCNHead implementation used in auxiliary layer
+    A 3*3 conv-bn-relu reduces the channels to in_channels // 4, then dropout and a
+    1*1 conv produce the output channels.
+    Args:
+        in_channels (int): the number of input channels
+        out_channels (int): the number of output channels
+    """
+    def __init__(self, in_channels, out_channels):
+        super(FCNHead, self).__init__()
+        inter_channels = in_channels // 4
+        self.conv_bn_relu = layer_utils.ConvBnRelu(num_channels=in_channels,
+                                                   num_filters=inter_channels,
+                                                   filter_size=3,
+                                                   padding=1)
+        self.conv = Conv2D(num_channels=inter_channels,
+                           num_filters=out_channels,
+                           filter_size=1)
+    def forward(self, x):
+        """Return the head output for the feature map x."""
+        x = self.conv_bn_relu(x)
+        # F.dropout defaults to training=True, so tie it to the layer mode to
+        # keep dropout off during evaluation
+        x = F.dropout(x, p=0.1, training=self.training)
+        x = self.conv(x)
+        return x
+def get_loss(logit, label, ignore_index=255, EPS=1e-5):
+    """
+    Masked softmax cross entropy averaged over the non-ignored positions.
+    Args:
+        logit (tensor): the logit of model output
+        label (tensor): ground truth
+        ignore_index (int): label value excluded from the loss. Default 255.
+        EPS (float): added to the mean of the mask before dividing. Default 1e-5.
+    Returns:
+        avg_loss (tensor): forward loss
+    """
+    channel_last = [0, 2, 3, 1]
+    logit = fluid.layers.transpose(logit, channel_last)
+    label = fluid.layers.transpose(label, channel_last)
+    mask = fluid.layers.cast(label != ignore_index, 'float32')
+    loss, probs = fluid.layers.softmax_with_cross_entropy(
+        logit,
+        label,
+        ignore_index=ignore_index,
+        return_softmax=True,
+        axis=-1)
+    masked_loss = loss * mask
+    valid_ratio = paddle.mean(mask) + EPS
+    avg_loss = paddle.mean(masked_loss) / valid_ratio
+    label.stop_gradient = True
+    mask.stop_gradient = True
+    return avg_loss
+def get_pred_score_map(logit):
+    """
+    Turn logits into a prediction map and a score map for the inference phase.
+    Args:
+        logit (tensor): output logit of network
+    Returns:
+        pred (tensor): prediction map, the argmax over the last axis of score_map with that axis kept
+        score_map (tensor): softmax over axis 1 of logit, transposed with [0, 2, 3, 1]
+    """
+    probs = F.softmax(logit, axis=1)
+    score_map = fluid.layers.transpose(probs, [0, 2, 3, 1])
+    best_class = fluid.layers.argmax(score_map, axis=3)
+    pred = fluid.layers.unsqueeze(best_class, axes=[3])
+    return pred, score_map
\ No newline at end of file
--- a/dygraph/models/pspnet.py
+++ b/dygraph/models/pspnet.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import paddle.nn.functional as F
+from paddle import fluid
+from paddle.fluid.dygraph import Conv2D
+from dygraph.cvlibs import manager
+from dygraph.models import model_utils
+from dygraph.models.architectures import layer_utils
+from dygraph.utils import utils
+class PSPNet(fluid.dygraph.Layer):
+    """
+    The PSPNet implementation
+    The original article refers to
+        Zhao, Hengshuang, et al. "Pyramid scene parsing network."
+        Proceedings of the IEEE conference on computer vision and pattern recognition. 2017.
+        (https://openaccess.thecvf.com/content_cvpr_2017/papers/Zhao_Pyramid_Scene_Parsing_CVPR_2017_paper.pdf)
+    Args:
+        backbone (str): backbone name, looked up in manager.BACKBONES, currently support Resnet50/101.
+        num_classes (int): the unique number of target classes. Default 2.
+        output_stride (int): the ratio of input size and final feature size. Default 16.
+        backbone_indices (tuple): two values in the tuple indicate the indices of output of backbone.
+                        the first index will be taken as a deep-supervision feature in auxiliary layer;
+                        the second one will be taken as input of Pyramid Pooling Module (PPModule).
+                        Usually backbone consists of four downsampling stage, and return an output of
+                        each stage, so we set default (2, 3), which means taking feature map of the third
+                        stage (res4b22) in backbone, and feature map of the fourth stage (res5c) as input of PPModule.
+        backbone_channels (tuple): the same length with "backbone_indices". It indicates the channels of corresponding index.
+        pp_out_channels (int): output channels after Pyramid Pooling Module. Default to 1024.
+        bin_sizes (tuple): the out size of pooled feature maps. Default to (1,2,3,6).
+        enable_auxiliary_loss (bool): a bool value indicates whether adding auxiliary loss. Default to True.
+        ignore_index (int): the value of ground-truth mask would be ignored while computing loss. Default to 255.
+        pretrained_model (str): the pretrained_model path of backbone.
+    """
+    def __init__(self,
+                 backbone,
+                 num_classes=2,
+                 output_stride=16,
+                 backbone_indices=(2, 3),
+                 backbone_channels=(1024, 2048),
+                 pp_out_channels=1024,
+                 bin_sizes=(1, 2, 3, 6),
+                 enable_auxiliary_loss=True,
+                 ignore_index=255,
+                 pretrained_model=None):
+        super(PSPNet, self).__init__()
+        self.backbone = manager.BACKBONES[backbone](output_stride=output_stride,
+                                                    multi_grid=(1, 1, 1))
+        self.backbone_indices = backbone_indices
+        self.psp_module = PPModule(in_channels=backbone_channels[1],
+                                   out_channels=pp_out_channels,
+                                   bin_sizes=bin_sizes)
+        self.conv = Conv2D(num_channels=pp_out_channels,
+                           num_filters=num_classes,
+                           filter_size=1)
+        if enable_auxiliary_loss:
+            self.fcn_head = model_utils.FCNHead(in_channels=backbone_channels[0], out_channels=num_classes)
+        self.enable_auxiliary_loss = enable_auxiliary_loss
+        self.ignore_index = ignore_index
+        self.init_weight(pretrained_model)
+    def forward(self, input, label=None):
+        """
+        Run the network.
+        Args:
+            input (tensor): the input batch; logits are resized to input.shape[2:].
+            label (tensor, optional): ground truth, used in training mode.
+        Returns:
+            the loss (plus 0.4 * auxiliary loss when enabled) in training mode,
+            otherwise a (pred, score_map) tuple.
+        """
+        _, feat_list = self.backbone(input)
+        x = feat_list[self.backbone_indices[1]]
+        x = self.psp_module(x)
+        # same keyword as FCNHead in model_utils; training is tied to the layer mode
+        # because F.dropout defaults to training=True
+        x = F.dropout(x, p=0.1, training=self.training)
+        logit = self.conv(x)
+        logit = fluid.layers.resize_bilinear(logit, input.shape[2:])
+        if not self.training:
+            # the auxiliary head only contributes to the loss, so it is skipped here
+            pred, score_map = model_utils.get_pred_score_map(logit)
+            return pred, score_map
+        # pass the configured ignore_index instead of relying on get_loss's default
+        loss = model_utils.get_loss(logit, label, ignore_index=self.ignore_index)
+        if self.enable_auxiliary_loss:
+            auxiliary_feat = feat_list[self.backbone_indices[0]]
+            auxiliary_logit = self.fcn_head(auxiliary_feat)
+            auxiliary_logit = fluid.layers.resize_bilinear(auxiliary_logit, input.shape[2:])
+            auxiliary_loss = model_utils.get_loss(
+                auxiliary_logit, label, ignore_index=self.ignore_index)
+            loss += (0.4 * auxiliary_loss)
+        return loss
+    def init_weight(self, pretrained_model=None):
+        """
+        Initialize the parameters of model parts.
+        Args:
+            pretrained_model ([str], optional): the pretrained_model path of backbone. Defaults to None.
+        Raises:
+            Exception: if pretrained_model is given but the path does not exist.
+        """
+        if pretrained_model is not None:
+            if os.path.exists(pretrained_model):
+                utils.load_pretrained_model(self.backbone, pretrained_model)
+            else:
+                # a wrong path must not silently fall back to random initialization
+                raise Exception('Pretrained model is not found: {}'.format(
+                    pretrained_model))
+class PPModule(fluid.dygraph.Layer):
+    """
+    Pyramid pooling module
+    Args:
+        in_channels (int): the number of input channels to pyramid pooling module.
+        out_channels (int): the number of output channels after pyramid pooling module.
+        bin_sizes (tuple): the out size of pooled feature maps. Default to (1,2,3,6).
+    """
+    def __init__(self, in_channels, out_channels, bin_sizes=(1, 2, 3, 6)):
+        super(PPModule, self).__init__()
+        self.bin_sizes = bin_sizes
+        # we use dimension reduction after pooling mentioned in original implementation.
+        self.stages = fluid.dygraph.LayerList([self._make_stage(in_channels, size) for size in bin_sizes])
+        # The concatenation holds the input plus one reduced map per bin size. This equals
+        # in_channels * 2 when in_channels is divisible by len(bin_sizes) (e.g. the defaults),
+        # and stays correct when the floor division drops a remainder.
+        stage_channels = in_channels // len(bin_sizes)
+        concat_channels = in_channels + stage_channels * len(bin_sizes)
+        self.conv_bn_relu2 = layer_utils.ConvBnRelu(num_channels=concat_channels,
+                                                    num_filters=out_channels,
+                                                    filter_size=3,
+                                                    padding=1)
+    def _make_stage(self, in_channels, size):
+        """
+        Create the 1*1 conv layer applied after pooling for one bin size.
+        In our implementation, we adopt the same dimension reduction as the original paper that might be
+        slightly different with other implementations.
+        After pooling, the channels are reduced to 1/len(bin_sizes) immediately, while some other implementations
+        keep the channels to be same.
+        Args:
+            in_channels (int): the number of input channels to pyramid pooling module.
+            size (int): the out size of the pooled layer. Not used here; the pooling itself is done in forward.
+        Returns:
+            conv (Layer): the conv-bn-relu layer of this stage
+        """
+        # this paddle version does not support AdaptiveAvgPool2d, so skip it here.
+        # prior = nn.AdaptiveAvgPool2d(output_size=(size, size))
+        conv = layer_utils.ConvBnRelu(num_channels=in_channels,
+                                      num_filters=in_channels // len(self.bin_sizes),
+                                      filter_size=1)
+        return conv
+    def forward(self, input):
+        """Pool the input to every bin size, reduce, upsample, and fuse with the input."""
+        cat_layers = []
+        for size, stage in zip(self.bin_sizes, self.stages):
+            # NOTE(review): pool_type is "max" although the comment in _make_stage refers to
+            # average pooling - confirm this is intended before changing it.
+            x = fluid.layers.adaptive_pool2d(input, pool_size=(size, size), pool_type="max")
+            x = stage(x)
+            x = fluid.layers.resize_bilinear(x, out_shape=input.shape[2:])
+            cat_layers.append(x)
+        # input first, then the stage outputs in reverse bin-size order
+        cat_layers = [input] + cat_layers[::-1]
+        cat = fluid.layers.concat(cat_layers, axis=1)
+        out = self.conv_bn_relu2(cat)
+        return out
+@manager.MODELS.add_component
+def pspnet_resnet101_vd(*args, **kwargs):
+    """Build PSPNet on the ResNet101_vd backbone. Positional arguments are accepted but ignored."""
+    # pop it so a caller-supplied pretrained_model is honored instead of raising a duplicate-keyword TypeError
+    pretrained_model = kwargs.pop('pretrained_model', None)
+    return PSPNet(backbone='ResNet101_vd', pretrained_model=pretrained_model, **kwargs)
+@manager.MODELS.add_component
+def pspnet_resnet101_vd_os8(*args, **kwargs):
+    """Build PSPNet on the ResNet101_vd backbone with output_stride=8. Positional arguments are accepted but ignored."""
+    # pop it so a caller-supplied pretrained_model is honored instead of raising a duplicate-keyword TypeError
+    pretrained_model = kwargs.pop('pretrained_model', None)
+    return PSPNet(backbone='ResNet101_vd', output_stride=8, pretrained_model=pretrained_model, **kwargs)
+@manager.MODELS.add_component
+def pspnet_resnet50_vd(*args, **kwargs):
+    """Build PSPNet on the ResNet50_vd backbone. Positional arguments are accepted but ignored."""
+    # pop it so a caller-supplied pretrained_model is honored instead of raising a duplicate-keyword TypeError
+    pretrained_model = kwargs.pop('pretrained_model', None)
+    return PSPNet(backbone='ResNet50_vd', pretrained_model=pretrained_model, **kwargs)
+@manager.MODELS.add_component
+def pspnet_resnet50_vd_os8(*args, **kwargs):
+    """Build PSPNet on the ResNet50_vd backbone with output_stride=8. Positional arguments are accepted but ignored."""
+    # pop it so a caller-supplied pretrained_model is honored instead of raising a duplicate-keyword TypeError
+    pretrained_model = kwargs.pop('pretrained_model', None)
+    return PSPNet(backbone='ResNet50_vd', output_stride=8, pretrained_model=pretrained_model, **kwargs)
--- a/dygraph/models/unet.py
+++ b/dygraph/models/unet.py
@@ -12,24 +12,42 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import os
 import paddle.fluid as fluid
-from paddle.fluid.dygraph import Conv2D, BatchNorm, Pool2D
+from paddle.fluid.dygraph import Conv2D, Pool2D
+from paddle.nn import SyncBatchNorm as BatchNorm
+from dygraph.cvlibs import manager
+from dygraph import utils
 class UNet(fluid.dygraph.Layer):
-    def __init__(self, num_classes, ignore_index=255):
+    """
-        super().__init__()
+    U-Net: Convolutional Networks for Biomedical Image Segmentation.
+    https://arxiv.org/abs/1505.04597
+    Args:
+        num_classes (int): the unique number of target classes.
+        pretrained_model (str): the path of pretrained model.
+        ignore_index (int): the value of ground-truth mask would be ignored while computing loss or doing evaluation. Default 255.
+    """
+    def __init__(self, num_classes, pretrained_model=None, ignore_index=255):
+        super(UNet, self).__init__()
        self.encode = UnetEncoder()
        self.decode = UnetDecode()
        self.get_logit = GetLogit(64, num_classes)
        self.ignore_index = ignore_index
        self.EPS = 1e-5
-    def forward(self, x, label=None, mode='train'):
+        self.init_weight(pretrained_model)
+    def forward(self, x, label=None):
        encode_data, short_cuts = self.encode(x)
        decode_data = self.decode(encode_data, short_cuts)
        logit = self.get_logit(decode_data)
-        if mode == 'train':
+        if self.training:
            return self._get_loss(logit, label)
        else:
            score_map = fluid.layers.softmax(logit, axis=1)
@@ -38,7 +56,23 @@ class UNet(fluid.dygraph.Layer):
            pred = fluid.layers.unsqueeze(pred, axes=[3])
            return pred, score_map
+    def init_weight(self, pretrained_model=None):
+        """
+        Load pretrained parameters into the model.
+        Args:
+            pretrained_model (str, optional): path of the pretrained model to load.
+                Nothing is loaded when it is None. Defaults to None.
+        Raises:
+            Exception: if `pretrained_model` is given but the path does not exist.
+        """
+        if pretrained_model is None:
+            return
+        if not os.path.exists(pretrained_model):
+            raise Exception('Pretrained model is not found: {}'.format(
+                pretrained_model))
+        # UNet defines encode/decode/get_logit but no `backbone` sub-layer, so the
+        # weights are loaded into the model itself only.
+        utils.load_pretrained_model(self, pretrained_model)
    def _get_loss(self, logit, label):
+        logit = fluid.layers.transpose(logit, [0, 2, 3, 1])
+        label = fluid.layers.transpose(label, [0, 2, 3, 1])
        mask = label != self.ignore_index
        mask = fluid.layers.cast(mask, 'float32')
        loss, probs = fluid.layers.softmax_with_cross_entropy(
@@ -46,7 +80,7 @@ class UNet(fluid.dygraph.Layer):
            label,
            ignore_index=self.ignore_index,
            return_softmax=True,
-            axis=1)
+            axis=-1)
        loss = loss * mask
        avg_loss = fluid.layers.mean(loss) / (
@@ -59,7 +93,7 @@ class UNet(fluid.dygraph.Layer):
 class UnetEncoder(fluid.dygraph.Layer):
    def __init__(self):
-        super().__init__()
+        super(UnetEncoder, self).__init__()
        self.double_conv = DoubleConv(3, 64)
        self.down1 = Down(64, 128)
        self.down2 = Down(128, 256)
@@ -82,7 +116,7 @@ class UnetEncoder(fluid.dygraph.Layer):
 class UnetDecode(fluid.dygraph.Layer):
    def __init__(self):
-        super().__init__()
+        super(UnetDecode, self).__init__()
        self.up1 = Up(512, 256)
        self.up2 = Up(256, 128)
        self.up3 = Up(128, 64)
@@ -98,21 +132,21 @@ class UnetDecode(fluid.dygraph.Layer):
 class DoubleConv(fluid.dygraph.Layer):
    def __init__(self, num_channels, num_filters):
-        super().__init__()
+        super(DoubleConv, self).__init__()
        self.conv0 = Conv2D(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=3,
            stride=1,
            padding=1)
-        self.bn0 = BatchNorm(num_channels=num_filters)
+        self.bn0 = BatchNorm(num_filters)
        self.conv1 = Conv2D(
            num_channels=num_filters,
            num_filters=num_filters,
            filter_size=3,
            stride=1,
            padding=1)
-        self.bn1 = BatchNorm(num_channels=num_filters)
+        self.bn1 = BatchNorm(num_filters)
    def forward(self, x):
        x = self.conv0(x)
@@ -126,7 +160,7 @@ class DoubleConv(fluid.dygraph.Layer):
 class Down(fluid.dygraph.Layer):
    def __init__(self, num_channels, num_filters):
-        super().__init__()
+        super(Down, self).__init__()
        self.max_pool = Pool2D(
            pool_size=2, pool_type='max', pool_stride=2, pool_padding=0)
        self.double_conv = DoubleConv(num_channels, num_filters)
@@ -139,7 +173,7 @@ class Down(fluid.dygraph.Layer):
 class Up(fluid.dygraph.Layer):
    def __init__(self, num_channels, num_filters):
-        super().__init__()
+        super(Up, self).__init__()
        self.double_conv = DoubleConv(2 * num_channels, num_filters)
    def forward(self, x, short_cut):
@@ -152,7 +186,7 @@ class Up(fluid.dygraph.Layer):
 class GetLogit(fluid.dygraph.Layer):
    def __init__(self, num_channels, num_classes):
-        super().__init__()
+        super(GetLogit, self).__init__()
        self.conv = Conv2D(
            num_channels=num_channels,
            num_filters=num_classes,
@@ -163,3 +197,8 @@ class GetLogit(fluid.dygraph.Layer):
    def forward(self, x):
        x = self.conv(x)
        return x
+@manager.MODELS.add_component
+def unet(*args, **kwargs):
+    """
+    Factory registered in `manager.MODELS` that builds a UNet.
+    All positional and keyword arguments are forwarded unchanged to UNet.
+    """
+    model = UNet(*args, **kwargs)
+    return model
--- a/dygraph/tools/conver_cityscapes.py
+++ b/dygraph/tools/conver_cityscapes.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+File: convert_cityscapes.py
+This file is based on https://github.com/mcordts/cityscapesScripts to generate **labelTrainIds.png for training.
+Before running, you should download the cityscapes from https://www.cityscapes-dataset.com/ and make the folder
+structure as follows:
+cityscapes
+|
+|--leftImg8bit
+|  |--train
+|  |--val
+|  |--test
+|
+|--gtFine
+|  |--train
+|  |--val
+|  |--test
+"""
+import os
+import argparse
+from multiprocessing import Pool, cpu_count
+import glob
+from cityscapesscripts.preparation.json2labelImg import json2labelImg
+def parse_args():
+    """
+    Parse the command-line arguments of the conversion script.
+    Returns:
+        argparse.Namespace: with `cityscapes_path` (str) and `num_workers` (int,
+            defaults to the number of CPUs).
+    """
+    parser = argparse.ArgumentParser(
+        description='Generate **labelTrainIds.png for training')
+    # The path has no sensible default; requiring it gives a clear usage error
+    # instead of a TypeError later when the path is joined with 'gtFine'.
+    parser.add_argument(
+        '--cityscapes_path',
+        dest='cityscapes_path',
+        help='cityscapes path',
+        type=str,
+        required=True)
+    parser.add_argument(
+        '--num_workers',
+        dest='num_workers',
+        help='How many processes are used for data conversion',
+        type=int,
+        default=cpu_count())
+    return parser.parse_args()
+def gen_labelTrainIds(json_file):
+    """
+    Generate the `*_labelTrainIds.png` label image for one polygon annotation.
+    Args:
+        json_file (str): path of a `*_polygons.json` file; the output path is the
+            same path with that suffix replaced by `_labelTrainIds.png`.
+    """
+    json2labelImg(
+        json_file,
+        json_file.replace("_polygons.json", "_labelTrainIds.png"),
+        "trainIds")
+def main():
+    """
+    Convert every `*_polygons.json` found two directory levels below
+    `<cityscapes_path>/gtFine` using a pool of worker processes.
+    """
+    args = parse_args()
+    pattern = os.path.join(args.cityscapes_path, 'gtFine', '*', '*',
+                           '*_polygons.json')
+    polygon_files = glob.glob(pattern)
+    print('generating **_labelTrainIds.png')
+    pool = Pool(args.num_workers)
+    for polygon_file in polygon_files:
+        # One asynchronous task per annotation file.
+        pool.apply_async(gen_labelTrainIds, args=(polygon_file, ))
+    # Stop accepting tasks, then wait for all submitted conversions to finish.
+    pool.close()
+    pool.join()
+if __name__ == '__main__':
+    main()
--- a/dygraph/tools/voc_augment.py
+++ b/dygraph/tools/voc_augment.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+File: voc_augment.py
+This file uses SBD (Semantic Boundaries Dataset) <http://home.bharathh.info/pubs/codes/SBD/download.html>
+to augment the Pascal VOC dataset.
+"""
+import os
+import argparse
+from multiprocessing import Pool, cpu_count
+import cv2
+import numpy as np
+from scipy.io import loadmat
+import tqdm
+from dygraph.utils.download import download_file_and_uncompress
+# Directory under the user's home used as the download/extract location in main()
+# and as the parent of the default `--voc_path`.
+DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset')
+# Archive URL handed to download_file_and_uncompress() in main().
+URL = 'http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz'
+def parse_args():
+    """
+    Parse the command-line arguments of the augmentation script.
+    Returns:
+        argparse.Namespace: with `voc_path` (str, defaults to
+            `<DATA_HOME>/VOCdevkit`) and `num_workers` (int, defaults to the
+            number of CPUs).
+    """
+    default_voc_path = os.path.join(DATA_HOME, 'VOCdevkit')
+    arg_parser = argparse.ArgumentParser(
+        description=
+        'Convert SBD to Pascal Voc annotations to augment the train dataset of Pascal Voc'
+    )
+    arg_parser.add_argument(
+        '--voc_path',
+        dest='voc_path',
+        type=str,
+        default=default_voc_path,
+        help='pascal voc path')
+    arg_parser.add_argument(
+        '--num_workers',
+        dest='num_workers',
+        type=int,
+        default=cpu_count(),
+        help='How many processes are used for data conversion')
+    parsed = arg_parser.parse_args()
+    return parsed
+def mat_to_png(mat_file, sbd_cls_dir, save_dir):
+    """
+    Convert one SBD `.mat` annotation into a PNG mask.
+    Args:
+        mat_file (str): file name of the annotation inside `sbd_cls_dir`.
+        sbd_cls_dir (str): directory that contains the `.mat` files.
+        save_dir (str): directory the PNG is written to.
+    """
+    mat_path = os.path.join(sbd_cls_dir, mat_file)
+    mat = loadmat(mat_path)
+    mask = mat['GTcls'][0]['Segmentation'][0].astype(np.uint8)
+    # Swap only the extension; a plain str.replace('mat', 'png') would also
+    # rewrite any other 'mat' substring in the file name.
+    png_name = os.path.splitext(mat_file)[0] + '.png'
+    save_file = os.path.join(save_dir, png_name)
+    cv2.imwrite(save_file, mask)
+def main():
+    """
+    Augment Pascal VOC with SBD annotations.
+    Downloads and extracts SBD, writes the SBD image ids that are not in the VOC
+    `trainval.txt` to `aug.txt`, and converts every SBD `.mat` mask to PNG under
+    `VOC2012/SegmentationClassAug` using a pool of worker processes.
+    Raises:
+        Exception: if `--voc_path` does not exist.
+    """
+    args = parse_args()
+    sbd_path = download_file_and_uncompress(
+        url=URL,
+        savepath=DATA_HOME,
+        extrapath=DATA_HOME,
+        extraname='benchmark_RELEASE')
+    with open(os.path.join(sbd_path, 'dataset/train.txt'), 'r') as f:
+        sbd_file_list = [line.strip() for line in f]
+    with open(os.path.join(sbd_path, 'dataset/val.txt'), 'r') as f:
+        sbd_file_list += [line.strip() for line in f]
+    if not os.path.exists(args.voc_path):
+        # Fill the placeholder so the message names the missing path.
+        raise Exception(
+            'There is no voc_path: {}. Please ensure that the Pascal VOC dataset has been downloaded correctly'
+            .format(args.voc_path))
+    with open(
+            os.path.join(args.voc_path,
+                         'VOC2012/ImageSets/Segmentation/trainval.txt'),
+            'r') as f:
+        voc_file_list = [line.strip() for line in f]
+    # Sorted so that aug.txt is identical from run to run (set order is arbitrary).
+    aug_file_list = sorted(set(sbd_file_list) - set(voc_file_list))
+    with open(
+            os.path.join(args.voc_path,
+                         'VOC2012/ImageSets/Segmentation/aug.txt'), 'w') as f:
+        f.writelines(''.join([line, '\n']) for line in aug_file_list)
+    sbd_cls_dir = os.path.join(sbd_path, 'dataset/cls')
+    save_dir = os.path.join(args.voc_path, 'VOC2012/SegmentationClassAug')
+    if not os.path.exists(save_dir):
+        os.mkdir(save_dir)
+    mat_file_list = os.listdir(sbd_cls_dir)
+    p = Pool(args.num_workers)
+    # The progress bar tracks task submission; conversions finish by p.join().
+    for f in tqdm.tqdm(mat_file_list):
+        p.apply_async(mat_to_png, args=(f, sbd_cls_dir, save_dir))
+    p.close()
+    p.join()
+if __name__ == '__main__':
+    main()
--- a/dygraph/train.py
+++ b/dygraph/train.py
@@ -13,22 +13,16 @@
 # limitations under the License.
 import argparse
-import os
 import paddle.fluid as fluid
 from paddle.fluid.dygraph.parallel import ParallelEnv
-from paddle.fluid.io import DataLoader
-from paddle.incubate.hapi.distributed import DistributedBatchSampler
-from datasets import OpticDiscSeg, Cityscapes
+from dygraph.datasets import DATASETS
-import transforms as T
+import dygraph.transforms as T
-import models
+from dygraph.cvlibs import manager
-import utils.logging as logging
+from dygraph.utils import get_environ_info
-from utils import get_environ_info
+from dygraph.utils import logger
-from utils import load_pretrained_model
+from dygraph.core import train
-from utils import resume
-from utils import Timer, calculate_eta
-from val import evaluate
 def parse_args():
@@ -38,7 +32,8 @@ def parse_args():
    parser.add_argument(
        '--model_name',
        dest='model_name',
-        help="Model type for traing, which is one of ('UNet')",
+        help='Model type for training, which is one of {}'.format(
+            str(list(manager.MODELS.components_dict.keys()))),
        type=str,
        default='UNet')
@@ -46,10 +41,16 @@ def parse_args():
    parser.add_argument(
        '--dataset',
        dest='dataset',
-        help=
+        help="The dataset you want to train, which is one of {}".format(
-        "The dataset you want to train, which is one of ('OpticDiscSeg', 'Cityscapes')",
+            str(list(DATASETS.keys()))),
        type=str,
        default='OpticDiscSeg')
+    parser.add_argument(
+        '--dataset_root',
+        dest='dataset_root',
+        help="dataset root directory",
+        type=str,
+        default=None)
    # params of training
    parser.add_argument(
@@ -60,11 +61,11 @@ def parse_args():
        default=[512, 512],
        type=int)
    parser.add_argument(
-        '--num_epochs',
+        '--iters',
-        dest='num_epochs',
+        dest='iters',
-        help='Number epochs for training',
+        help='iters for training',
        type=int,
-        default=100)
+        default=10000)
    parser.add_argument(
        '--batch_size',
        dest='batch_size',
@@ -90,9 +91,9 @@ def parse_args():
        type=str,
        default=None)
    parser.add_argument(
-        '--save_interval_epochs',
+        '--save_interval_iters',
-        dest='save_interval_epochs',
+        dest='save_interval_iters',
-        help='The interval epochs for save a model snapshot',
+        help='The interval iters for save a model snapshot',
        type=int,
        default=5)
    parser.add_argument(
@@ -113,9 +114,9 @@ def parse_args():
        help='Eval while training',
        action='store_true')
    parser.add_argument(
-        '--log_steps',
+        '--log_iters',
-        dest='log_steps',
+        dest='log_iters',
-        help='Display logging information at every log_steps',
+        help='Display logging information at every log_iters',
        default=10,
        type=int)
    parser.add_argument(
@@ -127,148 +128,21 @@ def parse_args():
    return parser.parse_args()
-def train(model,
-          train_dataset,
-          places=None,
-          eval_dataset=None,
-          optimizer=None,
-          save_dir='output',
-          num_epochs=100,
-          batch_size=2,
-          pretrained_model=None,
-          resume_model=None,
-          save_interval_epochs=1,
-          log_steps=10,
-          num_classes=None,
-          num_workers=8,
-          use_vdl=False):
-    ignore_index = model.ignore_index
-    nranks = ParallelEnv().nranks
-    start_epoch = 0
-    if resume_model is not None:
-        start_epoch = resume(model, optimizer, resume_model)
-    elif pretrained_model is not None:
-        load_pretrained_model(model, pretrained_model)
-    if not os.path.isdir(save_dir):
-        if os.path.exists(save_dir):
-            os.remove(save_dir)
-        os.makedirs(save_dir)
-    if nranks > 1:
-        strategy = fluid.dygraph.prepare_context()
-        model_parallel = fluid.dygraph.DataParallel(model, strategy)
-    batch_sampler = DistributedBatchSampler(
-        train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
-    loader = DataLoader(
-        train_dataset,
-        batch_sampler=batch_sampler,
-        places=places,
-        num_workers=num_workers,
-        return_list=True,
-    )
-    if use_vdl:
-        from visualdl import LogWriter
-        log_writer = LogWriter(save_dir)
-    timer = Timer()
-    timer.start()
-    avg_loss = 0.0
-    steps_per_epoch = len(batch_sampler)
-    total_steps = steps_per_epoch * (num_epochs - start_epoch)
-    num_steps = 0
-    best_mean_iou = -1.0
-    best_model_epoch = 1
-    for epoch in range(start_epoch, num_epochs):
-        for step, data in enumerate(loader):
-            images = data[0]
-            labels = data[1].astype('int64')
-            if nranks > 1:
-                loss = model_parallel(images, labels, mode='train')
-                loss = model_parallel.scale_loss(loss)
-                loss.backward()
-                model_parallel.apply_collective_grads()
-            else:
-                loss = model(images, labels, mode='train')
-                loss.backward()
-            optimizer.minimize(loss)
-            model.clear_gradients()
-            avg_loss += loss.numpy()[0]
-            lr = optimizer.current_step_lr()
-            num_steps += 1
-            if num_steps % log_steps == 0 and ParallelEnv().local_rank == 0:
-                avg_loss /= log_steps
-                time_step = timer.elapsed_time() / log_steps
-                remain_steps = total_steps - num_steps
-                logging.info(
-                    "[TRAIN] Epoch={}/{}, Step={}/{}, loss={:.4f}, lr={:.6f}, sec/step={:.4f} | ETA {}"
-                    .format(epoch + 1, num_epochs, step + 1, steps_per_epoch,
-                            avg_loss, lr, time_step,
-                            calculate_eta(remain_steps, time_step)))
-                if use_vdl:
-                    log_writer.add_scalar('Train/loss', avg_loss, num_steps)
-                    log_writer.add_scalar('Train/lr', lr, num_steps)
-                avg_loss = 0.0
-                timer.restart()
-        if ((epoch + 1) % save_interval_epochs == 0
-                or epoch + 1 == num_epochs) and ParallelEnv().local_rank == 0:
-            current_save_dir = os.path.join(save_dir,
-                                            "epoch_{}".format(epoch + 1))
-            if not os.path.isdir(current_save_dir):
-                os.makedirs(current_save_dir)
-            fluid.save_dygraph(model.state_dict(),
-                               os.path.join(current_save_dir, 'model'))
-            fluid.save_dygraph(optimizer.state_dict(),
-                               os.path.join(current_save_dir, 'model'))
-            if eval_dataset is not None:
-                mean_iou, mean_acc = evaluate(
-                    model,
-                    eval_dataset,
-                    places=places,
-                    model_dir=current_save_dir,
-                    num_classes=num_classes,
-                    batch_size=batch_size,
-                    ignore_index=ignore_index,
-                    epoch_id=epoch + 1)
-                if mean_iou > best_mean_iou:
-                    best_mean_iou = mean_iou
-                    best_model_epoch = epoch + 1
-                    best_model_dir = os.path.join(save_dir, "best_model")
-                    fluid.save_dygraph(model.state_dict(),
-                                       os.path.join(best_model_dir, 'model'))
-                    logging.info(
-                        'Current evaluated best model in eval_dataset is epoch_{}, miou={:4f}'
-                        .format(best_model_epoch, best_mean_iou))
-                if use_vdl:
-                    log_writer.add_scalar('Evaluate/mean_iou', mean_iou,
-                                          epoch + 1)
-                    log_writer.add_scalar('Evaluate/mean_acc', mean_acc,
-                                          epoch + 1)
-                model.train()
-    if use_vdl:
-        log_writer.close()
 def main(args):
    env_info = get_environ_info()
+    info = ['{}: {}'.format(k, v) for k, v in env_info.items()]
+    info = '\n'.join(['\n', format('Environment Information', '-^48s')] + info +
+                     ['-' * 48])
+    logger.info(info)
    places = fluid.CUDAPlace(ParallelEnv().dev_id) \
-        if env_info['place'] == 'cuda' and fluid.is_compiled_with_cuda() \
+        if env_info['Paddle compiled with cuda'] and env_info['GPUs used'] \
        else fluid.CPUPlace()
-    if args.dataset.lower() == 'opticdiscseg':
+    if args.dataset not in DATASETS:
-        dataset = OpticDiscSeg
+        raise Exception('`--dataset` is invalid. it should be one of {}'.format(
-    elif args.dataset.lower() == 'cityscapes':
+            str(list(DATASETS.keys()))))
-        dataset = Cityscapes
+    dataset = DATASETS[args.dataset]
-    else:
-        raise Exception(
-            "The --dataset set wrong. It should be one of ('OpticDiscSeg', 'Cityscapes')"
-        )
    with fluid.dygraph.guard(places):
        # Creat dataset reader
@@ -277,26 +151,31 @@ def main(args):
            T.RandomHorizontalFlip(),
            T.Normalize()
        ])
-        train_dataset = dataset(transforms=train_transforms, mode='train')
+        train_dataset = dataset(
+            dataset_root=args.dataset_root,
+            transforms=train_transforms,
+            mode='train')
        eval_dataset = None
        if args.do_eval:
            eval_transforms = T.Compose(
                [T.Resize(args.input_size),
                 T.Normalize()])
-            eval_dataset = dataset(transforms=eval_transforms, mode='eval')
+            eval_dataset = dataset(
+                dataset_root=args.dataset_root,
+                transforms=eval_transforms,
+                mode='val')
-        if args.model_name == 'UNet':
+        model = manager.MODELS[args.model_name](
-            model = models.UNet(
+            num_classes=train_dataset.num_classes,
-                num_classes=train_dataset.num_classes, ignore_index=255)
+            pretrained_model=args.pretrained_model)
        # Creat optimizer
        # todo, may less one than len(loader)
-        num_steps_each_epoch = len(train_dataset) // (
+        num_iters_each_epoch = len(train_dataset) // (
            args.batch_size * ParallelEnv().nranks)
-        decay_step = args.num_epochs * num_steps_each_epoch
        lr_decay = fluid.layers.polynomial_decay(
-            args.learning_rate, decay_step, end_learning_rate=0, power=0.9)
+            args.learning_rate, args.iters, end_learning_rate=0, power=0.9)
        optimizer = fluid.optimizer.Momentum(
            lr_decay,
            momentum=0.9,
@@ -310,12 +189,11 @@ def main(args):
            eval_dataset=eval_dataset,
            optimizer=optimizer,
            save_dir=args.save_dir,
-            num_epochs=args.num_epochs,
+            iters=args.iters,
            batch_size=args.batch_size,
-            pretrained_model=args.pretrained_model,
            resume_model=args.resume_model,
-            save_interval_epochs=args.save_interval_epochs,
+            save_interval_iters=args.save_interval_iters,
-            log_steps=args.log_steps,
+            log_iters=args.log_iters,
            num_classes=train_dataset.num_classes,
            num_workers=args.num_workers,
            use_vdl=args.use_vdl)

--- a/dygraph/transforms/transforms.py
+++ b/dygraph/transforms/transforms.py
+# coding: utf8
 # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -12,27 +13,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from .functional import *
 import random
+from collections import OrderedDict
 import numpy as np
 from PIL import Image
 import cv2
-from collections import OrderedDict
+from .functional import *
-class Compose:
-    """根据数据预处理/增强算子对输入数据进行操作。
-       所有操作的输入图像流形状均是[H, W, C]，其中H为图像高，W为图像宽，C为图像通道数。
-    Args:
-        transforms (list): 数据预处理/增强算子。
-        to_rgb (bool): 是否转化为rgb通道格式
-    Raises:
-        TypeError: transforms不是list对象
-        ValueError: transforms元素个数小于1。
-    """
+class Compose:
    def __init__(self, transforms, to_rgb=True):
        if not isinstance(transforms, list):
            raise TypeError('The transforms must be a list!')
@@ -43,20 +34,8 @@ class Compose:
        self.to_rgb = to_rgb
    def __call__(self, im, im_info=None, label=None):
-        """
-        Args:
-            im (str/np.ndarray): 图像路径/图像np.ndarray数据。
-            im_info (dict): 存储与图像相关的信息，dict中的字段如下：
-                - shape_before_resize (tuple): 图像resize之前的大小（h, w）。
-                - shape_before_padding (tuple): 图像padding之前的大小（h, w）。
-            label (str/np.ndarray): 标注图像路径/标注图像np.ndarray数据。
-        Returns:
-            tuple: 根据网络所需字段所组成的tuple；字段由transforms中的最后一个数据预处理操作决定。
-        """
        if im_info is None:
-            im_info = dict()
+            im_info = list()
        if isinstance(im, str):
            im = cv2.imread(im).astype('float32')
        if isinstance(label, str):
@@ -80,27 +59,10 @@ class Compose:
 class RandomHorizontalFlip:
-    """以一定的概率对图像进行水平翻转。当存在标注图像时，则同步进行翻转。
-    Args:
-        prob (float): 随机水平翻转的概率。默认值为0.5。
-    """
    def __init__(self, prob=0.5):
        self.prob = prob
    def __call__(self, im, im_info=None, label=None):
-        """
-        Args:
-            im (np.ndarray): 图像np.ndarray数据。
-            im_info (dict): 存储与图像相关的信息。
-            label (np.ndarray): 标注图像np.ndarray数据。
-        Returns:
-            tuple: 当label为空时，返回的tuple为(im, im_info)，分别对应图像np.ndarray数据、存储与图像相关信息的字典；
-                当label不为空时，返回的tuple为(im, im_info, label)，分别对应图像np.ndarray数据、
-                存储与图像相关信息的字典和标注图像np.ndarray数据。
-        """
        if random.random() < self.prob:
            im = horizontal_flip(im)
            if label is not None:
@@ -112,26 +74,10 @@ class RandomHorizontalFlip:
 class RandomVerticalFlip:
-    """以一定的概率对图像进行垂直翻转。当存在标注图像时，则同步进行翻转。
-    Args:
-        prob (float): 随机垂直翻转的概率。默认值为0.1。
-    """
    def __init__(self, prob=0.1):
        self.prob = prob
    def __call__(self, im, im_info=None, label=None):
-        """
-        Args:
-            im (np.ndarray): 图像np.ndarray数据。
-            im_info (dict): 存储与图像相关的信息。
-            label (np.ndarray): 标注图像np.ndarray数据。
-        Returns:
-            tuple: 当label为空时，返回的tuple为(im, im_info)，分别对应图像np.ndarray数据、存储与图像相关信息的字典；
-                当label不为空时，返回的tuple为(im, im_info, label)，分别对应图像np.ndarray数据、
-                存储与图像相关信息的字典和标注图像np.ndarray数据。
-        """
        if random.random() < self.prob:
            im = vertical_flip(im)
            if label is not None:
@@ -143,25 +89,6 @@ class RandomVerticalFlip:
 class Resize:
-    """调整图像大小（resize）。
-    - 当目标大小（target_size）类型为int时，根据插值方式，
-      将图像resize为[target_size, target_size]。
-    - 当目标大小（target_size）类型为list或tuple时，根据插值方式，
-      将图像resize为target_size。
-    注意：当插值方式为“RANDOM”时，则随机选取一种插值方式进行resize。
-    Args:
-        target_size (int/list/tuple): 短边目标长度。默认为608。
-        interp (str): resize的插值方式，与opencv的插值方式对应，取值范围为
-            ['NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4', 'RANDOM']。默认为"LINEAR"。
-    Raises:
-        TypeError: 形参数据类型不满足需求。
-        ValueError: 插值方式不在['NEAREST', 'LINEAR', 'CUBIC',
-                    'AREA', 'LANCZOS4', 'RANDOM']中。
-    """
    # The interpolation mode
    interp_dict = {
        'NEAREST': cv2.INTER_NEAREST,
@@ -189,26 +116,9 @@ class Resize:
        self.target_size = target_size
    def __call__(self, im, im_info=None, label=None):
-        """
-        Args:
-            im (np.ndarray): 图像np.ndarray数据。
-            im_info (dict, 可选): 存储与图像相关的信息。
-            label (np.ndarray): 标注图像np.ndarray数据。
-        Returns:
-            tuple: 当label为空时，返回的tuple为(im, im_info)，分别对应图像np.ndarray数据、存储与图像相关信息的字典；
-                当label不为空时，返回的tuple为(im, im_info, label)，分别对应图像np.ndarray数据、
-                存储与图像相关信息的字典和标注图像np.ndarray数据。
-                其中，im_info跟新字段为：
-                    -shape_before_resize (tuple): 保存resize之前图像的形状(h, w）。
-        Raises:
-            TypeError: 形参数据类型不满足需求。
-            ValueError: 数据长度不匹配。
-        """
        if im_info is None:
-            im_info = OrderedDict()
+            im_info = list()
-        im_info['shape_before_resize'] = im.shape[:2]
+        im_info.append(('resize', im.shape[:2]))
        if not isinstance(im, np.ndarray):
            raise TypeError("Resize: image type is not numpy.")
        if len(im.shape) != 3:
@@ -228,32 +138,14 @@ class Resize:
 class ResizeByLong:
-    """对图像长边resize到固定值，短边按比例进行缩放。当存在标注图像时，则同步进行处理。
-    Args:
-        long_size (int): resize后图像的长边大小。
-    """
    def __init__(self, long_size):
        self.long_size = long_size
    def __call__(self, im, im_info=None, label=None):
-        """
-        Args:
-            im (np.ndarray): 图像np.ndarray数据。
-            im_info (dict): 存储与图像相关的信息。
-            label (np.ndarray): 标注图像np.ndarray数据。
-        Returns:
-            tuple: 当label为空时，返回的tuple为(im, im_info)，分别对应图像np.ndarray数据、存储与图像相关信息的字典；
-                当label不为空时，返回的tuple为(im, im_info, label)，分别对应图像np.ndarray数据、
-                存储与图像相关信息的字典和标注图像np.ndarray数据。
-                其中，im_info新增字段为：
-                    -shape_before_resize (tuple): 保存resize之前图像的形状(h, w）。
-        """
        if im_info is None:
-            im_info = OrderedDict()
+            im_info = list()
-        im_info['shape_before_resize'] = im.shape[:2]
+        im_info.append(('resize', im.shape[:2]))
        im = resize_long(im, self.long_size)
        if label is not None:
            label = resize_long(label, self.long_size, cv2.INTER_NEAREST)
@@ -265,15 +157,6 @@ class ResizeByLong:
 class ResizeRangeScaling:
-    """对图像长边随机resize到指定范围内，短边按比例进行缩放。当存在标注图像时，则同步进行处理。
-    Args:
-        min_value (int): 图像长边resize后的最小值。默认值400。
-        max_value (int): 图像长边resize后的最大值。默认值600。
-    Raises:
-        ValueError: min_value大于max_value
-    """
    def __init__(self, min_value=400, max_value=600):
        if min_value > max_value:
            raise ValueError('min_value must be less than max_value, '
@@ -283,17 +166,6 @@ class ResizeRangeScaling:
        self.max_value = max_value
    def __call__(self, im, im_info=None, label=None):
-        """
-        Args:
-            im (np.ndarray): 图像np.ndarray数据。
-            im_info (dict): 存储与图像相关的信息。
-            label (np.ndarray): 标注图像np.ndarray数据。
-        Returns:
-            tuple: 当label为空时，返回的tuple为(im, im_info)，分别对应图像np.ndarray数据、存储与图像相关信息的字典；
-                当label不为空时，返回的tuple为(im, im_info, label)，分别对应图像np.ndarray数据、
-                存储与图像相关信息的字典和标注图像np.ndarray数据。
-        """
        if self.min_value == self.max_value:
            random_size = self.max_value
        else:
@@ -310,17 +182,6 @@ class ResizeRangeScaling:
 class ResizeStepScaling:
-    """对图像按照某一个比例resize，这个比例以scale_step_size为步长
-    在[min_scale_factor, max_scale_factor]随机变动。当存在标注图像时，则同步进行处理。
-    Args:
-        min_scale_factor（float), resize最小尺度。默认值0.75。
-        max_scale_factor (float), resize最大尺度。默认值1.25。
-        scale_step_size (float), resize尺度范围间隔。默认值0.25。
-    Raises:
-        ValueError: min_scale_factor大于max_scale_factor
-    """
    def __init__(self,
                 min_scale_factor=0.75,
                 max_scale_factor=1.25,
@@ -335,17 +196,6 @@ class ResizeStepScaling:
        self.scale_step_size = scale_step_size
    def __call__(self, im, im_info=None, label=None):
-        """
-        Args:
-            im (np.ndarray): 图像np.ndarray数据。
-            im_info (dict): 存储与图像相关的信息。
-            label (np.ndarray): 标注图像np.ndarray数据。
-        Returns:
-            tuple: 当label为空时，返回的tuple为(im, im_info)，分别对应图像np.ndarray数据、存储与图像相关信息的字典；
-                当label不为空时，返回的tuple为(im, im_info, label)，分别对应图像np.ndarray数据、
-                存储与图像相关信息的字典和标注图像np.ndarray数据。
-        """
        if self.min_scale_factor == self.max_scale_factor:
            scale_factor = self.min_scale_factor
@@ -375,17 +225,6 @@ class ResizeStepScaling:
 class Normalize:
-    """对图像进行标准化。
-    1.尺度缩放到 [0,1]。
-    2.对图像进行减均值除以标准差操作。
-    Args:
-        mean (list): 图像数据集的均值。默认值[0.5, 0.5, 0.5]。
-        std (list): 图像数据集的标准差。默认值[0.5, 0.5, 0.5]。
-    Raises:
-        ValueError: mean或std不是list对象。std包含0。
-    """
    def __init__(self, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]):
        self.mean = mean
        self.std = std
@@ -396,18 +235,6 @@ class Normalize:
            raise ValueError('{}: std is invalid!'.format(self))
    def __call__(self, im, im_info=None, label=None):
-        """
-        Args:
-            im (np.ndarray): 图像np.ndarray数据。
-            im_info (dict): 存储与图像相关的信息。
-            label (np.ndarray): 标注图像np.ndarray数据。
-         Returns:
-            tuple: 当label为空时，返回的tuple为(im, im_info)，分别对应图像np.ndarray数据、存储与图像相关信息的字典；
-                当label不为空时，返回的tuple为(im, im_info, label)，分别对应图像np.ndarray数据、
-                存储与图像相关信息的字典和标注图像np.ndarray数据。
-        """
        mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
        std = np.array(self.std)[np.newaxis, np.newaxis, :]
        im = normalize(im, mean, std)
@@ -419,18 +246,6 @@ class Normalize:
 class Padding:
-    """对图像或标注图像进行padding，padding方向为右和下。
-    根据提供的值对图像或标注图像进行padding操作。
-    Args:
-        target_size (int|list|tuple): padding后图像的大小。
-        im_padding_value (list): 图像padding的值。默认为[127.5, 127.5, 127.5]。
-        label_padding_value (int): 标注图像padding的值。默认值为255。
-    Raises:
-        TypeError: target_size不是int|list|tuple。
-        ValueError:  target_size为list|tuple时元素个数不等于2。
-    """
    def __init__(self,
                 target_size,
                 im_padding_value=[127.5, 127.5, 127.5],
@@ -449,25 +264,9 @@ class Padding:
        self.label_padding_value = label_padding_value
    def __call__(self, im, im_info=None, label=None):
-        """
-        Args:
-            im (np.ndarray): 图像np.ndarray数据。
-            im_info (dict): 存储与图像相关的信息。
-            label (np.ndarray): 标注图像np.ndarray数据。
-        Returns:
-            tuple: 当label为空时，返回的tuple为(im, im_info)，分别对应图像np.ndarray数据、存储与图像相关信息的字典；
-                当label不为空时，返回的tuple为(im, im_info, label)，分别对应图像np.ndarray数据、
-                存储与图像相关信息的字典和标注图像np.ndarray数据。
-                其中，im_info新增字段为：
-                    -shape_before_padding (tuple): 保存padding之前图像的形状(h, w）。
-        Raises:
-            ValueError: 输入图像im或label的形状大于目标值
-        """
        if im_info is None:
-            im_info = OrderedDict()
+            im_info = list()
-        im_info['shape_before_padding'] = im.shape[:2]
+        im_info.append(('padding', im.shape[:2]))
        im_height, im_width = im.shape[0], im.shape[1]
        if isinstance(self.target_size, int):
@@ -483,21 +282,23 @@ class Padding:
                'the size of image should be less than target_size, but the size of image ({}, {}), is larger than target_size ({}, {})'
                .format(im_width, im_height, target_width, target_height))
        else:
-            im = cv2.copyMakeBorder(im,
+            im = cv2.copyMakeBorder(
-                                    0,
+                im,
-                                    pad_height,
+                0,
-                                    0,
+                pad_height,
-                                    pad_width,
+                0,
-                                    cv2.BORDER_CONSTANT,
+                pad_width,
-                                    value=self.im_padding_value)
+                cv2.BORDER_CONSTANT,
+                value=self.im_padding_value)
            if label is not None:
-                label = cv2.copyMakeBorder(label,
+                label = cv2.copyMakeBorder(
-                                           0,
+                    label,
-                                           pad_height,
+                    0,
-                                           0,
+                    pad_height,
-                                           pad_width,
+                    0,
-                                           cv2.BORDER_CONSTANT,
+                    pad_width,
-                                           value=self.label_padding_value)
+                    cv2.BORDER_CONSTANT,
+                    value=self.label_padding_value)
        if label is None:
            return (im, im_info)
        else:
@@ -505,17 +306,6 @@ class Padding:
 class RandomPaddingCrop:
-    """对图像和标注图进行随机裁剪，当所需要的裁剪尺寸大于原图时，则进行padding操作。
-    Args:
-        crop_size (int|list|tuple): 裁剪图像大小。默认为512。
-        im_padding_value (list): 图像padding的值。默认为[127.5, 127.5, 127.5]。
-        label_padding_value (int): 标注图像padding的值。默认值为255。
-    Raises:
-        TypeError: crop_size不是int/list/tuple。
-        ValueError:  target_size为list/tuple时元素个数不等于2。
-    """
    def __init__(self,
                 crop_size=512,
                 im_padding_value=[127.5, 127.5, 127.5],
@@ -534,17 +324,6 @@ class RandomPaddingCrop:
        self.label_padding_value = label_padding_value
    def __call__(self, im, im_info=None, label=None):
-        """
-        Args:
-            im (np.ndarray): 图像np.ndarray数据。
-            im_info (dict): 存储与图像相关的信息。
-            label (np.ndarray): 标注图像np.ndarray数据。
-         Returns:
-            tuple: 当label为空时，返回的tuple为(im, im_info)，分别对应图像np.ndarray数据、存储与图像相关信息的字典；
-                当label不为空时，返回的tuple为(im, im_info, label)，分别对应图像np.ndarray数据、
-                存储与图像相关信息的字典和标注图像np.ndarray数据。
-        """
        if isinstance(self.crop_size, int):
            crop_width = self.crop_size
            crop_height = self.crop_size
@@ -564,21 +343,23 @@ class RandomPaddingCrop:
            pad_height = max(crop_height - img_height, 0)
            pad_width = max(crop_width - img_width, 0)
            if (pad_height > 0 or pad_width > 0):
-                im = cv2.copyMakeBorder(im,
+                im = cv2.copyMakeBorder(
-                                        0,
+                    im,
-                                        pad_height,
+                    0,
-                                        0,
+                    pad_height,
-                                        pad_width,
+                    0,
-                                        cv2.BORDER_CONSTANT,
+                    pad_width,
-                                        value=self.im_padding_value)
+                    cv2.BORDER_CONSTANT,
+                    value=self.im_padding_value)
                if label is not None:
-                    label = cv2.copyMakeBorder(label,
+                    label = cv2.copyMakeBorder(
-                                               0,
+                        label,
-                                               pad_height,
+                        0,
-                                               0,
+                        pad_height,
-                                               pad_width,
+                        0,
-                                               cv2.BORDER_CONSTANT,
+                        pad_width,
-                                               value=self.label_padding_value)
+                        cv2.BORDER_CONSTANT,
+                        value=self.label_padding_value)
                img_height = im.shape[0]
                img_width = im.shape[1]
@@ -586,11 +367,11 @@ class RandomPaddingCrop:
                h_off = np.random.randint(img_height - crop_height + 1)
                w_off = np.random.randint(img_width - crop_width + 1)
-                im = im[h_off:(crop_height + h_off), w_off:(w_off +
+                im = im[h_off:(crop_height + h_off), w_off:(
-                                                            crop_width), :]
+                    w_off + crop_width), :]
                if label is not None:
-                    label = label[h_off:(crop_height +
+                    label = label[h_off:(crop_height + h_off), w_off:(
-                                         h_off), w_off:(w_off + crop_width)]
+                        w_off + crop_width)]
        if label is None:
            return (im, im_info)
        else:
@@ -598,26 +379,10 @@ class RandomPaddingCrop:
 class RandomBlur:
-    """以一定的概率对图像进行高斯模糊。
-    Args：
-        prob (float): 图像模糊概率。默认为0.1。
-    """
    def __init__(self, prob=0.1):
        self.prob = prob
    def __call__(self, im, im_info=None, label=None):
-        """
-        Args:
-            im (np.ndarray): 图像np.ndarray数据。
-            im_info (dict): 存储与图像相关的信息。
-            label (np.ndarray): 标注图像np.ndarray数据。
-        Returns:
-            tuple: 当label为空时，返回的tuple为(im, im_info)，分别对应图像np.ndarray数据、存储与图像相关信息的字典；
-                当label不为空时，返回的tuple为(im, im_info, label)，分别对应图像np.ndarray数据、
-                存储与图像相关信息的字典和标注图像np.ndarray数据。
-        """
        if self.prob <= 0:
            n = 0
        elif self.prob >= 1:
@@ -640,16 +405,6 @@ class RandomBlur:
 class RandomRotation:
-    """对图像进行随机旋转。
-    在不超过最大旋转角度的情况下，图像进行随机旋转，当存在标注图像时，同步进行，
-    并对旋转后的图像和标注图像进行相应的padding。
-    Args:
-        max_rotation (float): 最大旋转角度。默认为15度。
-        im_padding_value (list): 图像padding的值。默认为[127.5, 127.5, 127.5]。
-        label_padding_value (int): 标注图像padding的值。默认为255。
-    """
    def __init__(self,
                 max_rotation=15,
                 im_padding_value=[127.5, 127.5, 127.5],
@@ -659,17 +414,6 @@ class RandomRotation:
        self.label_padding_value = label_padding_value
    def __call__(self, im, im_info=None, label=None):
-        """
-        Args:
-            im (np.ndarray): 图像np.ndarray数据。
-            im_info (dict): 存储与图像相关的信息。
-            label (np.ndarray): 标注图像np.ndarray数据。
-        Returns:
-            tuple: 当label为空时，返回的tuple为(im, im_info)，分别对应图像np.ndarray数据、存储与图像相关信息的字典；
-                当label不为空时，返回的tuple为(im, im_info, label)，分别对应图像np.ndarray数据、
-                存储与图像相关信息的字典和标注图像np.ndarray数据。
-        """
        if self.max_rotation > 0:
            (h, w) = im.shape[:2]
            do_rotation = np.random.uniform(-self.max_rotation,
@@ -686,18 +430,20 @@ class RandomRotation:
            r[0, 2] += (nw / 2) - cx
            r[1, 2] += (nh / 2) - cy
            dsize = (nw, nh)
-            im = cv2.warpAffine(im,
+            im = cv2.warpAffine(
-                                r,
+                im,
-                                dsize=dsize,
+                r,
-                                flags=cv2.INTER_LINEAR,
+                dsize=dsize,
-                                borderMode=cv2.BORDER_CONSTANT,
+                flags=cv2.INTER_LINEAR,
-                                borderValue=self.im_padding_value)
+                borderMode=cv2.BORDER_CONSTANT,
-            label = cv2.warpAffine(label,
+                borderValue=self.im_padding_value)
-                                   r,
+            label = cv2.warpAffine(
-                                   dsize=dsize,
+                label,
-                                   flags=cv2.INTER_NEAREST,
+                r,
-                                   borderMode=cv2.BORDER_CONSTANT,
+                dsize=dsize,
-                                   borderValue=self.label_padding_value)
+                flags=cv2.INTER_NEAREST,
+                borderMode=cv2.BORDER_CONSTANT,
+                borderValue=self.label_padding_value)
        if label is None:
            return (im, im_info)
@@ -706,29 +452,11 @@ class RandomRotation:
 class RandomScaleAspect:
-    """裁剪并resize回原始尺寸的图像和标注图像。
-    按照一定的面积比和宽高比对图像进行裁剪，并reszie回原始图像的图像，当存在标注图时，同步进行。
-    Args：
-        min_scale (float)：裁取图像占原始图像的面积比，取值[0，1]，为0时则返回原图。默认为0.5。
-        aspect_ratio (float): 裁取图像的宽高比范围，非负值，为0时返回原图。默认为0.33。
-    """
    def __init__(self, min_scale=0.5, aspect_ratio=0.33):
        self.min_scale = min_scale
        self.aspect_ratio = aspect_ratio
    def __call__(self, im, im_info=None, label=None):
-        """
-        Args:
-            im (np.ndarray): 图像np.ndarray数据。
-            im_info (dict): 存储与图像相关的信息。
-            label (np.ndarray): 标注图像np.ndarray数据。
-        Returns:
-            tuple: 当label为空时，返回的tuple为(im, im_info)，分别对应图像np.ndarray数据、存储与图像相关信息的字典；
-                当label不为空时，返回的tuple为(im, im_info, label)，分别对应图像np.ndarray数据、
-                存储与图像相关信息的字典和标注图像np.ndarray数据。
-        """
        if self.min_scale != 0 and self.aspect_ratio != 0:
            img_height = im.shape[0]
            img_width = im.shape[1]
@@ -751,10 +479,12 @@ class RandomScaleAspect:
                    im = im[h1:(h1 + dh), w1:(w1 + dw), :]
                    label = label[h1:(h1 + dh), w1:(w1 + dw)]
-                    im = cv2.resize(im, (img_width, img_height),
+                    im = cv2.resize(
-                                    interpolation=cv2.INTER_LINEAR)
+                        im, (img_width, img_height),
-                    label = cv2.resize(label, (img_width, img_height),
+                        interpolation=cv2.INTER_LINEAR)
-                                       interpolation=cv2.INTER_NEAREST)
+                    label = cv2.resize(
+                        label, (img_width, img_height),
+                        interpolation=cv2.INTER_NEAREST)
                    break
        if label is None:
            return (im, im_info)
@@ -763,21 +493,6 @@ class RandomScaleAspect:
 class RandomDistort:
-    """对图像进行随机失真。
-    1. 对变换的操作顺序进行随机化操作。
-    2. 按照1中的顺序以一定的概率对图像进行随机像素内容变换。
-    Args:
-        brightness_range (float): 明亮度因子的范围。默认为0.5。
-        brightness_prob (float): 随机调整明亮度的概率。默认为0.5。
-        contrast_range (float): 对比度因子的范围。默认为0.5。
-        contrast_prob (float): 随机调整对比度的概率。默认为0.5。
-        saturation_range (float): 饱和度因子的范围。默认为0.5。
-        saturation_prob (float): 随机调整饱和度的概率。默认为0.5。
-        hue_range (int): 色调因子的范围。默认为18。
-        hue_prob (float): 随机调整色调的概率。默认为0.5。
-    """
    def __init__(self,
                 brightness_range=0.5,
                 brightness_prob=0.5,
@@ -797,17 +512,6 @@ class RandomDistort:
        self.hue_prob = hue_prob
    def __call__(self, im, im_info=None, label=None):
-        """
-        Args:
-            im (np.ndarray): 图像np.ndarray数据。
-            im_info (dict): 存储与图像相关的信息。
-            label (np.ndarray): 标注图像np.ndarray数据。
-        Returns:
-            tuple: 当label为空时，返回的tuple为(im, im_info)，分别对应图像np.ndarray数据、存储与图像相关信息的字典；
-                当label不为空时，返回的tuple为(im, im_info, label)，分别对应图像np.ndarray数据、
-                存储与图像相关信息的字典和标注图像np.ndarray数据。
-        """
        brightness_lower = 1 - self.brightness_range
        brightness_upper = 1 + self.brightness_range
        contrast_lower = 1 - self.contrast_range

--- a/dygraph/utils/__init__.py
+++ b/dygraph/utils/__init__.py
@@ -12,8 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from . import logging
+from . import logger
 from . import download
 from .metrics import ConfusionMatrix
 from .utils import *
 from .timer import Timer, calculate_eta
+from .get_environ_info import get_environ_info
--- a/dygraph/utils/download.py
+++ b/dygraph/utils/download.py
@@ -85,8 +85,8 @@ def _uncompress_file(filepath, extrapath, delete_file, print_progress):
    for total_num, index, rootpath in handler(filepath, extrapath):
        if print_progress:
            done = int(50 * float(index) / total_num)
-            progress("[%-50s] %.2f%%" %
+            progress(
-                     ('=' * done, float(100 * index) / total_num))
+                "[%-50s] %.2f%%" % ('=' * done, float(100 * index) / total_num))
    if print_progress:
        progress("[%-50s] %.2f%%" % ('=' * 50, 100), end=True)
@@ -132,4 +132,4 @@ def download_file_and_uncompress(url,
                                        print_progress)
            savename = os.path.join(extrapath, savename)
        shutil.move(savename, extraname)
-    return savename
+    return extraname
--- a/dygraph/utils/get_environ_info.py
+++ b/dygraph/utils/get_environ_info.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import sys
+from collections import OrderedDict
+import subprocess
+import glob
+import paddle
+import paddle.fluid as fluid
+import cv2
+IS_WINDOWS = sys.platform == 'win32'
+def _find_cuda_home():
+    """Locate the CUDA installation directory.
+
+    The lookup order follows the implementation in pytorch
+    <https://github.com/pytorch/pytorch/blob/master/torch/utils/cpp_extension.py>:
+    the ``CUDA_HOME`` / ``CUDA_PATH`` environment variables first, then the
+    location of ``nvcc`` on the search path, then a platform default.
+
+    Returns:
+        The detected directory, or None when the platform default does
+        not exist on disk.
+    """
+    # Guess #1: trust the environment when it names a CUDA directory.
+    env_home = os.environ.get('CUDA_HOME') or os.environ.get('CUDA_PATH')
+    if env_home is not None:
+        return env_home
+    try:
+        # Guess #2: nvcc sits in <cuda_home>/bin, so go two levels up.
+        locator = 'where' if IS_WINDOWS else 'which'
+        nvcc_path = subprocess.check_output([locator, 'nvcc']).decode()
+        return os.path.dirname(os.path.dirname(nvcc_path.rstrip('\r\n')))
+    except Exception:
+        # Guess #3: fall back to the conventional install location.
+        if IS_WINDOWS:
+            candidates = glob.glob(
+                'C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v*.*')
+            fallback = candidates[0] if candidates else ''
+        else:
+            fallback = '/usr/local/cuda'
+        return fallback if os.path.exists(fallback) else None
+def _get_nvcc_info(cuda_home):
+    """Return the last line of ``nvcc -V`` output for the given CUDA directory.
+
+    Args:
+        cuda_home (str|None): CUDA installation directory, e.g. the value
+            returned by ``_find_cuda_home``.
+
+    Returns:
+        str: the last line printed by ``<cuda_home>/bin/nvcc -V``, or
+            "Not Available" when ``cuda_home`` is None, is not a directory,
+            or nvcc cannot be executed.
+    """
+    # Default first, so every path returns a value; previously a missing
+    # cuda_home left `nvcc` unbound and the return raised UnboundLocalError.
+    nvcc = "Not Available"
+    if cuda_home is not None and os.path.isdir(cuda_home):
+        try:
+            nvcc_path = os.path.join(cuda_home, 'bin', 'nvcc')
+            # An argument list (no shell) keeps paths containing spaces,
+            # such as "C:/Program Files/...", intact.
+            output = subprocess.check_output([nvcc_path, '-V']).decode()
+            nvcc = output.strip().split('\n')[-1]
+        except (subprocess.SubprocessError, OSError):
+            # OSError covers a missing or non-executable nvcc binary.
+            pass
+    return nvcc
+def _get_gpu_info():
+    """Query ``nvidia-smi -L`` for the visible GPUs.
+
+    Returns:
+        list[str]|str: on success, one entry per output line, truncated to
+            its first four space-separated tokens; on failure, a single
+            human-readable message string.
+    """
+    try:
+        output = subprocess.check_output(['nvidia-smi',
+                                          '-L']).decode().strip()
+        gpu_info = [
+            ' '.join(line.split(' ')[:4]) for line in output.split('\n')
+        ]
+    except Exception:
+        # Best effort: any failure degrades to a message, but
+        # KeyboardInterrupt/SystemExit are no longer swallowed.
+        gpu_info = ' Can not get GPU information. Please make sure CUDA have been installed successfully.'
+    return gpu_info
+def get_environ_info():
+    """Collect environment information.
+
+    Returns:
+        dict: report keyed by human-readable names. Always present:
+            'System Platform', 'Python', 'Paddle compiled with cuda', 'GCC',
+            'PaddlePaddle' and 'OpenCV'. 'LSB' is added on linux. 'NVCC',
+            'GPUs used', 'CUDA_VISIBLE_DEVICES' and 'GPU' are added only when
+            Paddle is compiled with CUDA. Entries backed by an external tool
+            that cannot be run are set to 'Not Available'.
+    """
+    env_info = {}
+    env_info['System Platform'] = sys.platform
+    if env_info['System Platform'] == 'linux':
+        # lsb_release is optional on many distributions and containers; its
+        # absence must not abort the whole report.
+        try:
+            lsb_v = subprocess.check_output(['lsb_release',
+                                             '-v']).decode().strip()
+            lsb_v = lsb_v.replace('\t', ' ')
+            lsb_d = subprocess.check_output(['lsb_release',
+                                             '-d']).decode().strip()
+            lsb_d = lsb_d.replace('\t', ' ')
+            env_info['LSB'] = [lsb_v, lsb_d]
+        except (subprocess.SubprocessError, OSError):
+            env_info['LSB'] = 'Not Available'
+    env_info['Python'] = sys.version.replace('\n', '')
+    compiled_with_cuda = paddle.fluid.is_compiled_with_cuda()
+    env_info['Paddle compiled with cuda'] = compiled_with_cuda
+    if compiled_with_cuda:
+        cuda_home = _find_cuda_home()
+        env_info['NVCC'] = _get_nvcc_info(cuda_home)
+        gpu_nums = fluid.core.get_cuda_device_count()
+        env_info['GPUs used'] = gpu_nums
+        env_info['CUDA_VISIBLE_DEVICES'] = os.environ.get(
+            'CUDA_VISIBLE_DEVICES')
+        env_info['GPU'] = _get_gpu_info()
+    # gcc may be missing (e.g. on Windows); report that instead of crashing.
+    try:
+        gcc = subprocess.check_output(['gcc', '--version']).decode()
+        env_info['GCC'] = gcc.strip().split('\n')[0]
+    except (subprocess.SubprocessError, OSError):
+        env_info['GCC'] = 'Not Available'
+    env_info['PaddlePaddle'] = paddle.__version__
+    env_info['OpenCV'] = cv2.__version__
+    return env_info
--- a/dygraph/utils/logging.py
+++ b/dygraph/utils/logging.py
--- a/dygraph/utils/utils.py
+++ b/dygraph/utils/utils.py
@@ -18,7 +18,7 @@ import math
 import cv2
 import paddle.fluid as fluid
-from . import logging
+from . import logger
 def seconds_to_hms(seconds):
@@ -29,39 +29,25 @@ def seconds_to_hms(seconds):
    return hms_str
-def get_environ_info():
-    info = dict()
-    info['place'] = 'cpu'
-    info['num'] = int(os.environ.get('CPU_NUM', 1))
-    if os.environ.get('CUDA_VISIBLE_DEVICES', None) != "":
-        if hasattr(fluid.core, 'get_cuda_device_count'):
-            gpu_num = 0
-            try:
-                gpu_num = fluid.core.get_cuda_device_count()
-            except:
-                os.environ['CUDA_VISIBLE_DEVICES'] = ''
-                pass
-            if gpu_num > 0:
-                info['place'] = 'cuda'
-                info['num'] = fluid.core.get_cuda_device_count()
-    return info
 def load_pretrained_model(model, pretrained_model):
    if pretrained_model is not None:
-        logging.info('Load pretrained model from {}'.format(pretrained_model))
+        logger.info('Load pretrained model from {}'.format(pretrained_model))
        if os.path.exists(pretrained_model):
            ckpt_path = os.path.join(pretrained_model, 'model')
-            para_state_dict, _ = fluid.load_dygraph(ckpt_path)
+            try:
+                para_state_dict, _ = fluid.load_dygraph(ckpt_path)
+            except:
+                para_state_dict = fluid.load_program_state(pretrained_model)
            model_state_dict = model.state_dict()
            keys = model_state_dict.keys()
            num_params_loaded = 0
            for k in keys:
                if k not in para_state_dict:
-                    logging.warning("{} is not in pretrained model".format(k))
+                    logger.warning("{} is not in pretrained model".format(k))
                elif list(para_state_dict[k].shape) != list(
                        model_state_dict[k].shape):
-                    logging.warning(
+                    logger.warning(
                        "[SKIP] Shape of pretrained params {} doesn't match.(Pretrained: {}, Actual: {})"
                        .format(k, para_state_dict[k].shape,
                                model_state_dict[k].shape))
@@ -69,7 +55,7 @@ def load_pretrained_model(model, pretrained_model):
                    model_state_dict[k] = para_state_dict[k]
                    num_params_loaded += 1
            model.set_dict(model_state_dict)
-            logging.info("There are {}/{} varaibles are loaded.".format(
+            logger.info("There are {}/{} varaibles are loaded.".format(
                num_params_loaded, len(model_state_dict)))
        else:
@@ -77,13 +63,14 @@ def load_pretrained_model(model, pretrained_model):
                'The pretrained model directory is not Found: {}'.format(
                    pretrained_model))
    else:
-        logging.info('No pretrained model to load, train from scratch')
+        logger.info('No pretrained model to load, train from scratch')
 def resume(model, optimizer, resume_model):
    if resume_model is not None:
-        logging.info('Resume model from {}'.format(resume_model))
+        logger.info('Resume model from {}'.format(resume_model))
        if os.path.exists(resume_model):
+            resume_model = os.path.normpath(resume_model)
            ckpt_path = os.path.join(resume_model, 'model')
            para_state_dict, opti_state_dict = fluid.load_dygraph(ckpt_path)
            model.set_dict(para_state_dict)
@@ -97,7 +84,7 @@ def resume(model, optimizer, resume_model):
                'The resume model directory is not Found: {}'.format(
                    resume_model))
    else:
-        logging.info('No model need to resume')
+        logger.info('No model need to resume')
 def visualize(image, result, save_dir=None, weight=0.6):

--- a/dygraph/val.py
+++ b/dygraph/val.py
@@ -13,23 +13,15 @@
 # limitations under the License.
 import argparse
-import os
-import math
-from paddle.fluid.dygraph.base import to_variable
-import numpy as np
 import paddle.fluid as fluid
 from paddle.fluid.dygraph.parallel import ParallelEnv
-from paddle.fluid.io import DataLoader
-from paddle.fluid.dataloader import BatchSampler
-from datasets import OpticDiscSeg, Cityscapes
+from dygraph.datasets import DATASETS
-import transforms as T
+import dygraph.transforms as T
-import models
+from dygraph.cvlibs import manager
-import utils.logging as logging
+from dygraph.utils import get_environ_info
-from utils import get_environ_info
+from dygraph.core import evaluate
-from utils import ConfusionMatrix
-from utils import Timer, calculate_eta
 def parse_args():
@@ -39,7 +31,8 @@ def parse_args():
    parser.add_argument(
        '--model_name',
        dest='model_name',
-        help="Model type for evaluation, which is one of ('UNet')",
+        help='Model type for evaluation, which is one of {}'.format(
+            str(list(manager.MODELS.components_dict.keys()))),
        type=str,
        default='UNet')
@@ -47,10 +40,16 @@ def parse_args():
    parser.add_argument(
        '--dataset',
        dest='dataset',
-        help=
+        help="The dataset you want to evaluation, which is one of {}".format(
-        "The dataset you want to evaluation, which is one of ('OpticDiscSeg', 'Cityscapes')",
+            str(list(DATASETS.keys()))),
        type=str,
        default='OpticDiscSeg')
+    parser.add_argument(
+        '--dataset_root',
+        dest='dataset_root',
+        help="dataset root directory",
+        type=str,
+        default=None)
    # params of evaluate
    parser.add_argument(
@@ -60,12 +59,6 @@ def parse_args():
        nargs=2,
        default=[512, 512],
        type=int)
-    parser.add_argument(
-        '--batch_size',
-        dest='batch_size',
-        help='Mini batch size',
-        type=int,
-        default=2)
    parser.add_argument(
        '--model_dir',
        dest='model_dir',
@@ -76,93 +69,32 @@ def parse_args():
    return parser.parse_args()
-def evaluate(model,
-             eval_dataset=None,
-             places=None,
-             model_dir=None,
-             num_classes=None,
-             batch_size=2,
-             ignore_index=255,
-             epoch_id=None):
-    ckpt_path = os.path.join(model_dir, 'model')
-    para_state_dict, opti_state_dict = fluid.load_dygraph(ckpt_path)
-    model.set_dict(para_state_dict)
-    model.eval()
-    batch_sampler = BatchSampler(
-        eval_dataset, batch_size=batch_size, shuffle=False, drop_last=False)
-    loader = DataLoader(
-        eval_dataset,
-        batch_sampler=batch_sampler,
-        places=places,
-        return_list=True,
-    )
-    total_steps = len(batch_sampler)
-    conf_mat = ConfusionMatrix(num_classes, streaming=True)
-    logging.info(
-        "Start to evaluating(total_samples={}, total_steps={})...".format(
-            len(eval_dataset), total_steps))
-    timer = Timer()
-    timer.start()
-    for step, data in enumerate(loader):
-        images = data[0]
-        labels = data[1].astype('int64')
-        pred, _ = model(images, mode='eval')
-        pred = pred.numpy()
-        labels = labels.numpy()
-        mask = labels != ignore_index
-        conf_mat.calculate(pred=pred, label=labels, ignore=mask)
-        _, iou = conf_mat.mean_iou()
-        time_step = timer.elapsed_time()
-        remain_step = total_steps - step - 1
-        logging.info(
-            "[EVAL] Epoch={}, Step={}/{}, iou={:4f}, sec/step={:.4f} | ETA {}".
-            format(epoch_id, step + 1, total_steps, iou, time_step,
-                   calculate_eta(remain_step, time_step)))
-        timer.restart()
-    category_iou, miou = conf_mat.mean_iou()
-    category_acc, macc = conf_mat.accuracy()
-    logging.info("[EVAL] #image={} acc={:.4f} IoU={:.4f}".format(
-        len(eval_dataset), macc, miou))
-    logging.info("[EVAL] Category IoU: " + str(category_iou))
-    logging.info("[EVAL] Category Acc: " + str(category_acc))
-    logging.info("[EVAL] Kappa:{:.4f} ".format(conf_mat.kappa()))
-    return miou, macc
 def main(args):
    env_info = get_environ_info()
    places = fluid.CUDAPlace(ParallelEnv().dev_id) \
-        if env_info['place'] == 'cuda' and fluid.is_compiled_with_cuda() \
+        if env_info['Paddle compiled with cuda'] and env_info['GPUs used'] \
        else fluid.CPUPlace()
-    if args.dataset.lower() == 'opticdiscseg':
+    if args.dataset not in DATASETS:
-        dataset = OpticDiscSeg
+        raise Exception('`--dataset` is invalid. it should be one of {}'.format(
-    elif args.dataset.lower() == 'cityscapes':
+            str(list(DATASETS.keys()))))
-        dataset = Cityscapes
+    dataset = DATASETS[args.dataset]
-    else:
-        raise Exception(
-            "The --dataset set wrong. It should be one of ('OpticDiscSeg', 'Cityscapes')"
-        )
    with fluid.dygraph.guard(places):
        eval_transforms = T.Compose([T.Resize(args.input_size), T.Normalize()])
-        eval_dataset = dataset(transforms=eval_transforms, mode='eval')
+        eval_dataset = dataset(
+            dataset_root=args.dataset_root,
+            transforms=eval_transforms,
+            mode='val')
-        if args.model_name == 'UNet':
+        model = manager.MODELS[args.model_name](
-            model = models.UNet(num_classes=eval_dataset.num_classes)
+            num_classes=eval_dataset.num_classes)
        evaluate(
            model,
            eval_dataset,
-            places=places,
            model_dir=args.model_dir,
-            num_classes=eval_dataset.num_classes,
+            num_classes=eval_dataset.num_classes)
-            batch_size=args.batch_size)
 if __name__ == '__main__':

--- a/pdseg/models/model_builder.py
+++ b/pdseg/models/model_builder.py
@@ -26,7 +26,7 @@ from loss import multi_dice_loss
 from loss import multi_bce_loss
 from lovasz_losses import lovasz_hinge
 from lovasz_losses import lovasz_softmax
-from models.modeling import deeplab, unet, icnet, pspnet, hrnet, fast_scnn
+from models.modeling import deeplab, unet, icnet, pspnet, hrnet, fast_scnn, ocrnet
 class ModelPhase(object):
@@ -85,6 +85,8 @@ def seg_model(image, class_num):
        logits = hrnet.hrnet(image, class_num)
    elif model_name == 'fast_scnn':
        logits = fast_scnn.fast_scnn(image, class_num)
+    elif model_name == 'ocrnet':
+        logits = ocrnet.ocrnet(image, class_num)
    else:
        raise Exception(
            "unknow model name, only support unet, deeplabv3p, icnet, pspnet, hrnet, fast_scnn"

--- a/pdseg/models/modeling/deeplab.py
+++ b/pdseg/models/modeling/deeplab.py
@@ -352,6 +352,8 @@ def resnet_vd(input):
    else:
        raise Exception("deeplab only support stride 8 or 16")
    lr_mult_list = cfg.MODEL.DEEPLAB.BACKBONE_LR_MULT_LIST
+    if lr_mult_list is None:
+        lr_mult_list = [1.0, 1.0, 1.0, 1.0, 1.0]
    model = resnet_vd_backbone(
        layers, stem='deeplab', lr_mult_list=lr_mult_list)
    data, decode_shortcuts = model.net(

--- a/pdseg/models/modeling/ocrnet.py
+++ b/pdseg/models/modeling/ocrnet.py
+# coding: utf8
+# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import paddle
+import paddle.fluid as fluid
+from paddle.fluid.initializer import MSRA
+from paddle.fluid.param_attr import ParamAttr
+from utils.config import cfg
+def conv_bn_layer(input,
+                  filter_size,
+                  num_filters,
+                  stride=1,
+                  padding=1,
+                  num_groups=1,
+                  if_act=True,
+                  name=None):
+    conv = fluid.layers.conv2d(
+        input=input,
+        num_filters=num_filters,
+        filter_size=filter_size,
+        stride=stride,
+        padding=(filter_size - 1) // 2,
+        groups=num_groups,
+        act=None,
+        #        param_attr=ParamAttr(initializer=MSRA(), learning_rate=1.0, name=name + '_weights'),
+        param_attr=ParamAttr(
+            initializer=fluid.initializer.Normal(scale=0.001),
+            learning_rate=1.0,
+            name=name + '_weights'),
+        bias_attr=False)
+    bn_name = name + '_bn'
+    bn = fluid.layers.batch_norm(
+        input=conv,
+        param_attr=ParamAttr(
+            name=bn_name + "_scale",
+            initializer=fluid.initializer.Constant(1.0)),
+        bias_attr=ParamAttr(
+            name=bn_name + "_offset",
+            initializer=fluid.initializer.Constant(0.0)),
+        moving_mean_name=bn_name + '_mean',
+        moving_variance_name=bn_name + '_variance')
+    if if_act:
+        bn = fluid.layers.relu(bn)
+    return bn
+def basic_block(input, num_filters, stride=1, downsample=False, name=None):
+    residual = input
+    conv = conv_bn_layer(
+        input=input,
+        filter_size=3,
+        num_filters=num_filters,
+        stride=stride,
+        name=name + '_conv1')
+    conv = conv_bn_layer(
+        input=conv,
+        filter_size=3,
+        num_filters=num_filters,
+        if_act=False,
+        name=name + '_conv2')
+    if downsample:
+        residual = conv_bn_layer(
+            input=input,
+            filter_size=1,
+            num_filters=num_filters,
+            if_act=False,
+            name=name + '_downsample')
+    return fluid.layers.elementwise_add(x=residual, y=conv, act='relu')
+def bottleneck_block(input, num_filters, stride=1, downsample=False, name=None):
+    residual = input
+    conv = conv_bn_layer(
+        input=input,
+        filter_size=1,
+        num_filters=num_filters,
+        name=name + '_conv1')
+    conv = conv_bn_layer(
+        input=conv,
+        filter_size=3,
+        num_filters=num_filters,
+        stride=stride,
+        name=name + '_conv2')
+    conv = conv_bn_layer(
+        input=conv,
+        filter_size=1,
+        num_filters=num_filters * 4,
+        if_act=False,
+        name=name + '_conv3')
+    if downsample:
+        residual = conv_bn_layer(
+            input=input,
+            filter_size=1,
+            num_filters=num_filters * 4,
+            if_act=False,
+            name=name + '_downsample')
+    return fluid.layers.elementwise_add(x=residual, y=conv, act='relu')
+def fuse_layers(x, channels, multi_scale_output=True, name=None):
+    out = []
+    for i in range(len(channels) if multi_scale_output else 1):
+        residual = x[i]
+        shape = residual.shape
+        width = shape[-1]
+        height = shape[-2]
+        for j in range(len(channels)):
+            if j > i:
+                y = conv_bn_layer(
+                    x[j],
+                    filter_size=1,
+                    num_filters=channels[i],
+                    if_act=False,
+                    name=name + '_layer_' + str(i + 1) + '_' + str(j + 1))
+                y = fluid.layers.resize_bilinear(
+                    input=y, out_shape=[height, width])
+                residual = fluid.layers.elementwise_add(
+                    x=residual, y=y, act=None)
+            elif j < i:
+                y = x[j]
+                for k in range(i - j):
+                    if k == i - j - 1:
+                        y = conv_bn_layer(
+                            y,
+                            filter_size=3,
+                            num_filters=channels[i],
+                            stride=2,
+                            if_act=False,
+                            name=name + '_layer_' + str(i + 1) + '_' +
+                            str(j + 1) + '_' + str(k + 1))
+                    else:
+                        y = conv_bn_layer(
+                            y,
+                            filter_size=3,
+                            num_filters=channels[j],
+                            stride=2,
+                            name=name + '_layer_' + str(i + 1) + '_' +
+                            str(j + 1) + '_' + str(k + 1))
+                residual = fluid.layers.elementwise_add(
+                    x=residual, y=y, act=None)
+        residual = fluid.layers.relu(residual)
+        out.append(residual)
+    return out
+def branches(x, block_num, channels, name=None):
+    out = []
+    for i in range(len(channels)):
+        residual = x[i]
+        for j in range(block_num):
+            residual = basic_block(
+                residual,
+                channels[i],
+                name=name + '_branch_layer_' + str(i + 1) + '_' + str(j + 1))
+        out.append(residual)
+    return out
+def high_resolution_module(x, channels, multi_scale_output=True, name=None):
+    residual = branches(x, 4, channels, name=name)
+    out = fuse_layers(
+        residual, channels, multi_scale_output=multi_scale_output, name=name)
+    return out
+def transition_layer(x, in_channels, out_channels, name=None):
+    num_in = len(in_channels)
+    num_out = len(out_channels)
+    out = []
+    for i in range(num_out):
+        if i < num_in:
+            if in_channels[i] != out_channels[i]:
+                residual = conv_bn_layer(
+                    x[i],
+                    filter_size=3,
+                    num_filters=out_channels[i],
+                    name=name + '_layer_' + str(i + 1))
+                out.append(residual)
+            else:
+                out.append(x[i])
+        else:
+            residual = conv_bn_layer(
+                x[-1],
+                filter_size=3,
+                num_filters=out_channels[i],
+                stride=2,
+                name=name + '_layer_' + str(i + 1))
+            out.append(residual)
+    return out
+def stage(x, num_modules, channels, multi_scale_output=True, name=None):
+    out = x
+    for i in range(num_modules):
+        if i == num_modules - 1 and multi_scale_output == False:
+            out = high_resolution_module(
+                out,
+                channels,
+                multi_scale_output=False,
+                name=name + '_' + str(i + 1))
+        else:
+            out = high_resolution_module(
+                out, channels, name=name + '_' + str(i + 1))
+    return out
+def layer1(input, name=None):
+    conv = input
+    for i in range(4):
+        conv = bottleneck_block(
+            conv,
+            num_filters=64,
+            downsample=True if i == 0 else False,
+            name=name + '_' + str(i + 1))
+    return conv
+def aux_head(input, last_inp_channels, num_classes):
+    x = conv_bn_layer(
+        input=input,
+        filter_size=1,
+        num_filters=last_inp_channels,
+        stride=1,
+        padding=0,
+        name='aux_head_conv1')
+    x = fluid.layers.conv2d(
+        input=x,
+        num_filters=num_classes,
+        filter_size=1,
+        stride=1,
+        padding=0,
+        act=None,
+        #        param_attr=ParamAttr(initializer=MSRA(), learning_rate=1.0, name='aux_head_conv2_weights'),
+        param_attr=ParamAttr(
+            initializer=fluid.initializer.Normal(scale=0.001),
+            learning_rate=1.0,
+            name='aux_head_conv2_weights'),
+        bias_attr=ParamAttr(
+            initializer=fluid.initializer.Constant(0.0),
+            name="aux_head_conv2_bias"))
+    return x
+def conv3x3_ocr(input, ocr_mid_channels):
+    x = conv_bn_layer(
+        input=input,
+        filter_size=3,
+        num_filters=ocr_mid_channels,
+        stride=1,
+        padding=1,
+        name='conv3x3_ocr')
+    return x
+def f_pixel(input, key_channels):
+    x = conv_bn_layer(
+        input=input,
+        filter_size=1,
+        num_filters=key_channels,
+        stride=1,
+        padding=0,
+        name='f_pixel_conv1')
+    x = conv_bn_layer(
+        input=x,
+        filter_size=1,
+        num_filters=key_channels,
+        stride=1,
+        padding=0,
+        name='f_pixel_conv2')
+    return x
+def f_object(input, key_channels):
+    x = conv_bn_layer(
+        input=input,
+        filter_size=1,
+        num_filters=key_channels,
+        stride=1,
+        padding=0,
+        name='f_object_conv1')
+    x = conv_bn_layer(
+        input=x,
+        filter_size=1,
+        num_filters=key_channels,
+        stride=1,
+        padding=0,
+        name='f_object_conv2')
+    return x
+def f_down(input, key_channels):
+    x = conv_bn_layer(
+        input=input,
+        filter_size=1,
+        num_filters=key_channels,
+        stride=1,
+        padding=0,
+        name='f_down_conv')
+    return x
+def f_up(input, in_channels):
+    x = conv_bn_layer(
+        input=input,
+        filter_size=1,
+        num_filters=in_channels,
+        stride=1,
+        padding=0,
+        name='f_up_conv')
+    return x
+def object_context_block(x, proxy, in_channels, key_channels, scale):
+    batch_size, _, h, w = x.shape
+    if scale > 1:
+        x = fluid.layers.pool2d(x, pool_size=[scale, scale], pool_type='max')
+    query = f_pixel(x, key_channels)
+    query = fluid.layers.reshape(
+        query,
+        shape=[batch_size, key_channels, query.shape[2] * query.shape[3]])
+    query = fluid.layers.transpose(query, perm=[0, 2, 1])
+    key = f_object(proxy, key_channels)
+    key = fluid.layers.reshape(
+        key, shape=[batch_size, key_channels, key.shape[2] * key.shape[3]])
+    value = f_down(proxy, key_channels)
+    value = fluid.layers.reshape(
+        value,
+        shape=[batch_size, key_channels, value.shape[2] * value.shape[3]])
+    value = fluid.layers.transpose(value, perm=[0, 2, 1])
+    sim_map = fluid.layers.matmul(query, key)
+    sim_map = (key_channels**-.5) * sim_map
+    sim_map = fluid.layers.softmax(sim_map, axis=-1)
+    context = fluid.layers.matmul(sim_map, value)
+    context = fluid.layers.transpose(context, perm=[0, 2, 1])
+    context = fluid.layers.reshape(
+        context, shape=[batch_size, key_channels, x.shape[2], x.shape[3]])
+    context = f_up(context, in_channels)
+    if scale > 1:
+        context = fluid.layers.resize_bilinear(context, out_shape=[h, w])
+    return context
+def ocr_gather_head(feats, probs, scale=1):
+    feats = fluid.layers.reshape(
+        feats,
+        shape=[feats.shape[0], feats.shape[1], feats.shape[2] * feats.shape[3]])
+    feats = fluid.layers.transpose(feats, perm=[0, 2, 1])
+    probs = fluid.layers.reshape(
+        probs,
+        shape=[probs.shape[0], probs.shape[1], probs.shape[2] * probs.shape[3]])
+    probs = fluid.layers.softmax(scale * probs, axis=2)
+    ocr_context = fluid.layers.matmul(probs, feats)
+    ocr_context = fluid.layers.transpose(ocr_context, perm=[0, 2, 1])
+    ocr_context = fluid.layers.unsqueeze(ocr_context, axes=[3])
+    return ocr_context
+def ocr_distri_head(feats,
+                    proxy_feats,
+                    ocr_mid_channels,
+                    ocr_key_channels,
+                    scale=1,
+                    dropout=0.05):
+    context = object_context_block(feats, proxy_feats, ocr_mid_channels,
+                                   ocr_key_channels, scale)
+    x = fluid.layers.concat([context, feats], axis=1)
+    x = conv_bn_layer(
+        input=x,
+        filter_size=1,
+        num_filters=ocr_mid_channels,
+        stride=1,
+        padding=0,
+        name='spatial_ocr_conv')
+    x = fluid.layers.dropout(x, dropout_prob=dropout)
+    return x
+def cls_head(input, num_classes):
+    x = fluid.layers.conv2d(
+        input=input,
+        num_filters=num_classes,
+        filter_size=1,
+        stride=1,
+        padding=0,
+        act=None,
+        #        param_attr=ParamAttr(initializer=MSRA(), learning_rate=1.0, name='cls_head_conv_weights'),
+        param_attr=ParamAttr(
+            initializer=fluid.initializer.Normal(scale=0.001),
+            learning_rate=1.0,
+            name='cls_head_conv_weights'),
+        bias_attr=ParamAttr(
+            initializer=fluid.initializer.Constant(0.0),
+            name="cls_head_conv_bias"))
+    return x
+def ocr_module(input, last_inp_channels, num_classes, ocr_mid_channels,
+               ocr_key_channels):
+    out_aux = aux_head(input, last_inp_channels, num_classes)
+    feats = conv3x3_ocr(input, ocr_mid_channels)
+    context = ocr_gather_head(feats, out_aux)
+    feats = ocr_distri_head(feats, context, ocr_mid_channels, ocr_key_channels)
+    out = cls_head(feats, num_classes)
+    return out, out_aux
+def high_resolution_ocr_net(input, num_classes):
+    channels_2 = cfg.MODEL.HRNET.STAGE2.NUM_CHANNELS
+    channels_3 = cfg.MODEL.HRNET.STAGE3.NUM_CHANNELS
+    channels_4 = cfg.MODEL.HRNET.STAGE4.NUM_CHANNELS
+    num_modules_2 = cfg.MODEL.HRNET.STAGE2.NUM_MODULES
+    num_modules_3 = cfg.MODEL.HRNET.STAGE3.NUM_MODULES
+    num_modules_4 = cfg.MODEL.HRNET.STAGE4.NUM_MODULES
+    ocr_mid_channels = cfg.MODEL.OCR.OCR_MID_CHANNELS
+    ocr_key_channels = cfg.MODEL.OCR.OCR_KEY_CHANNELS
+    last_inp_channels = sum(channels_4)
+    x = conv_bn_layer(
+        input=input,
+        filter_size=3,
+        num_filters=64,
+        stride=2,
+        if_act=True,
+        name='layer1_1')
+    x = conv_bn_layer(
+        input=x,
+        filter_size=3,
+        num_filters=64,
+        stride=2,
+        if_act=True,
+        name='layer1_2')
+    la1 = layer1(x, name='layer2')
+    tr1 = transition_layer([la1], [256], channels_2, name='tr1')
+    st2 = stage(tr1, num_modules_2, channels_2, name='st2')
+    tr2 = transition_layer(st2, channels_2, channels_3, name='tr2')
+    st3 = stage(tr2, num_modules_3, channels_3, name='st3')
+    tr3 = transition_layer(st3, channels_3, channels_4, name='tr3')
+    st4 = stage(tr3, num_modules_4, channels_4, name='st4')
+    # upsample
+    shape = st4[0].shape
+    height, width = shape[-2], shape[-1]
+    st4[1] = fluid.layers.resize_bilinear(st4[1], out_shape=[height, width])
+    st4[2] = fluid.layers.resize_bilinear(st4[2], out_shape=[height, width])
+    st4[3] = fluid.layers.resize_bilinear(st4[3], out_shape=[height, width])
+    feats = fluid.layers.concat(st4, axis=1)
+    out, out_aux = ocr_module(feats, last_inp_channels, num_classes,
+                              ocr_mid_channels, ocr_key_channels)
+    out = fluid.layers.resize_bilinear(out, input.shape[2:])
+    out_aux = fluid.layers.resize_bilinear(out_aux, input.shape[2:])
+    return out, out_aux
+def ocrnet(input, num_classes):
+    logit = high_resolution_ocr_net(input, num_classes)
+    return logit
--- a/pdseg/train.py
+++ b/pdseg/train.py
@@ -27,6 +27,7 @@ import pprint
 import random
 import shutil
+import paddle
 import numpy as np
 import paddle.fluid as fluid
 from paddle.fluid import profiler
@@ -158,6 +159,15 @@ def load_checkpoint(exe, program):
    return begin_epoch
+def save_infer_program(test_program, ckpt_dir):
+    _test_program = test_program.clone()
+    _test_program.desc.flush()
+    _test_program.desc._set_version()
+    paddle.fluid.core.save_op_compatible_info(_test_program.desc)
+    with open(os.path.join(ckpt_dir, 'model') + ".pdmodel", "wb") as f:
+        f.write(_test_program.desc.serialize_to_string())
 def update_best_model(ckpt_dir):
    best_model_dir = os.path.join(cfg.TRAIN.MODEL_SAVE_DIR, 'best_model')
    if os.path.exists(best_model_dir):
@@ -173,6 +183,7 @@ def print_info(*msg):
 def train(cfg):
    startup_prog = fluid.Program()
    train_prog = fluid.Program()
+    test_prog = fluid.Program()
    if args.enable_ce:
        startup_prog.random_seed = 1000
        train_prog.random_seed = 1000
@@ -224,6 +235,7 @@ def train(cfg):
    data_loader, avg_loss, lr, pred, grts, masks = build_model(
        train_prog, startup_prog, phase=ModelPhase.TRAIN)
+    build_model(test_prog, fluid.Program(), phase=ModelPhase.EVAL)
    data_loader.set_sample_generator(
        data_generator, batch_size=batch_size_per_dev, drop_last=drop_last)
@@ -387,6 +399,7 @@ def train(cfg):
        if (epoch % cfg.TRAIN.SNAPSHOT_EPOCH == 0
                or epoch == cfg.SOLVER.NUM_EPOCHS) and cfg.TRAINER_ID == 0:
            ckpt_dir = save_checkpoint(train_prog, epoch)
+            save_infer_program(test_prog, ckpt_dir)
            if args.do_eval:
                print("Evaluation start")
@@ -419,7 +432,8 @@ def train(cfg):
    # save final model
    if cfg.TRAINER_ID == 0:
-        save_checkpoint(train_prog, 'final')
+        ckpt_dir = save_checkpoint(train_prog, 'final')
+        save_infer_program(test_prog, ckpt_dir)
 def main(args):

--- a/pdseg/utils/config.py
+++ b/pdseg/utils/config.py
@@ -248,7 +248,10 @@ cfg.MODEL.HRNET.STAGE3.NUM_CHANNELS = [40, 80, 160]
 # HRNET STAGE4 设置
 cfg.MODEL.HRNET.STAGE4.NUM_MODULES = 3
 cfg.MODEL.HRNET.STAGE4.NUM_CHANNELS = [40, 80, 160, 320]
+########################## OCRNET模型配置 ######################################
+cfg.MODEL.OCR.OCR_MID_CHANNELS = 512
+cfg.MODEL.OCR.OCR_KEY_CHANNELS = 256
 ########################## 预测部署模型配置 ###################################
 # 预测保存的模型名称
 cfg.FREEZE.MODEL_FILENAME = '__model__'

--- a/pretrained_model/download_model.py
+++ b/pretrained_model/download_model.py
@@ -24,6 +24,8 @@ from test_utils import download_file_and_uncompress
 model_urls = {
    # ImageNet Pretrained
+    "mobilenetv3_large_ssld_imagenet":
+    "https://paddleseg.bj.bcebos.com/models/MobileNetV3_large_x1_0_ssld_pretrained.tar",
    "mobilenetv2-2-0_bn_imagenet":
    "https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x2_0_pretrained.tar",
    "mobilenetv2-1-5_bn_imagenet":
@@ -42,6 +44,8 @@ model_urls = {
    "https://paddleseg.bj.bcebos.com/models/Xception41_pretrained.tgz",
    "xception65_imagenet":
    "https://paddleseg.bj.bcebos.com/models/Xception65_pretrained.tgz",
+    "resnet50_vd_imagenet":
+    "https://paddleseg.bj.bcebos.com/models/ResNet50_vd_ssld_pretrained.tgz",
    "hrnet_w18_bn_imagenet":
    "https://paddleseg.bj.bcebos.com/models/hrnet_w18_imagenet.tar",
    "hrnet_w30_bn_imagenet":
@@ -70,12 +74,16 @@ model_urls = {
    "https://paddleseg.bj.bcebos.com/models/pspnet101_coco.tgz",
    # Cityscapes pretrained
+    "deeplabv3p_mobilenetv3_large_cityscapes":
+    "https://paddleseg.bj.bcebos.com/models/deeplabv3p_mobilenetv3_large_cityscapes.tar.gz",
    "deeplabv3p_mobilenetv2-1-0_bn_cityscapes":
    "https://paddleseg.bj.bcebos.com/models/mobilenet_cityscapes.tgz",
    "deeplabv3p_xception65_gn_cityscapes":
    "https://paddleseg.bj.bcebos.com/models/deeplabv3p_xception65_cityscapes.tgz",
    "deeplabv3p_xception65_bn_cityscapes":
    "https://paddleseg.bj.bcebos.com/models/xception65_bn_cityscapes.tgz",
+    "deeplabv3p_resnet50_vd_cityscapes":
+    "https://paddleseg.bj.bcebos.com/models/deeplabv3p_resnet50_vd_cityscapes.tgz",
    "unet_bn_coco":
    "https://paddleseg.bj.bcebos.com/models/unet_coco_v3.tgz",
    "icnet_bn_cityscapes":
@@ -88,6 +96,8 @@ model_urls = {
    "https://paddleseg.bj.bcebos.com/models/hrnet_w18_bn_cityscapes.tgz",
    "fast_scnn_cityscapes":
    "https://paddleseg.bj.bcebos.com/models/fast_scnn_cityscape.tar",
+    "ocrnet_w18_bn_cityscapes":
+    "https://paddleseg.bj.bcebos.com/models/ocrnet_w18_bn_cityscapes.tar.gz",
 }
 if __name__ == "__main__":

--- a/requirements.txt
+++ b/requirements.txt
@@ -2,4 +2,4 @@ pre-commit
 yapf == 0.26.0
 flake8
 pyyaml >= 5.1
-visualdl == 2.0.0b4
+visualdl >= 2.0.0
--- a/turtorial/finetune_deeplabv3plus.md
+++ b/turtorial/finetune_deeplabv3plus.md
@@ -145,8 +145,11 @@ PaddleSeg在AI Studio平台上提供了在线体验的DeepLabv3+图像分割教
 |mobilenetv2-0-25_bn_imagenet|MobileNetV2|bn|ImageNet|MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: mobilenetv2 <br> MODEL.DEEPLAB.DEPTH_MULTIPLIER: 0.25 <br> MODEL.DEFAULT_NORM_TYPE: bn|
 |xception41_imagenet|Xception41|bn|ImageNet|MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: xception_41 <br> MODEL.DEFAULT_NORM_TYPE: bn|
 |xception65_imagenet|Xception65|bn|ImageNet|MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: xception_65 <br> MODEL.DEFAULT_NORM_TYPE: bn|
+|resnet50_vd_imagenet|ResNet50_vd|bn|ImageNet|MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: resnet50_vd <br> MODEL.DEFAULT_NORM_TYPE: bn|
 |deeplabv3p_mobilenetv2-1-0_bn_coco|MobileNetV2|bn|COCO|MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: mobilenetv2 <br> MODEL.DEEPLAB.DEPTH_MULTIPLIER: 1.0 <br> MODEL.DEEPLAB.ENCODER_WITH_ASPP: False <br> MODEL.DEEPLAB.ENABLE_DECODER: False <br> MODEL.DEFAULT_NORM_TYPE: bn|
 |**deeplabv3p_xception65_bn_coco**|Xception65|bn|COCO|MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: xception_65 <br> MODEL.DEFAULT_NORM_TYPE: bn |
 |deeplabv3p_mobilenetv2-1-0_bn_cityscapes|MobileNetV2|bn|Cityscapes|MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: mobilenetv2 <br> MODEL.DEEPLAB.DEPTH_MULTIPLIER: 1.0 <br> MODEL.DEEPLAB.ENCODER_WITH_ASPP: False <br> MODEL.DEEPLAB.ENABLE_DECODER: False <br> MODEL.DEFAULT_NORM_TYPE: bn|
+|deeplabv3p_mobilenetv3_large_cityscapes|MobileNetV3|bn|Cityscapes|MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: mobilenetv3_large <br> MODEL.DEFAULT_NORM_TYPE: bn|
 |deeplabv3p_xception65_gn_cityscapes|Xception65|gn|Cityscapes|MODEL.MODEL_NAME: deeplabv3p <br>  MODEL.DEEPLAB.BACKBONE: xception_65 <br> MODEL.DEFAULT_NORM_TYPE: gn|
 |deeplabv3p_xception65_bn_cityscapes|Xception65|bn|Cityscapes|MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: xception_65 <br> MODEL.DEFAULT_NORM_TYPE: bn|
+|deeplabv3p_resnet50_vd_cityscapes|resnet50_vd|bn|Cityscapes|MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: resnet50_vd <br> MODEL.DEFAULT_NORM_TYPE: bn|
--- a/turtorial/finetune_fast_scnn.md
+++ b/turtorial/finetune_fast_scnn.md
@@ -115,5 +115,3 @@ python pdseg/eval.py --use_gpu --cfg ./configs/fast_scnn_pet.yaml
 | Fast-SCNN/bn | (1024, 2048) |6.28ms| 0.6964 |
 上述测试环境为v100. 测试使用paddle的推理接口[zero_copy](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_guide/inference_deployment/inference/python_infer_cn.html#id8)的方式，模型输出是类别，即argmax后的值。
--- a/turtorial/finetune_hrnet.md
+++ b/turtorial/finetune_hrnet.md
@@ -145,4 +145,3 @@ python pdseg/vis.py --use_gpu --cfg ./configs/hrnet_optic.yaml
 | hrnet_w44_bn_imagenet |HRNet| ImageNet | MODEL.MODEL_NAME: hrnet <br> MODEL.HRNET.STAGE2.NUM_CHANNELS: [44, 88] <br> MODEL.HRNET.STAGE3.NUM_CHANNELS: [44, 88, 176] <br> MODEL.HRNET.STAGE4.NUM_CHANNELS: [44, 88, 176, 352] <br> MODEL.DEFAULT_NORM_TYPE: bn |
 | hrnet_w48_bn_imagenet |HRNet| ImageNet | MODEL.MODEL_NAME: hrnet <br> MODEL.HRNET.STAGE2.NUM_CHANNELS: [48, 96] <br> MODEL.HRNET.STAGE3.NUM_CHANNELS: [48, 96, 192] <br> MODEL.HRNET.STAGE4.NUM_CHANNELS: [48, 96, 192, 384] <br> MODEL.DEFAULT_NORM_TYPE: bn |
 | hrnet_w64_bn_imagenet |HRNet| ImageNet | MODEL.MODEL_NAME: hrnet <br> MODEL.HRNET.STAGE2.NUM_CHANNELS: [64, 128] <br> MODEL.HRNET.STAGE3.NUM_CHANNELS: [64, 128, 256] <br> MODEL.HRNET.STAGE4.NUM_CHANNELS: [64, 128, 256, 512] <br> MODEL.DEFAULT_NORM_TYPE: bn |
--- a/turtorial/finetune_icnet.md
+++ b/turtorial/finetune_icnet.md
--- a/tutorial/finetune_ocrnet.md
+++ b/tutorial/finetune_ocrnet.md
+# OCRNet模型使用教程
+本教程旨在介绍如何通过使用PaddleSeg提供的 ***`OCRNet`*** 预训练模型在自定义数据集上进行训练、评估和可视化。
+* 在阅读本教程前，请确保您已经了解过PaddleSeg的[快速入门](../README.md#快速入门)和[基础功能](../README.md#基础功能)等章节，以便对PaddleSeg有一定的了解。
+* 本教程的所有命令都基于PaddleSeg主目录进行执行。
+* 目前OCRNet只支持HRNet作为backbone
+## 一. 准备待训练数据
+![](./imgs/optic.png)
+我们提前准备好了一份眼底医疗分割数据集，包含267张训练图片、76张验证图片、38张测试图片。通过以下命令进行下载：
+```shell
+python dataset/download_optic.py
+```
+## 二. 下载预训练模型
+接着下载对应的预训练模型
+```shell
+python pretrained_model/download_model.py ocrnet_w18_bn_cityscapes
+```
+关于已有的OCRNet预训练模型的列表，请参见[模型组合](#模型组合)。如果需要使用其他预训练模型，下载该模型并将配置中的BACKBONE、NORM_TYPE等进行替换即可。
+## 三. 准备配置
+接着我们需要确定相关配置，从本教程的角度，配置分为三部分：
+* 数据集
+  * 训练集主目录
+  * 训练集文件列表
+  * 测试集文件列表
+  * 评估集文件列表
+* 预训练模型
+  * 预训练模型名称
+  * 预训练模型各阶段通道数设置
+  * 预训练模型的Normalization类型
+  * 预训练模型路径
+* 其他
+  * 学习率
+  * Batch大小
+  * ...
+在三者中，预训练模型的配置尤为重要，如果模型配置错误，会导致预训练的参数没有加载，进而影响收敛速度。预训练模型相关的配置如第二步所展示。
+数据集的配置和数据路径有关，在本教程中，数据存放在`dataset/optic_disc_seg`中
+其他配置则根据数据集和机器环境的情况进行调节，最终我们保存一个如下内容的yaml配置文件，存放路径为**configs/ocrnet_optic.yaml**
+```yaml
+# 数据集配置
+DATASET:
+    DATA_DIR: "./dataset/optic_disc_seg/"
+    NUM_CLASSES: 2
+    TEST_FILE_LIST: "./dataset/optic_disc_seg/test_list.txt"
+    TRAIN_FILE_LIST: "./dataset/optic_disc_seg/train_list.txt"
+    VAL_FILE_LIST: "./dataset/optic_disc_seg/val_list.txt"
+    VIS_FILE_LIST: "./dataset/optic_disc_seg/test_list.txt"
+# 预训练模型配置
+MODEL:
+    MODEL_NAME: "ocrnet"
+    DEFAULT_NORM_TYPE: "bn"
+    # 此处设置backbone的配置
+    HRNET:
+        STAGE2:
+            NUM_CHANNELS: [18, 36]
+        STAGE3:
+            NUM_CHANNELS: [18, 36, 72]
+        STAGE4:
+            NUM_CHANNELS: [18, 36, 72, 144]
+    # 此处设置OCR HEAD
+    OCR:
+        OCR_MID_CHANNELS: 512
+        OCR_KEY_CHANNELS: 256
+# 其他配置
+TRAIN_CROP_SIZE: (512, 512)
+EVAL_CROP_SIZE: (512, 512)
+AUG:
+    AUG_METHOD: "unpadding"
+    FIX_RESIZE_SIZE: (512, 512)
+BATCH_SIZE: 4
+TRAIN:
+    PRETRAINED_MODEL_DIR: "./pretrained_model/ocrnet_w18_bn_cityscapes/"
+    MODEL_SAVE_DIR: "./saved_model/ocrnet_optic/"
+    SNAPSHOT_EPOCH: 5
+TEST:
+    TEST_MODEL: "./saved_model/ocrnet_optic/final"
+SOLVER:
+    NUM_EPOCHS: 10
+    LR: 0.001
+    LR_POLICY: "poly"
+    OPTIMIZER: "adam"
+```
+## 四. 配置/数据校验
+在开始训练和评估之前，我们还需要对配置和数据进行一次校验，确保数据和配置是正确的。使用下述命令启动校验流程
+```shell
+python pdseg/check.py --cfg ./configs/ocrnet_optic.yaml
+```
+## 五. 开始训练
+校验通过后，使用下述命令启动训练
+```shell
+# 指定GPU卡号（以0号卡为例）
+export CUDA_VISIBLE_DEVICES=0
+# 训练
+python pdseg/train.py --use_gpu --cfg ./configs/ocrnet_optic.yaml
+```
+## 六. 进行评估
+模型训练完成，使用下述命令启动评估
+```shell
+python pdseg/eval.py --use_gpu --cfg ./configs/ocrnet_optic.yaml
+```
+## 七. 进行可视化
+使用下述命令启动预测和可视化
+```shell
+python pdseg/vis.py --use_gpu --cfg ./configs/ocrnet_optic.yaml
+```
+预测结果将保存在visual目录下，以下展示其中1张图片的预测效果：
+![](imgs/optic_hrnet.png)
+## 模型组合
+|预训练模型名称|Backbone|数据集|配置|
+|-|-|-|-|
+|ocrnet_w18_bn_cityscapes|HRNet| Cityscapes | MODEL.MODEL_NAME: ocrnet <br> MODEL.HRNET.STAGE2.NUM_CHANNELS: [18, 36] <br> MODEL.HRNET.STAGE3.NUM_CHANNELS: [18, 36, 72] <br> MODEL.HRNET.STAGE4.NUM_CHANNELS: [18, 36, 72, 144] <br> MODEL.DEFAULT_NORM_TYPE: bn <br> MODEL.OCR.OCR_MID_CHANNELS: 512 <br> MODEL.OCR.OCR_KEY_CHANNELS: 256|
--- a/turtorial/finetune_pspnet.md
+++ b/turtorial/finetune_pspnet.md
--- a/turtorial/finetune_unet.md
+++ b/turtorial/finetune_unet.md
--- a/turtorial/imgs/optic.png
+++ b/turtorial/imgs/optic.png
--- a/turtorial/imgs/optic_deeplab.png
+++ b/turtorial/imgs/optic_deeplab.png
--- a/turtorial/imgs/optic_hrnet.png
+++ b/turtorial/imgs/optic_hrnet.png
--- a/turtorial/imgs/optic_icnet.png
+++ b/turtorial/imgs/optic_icnet.png
--- a/turtorial/imgs/optic_pspnet.png
+++ b/turtorial/imgs/optic_pspnet.png
--- a/turtorial/imgs/optic_unet.png
+++ b/turtorial/imgs/optic_unet.png