diff --git a/dygraph/README.md b/dygraph/README.md
index 20728e6bb5321e78122b9e925544419e9ec4483c..fd9af0d0dac1c2011479de4cab6929e34c18a515 100644
--- a/dygraph/README.md
+++ b/dygraph/README.md
@@ -1,43 +1,28 @@
 # Dynamic Graph Execution
 
-## Dataset Configuration
-```
-data_dir='data/path'
-train_list='train/list/path'
-val_list='val/list/path'
-test_list='test/list/path'
-num_classes=number/of/dataset/classes
-```
-
 ## Training
 ```
 python3 train.py --model_name UNet \
---data_dir $data_dir \
---train_list $train_list \
---val_list $val_list \
---num_classes $num_classes \
+--dataset OpticDiscSeg \
 --input_size 192 192 \
---num_epochs 4 \
+--num_epochs 10 \
 --save_interval_epochs 1 \
+--do_eval \
 --save_dir output
 ```
 
 ## Evaluation
 ```
 python3 val.py --model_name UNet \
---data_dir $data_dir \
---val_list $val_list \
---num_classes $num_classes \
+--dataset OpticDiscSeg \
 --input_size 192 192 \
---model_dir output/epoch_1
+--model_dir output/best_model
 ```
 
 ## Prediction
 ```
 python3 infer.py --model_name UNet \
---data_dir $data_dir \
---test_list $test_list \
---num_classes $num_classes \
---input_size 192 192 \
---model_dir output/epoch_1
+--dataset OpticDiscSeg \
+--model_dir output/best_model \
+--input_size 192 192
 ```
diff --git a/dygraph/datasets/__init__.py b/dygraph/datasets/__init__.py
index 072a82f7409a9369d2c3b1bdba603527eac0bb7f..9a52eccf7454cd751a28ef32e7daf22b3d0164e2 100644
--- a/dygraph/datasets/__init__.py
+++ b/dygraph/datasets/__init__.py
@@ -12,5 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from .dataset import Dataset
 from .optic_disc_seg import OpticDiscSeg
 from .cityscapes import Cityscapes
diff --git a/dygraph/datasets/cityscapes.py b/dygraph/datasets/cityscapes.py
index 21f967820ec32aa37b1877ae7d583eb3e5aac674..0bfd43febbfe0d7dfbde5e25c4476fe34c8aac5d 100644
--- a/dygraph/datasets/cityscapes.py
+++ b/dygraph/datasets/cityscapes.py
@@ -14,8 +14,7 @@
 
 import os
 
-from paddle.fluid.io import Dataset
-
+from .dataset import Dataset
 from utils.download import download_file_and_uncompress
 
 DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset')
@@ -70,16 +69,3 @@ class Cityscapes(Dataset):
             image_path = os.path.join(self.data_dir, items[0])
             grt_path = os.path.join(self.data_dir, items[1])
             self.file_list.append([image_path, grt_path])
-
-    def __getitem__(self, idx):
-        image_path, grt_path = self.file_list[idx]
-        im, im_info, label = self.transforms(im=image_path, label=grt_path)
-        if self.mode == 'train':
-            return im, label
-        elif self.mode == 'eval':
-            return im, label
-        if self.mode == 'test':
-            return im, im_info, image_path
-
-    def __len__(self):
-        return len(self.file_list)
diff --git a/dygraph/datasets/dataset.py b/dygraph/datasets/dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..908e90b4f4159e997446d0da40374ccde79abf9b
--- /dev/null
+++ b/dygraph/datasets/dataset.py
@@ -0,0 +1,105 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
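+
+# Common file-list dataset for the dygraph pipeline: each line of a list file
+# is "image_path<separator>label_path", resolved relative to data_dir.
+# OpticDiscSeg and Cityscapes subclass this instead of paddle.fluid.io.Dataset.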
+
+import os
+
+import paddle.fluid as fluid
+import numpy as np
+from PIL import Image
+
+
+class Dataset(fluid.io.Dataset):
+    def __init__(self,
+                 data_dir,
+                 num_classes,
+                 train_list=None,
+                 val_list=None,
+                 test_list=None,
+                 separator=' ',
+                 transforms=None,
+                 mode='train'):
+        self.data_dir = data_dir
+        self.transforms = transforms
+        self.file_list = list()
+        self.mode = mode
+        self.num_classes = num_classes
+
+        if mode.lower() not in ['train', 'eval', 'test']:
+            raise Exception(
+                "mode should be 'train', 'eval' or 'test', but got {}.".format(
+                    mode))
+
+        if self.transforms is None:
+            raise Exception("transforms is necessary, but it is None.")
+
+        if mode == 'train':
+            if train_list is None:
+                raise Exception(
+                    'When mode is "train", train_list is needed, but it is None.')
+            elif not os.path.exists(train_list):
+                raise Exception(
+                    'train_list is not found: {}'.format(train_list))
+            else:
+                file_list = train_list
+        elif mode == 'eval':
+            if val_list is None:
+                raise Exception(
+                    'When mode is "eval", val_list is needed, but it is None.')
+            elif not os.path.exists(val_list):
+                raise Exception('val_list is not found: {}'.format(val_list))
+            else:
+                file_list = val_list
+        else:
+            if test_list is None:
+                raise Exception(
+                    'When mode is "test", test_list is needed, but it is None.')
+            elif not os.path.exists(test_list):
+                raise Exception('test_list is not found: {}'.format(test_list))
+            else:
+                file_list = test_list
+
+        with open(file_list, 'r') as f:
+            for line in f:
+                items = line.strip().split(separator)
+                if len(items) != 2:
+                    if mode == 'train' or mode == 'eval':
+                        raise Exception(
+                            "File list format incorrect! It should be"
+                            " image_name{}label_name\\n".format(separator))
+                    image_path = os.path.join(self.data_dir, items[0])
+                    grt_path = None
+                else:
+                    image_path = os.path.join(self.data_dir, items[0])
+                    grt_path = os.path.join(self.data_dir, items[1])
+                self.file_list.append([image_path, grt_path])
+
+    def __getitem__(self, idx):
+        image_path, grt_path = self.file_list[idx]
+        if self.mode == 'train':
+            im, im_info, label = self.transforms(im=image_path, label=grt_path)
+            return im, label
+        elif self.mode == 'eval':
+            im, im_info, _ = self.transforms(im=image_path)
+            im = im[np.newaxis, ...]
+            label = np.asarray(Image.open(grt_path))
+            label = label[np.newaxis, np.newaxis, :, :]
+            return im, im_info, label
+        elif self.mode == 'test':
+            im, im_info, _ = self.transforms(im=image_path)
+            im = im[np.newaxis, ...]
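+            # im_info is the ordered list of ('resize'/'padding', (h, w))
+            # records; infer.py and val.py walk it in reverse to map the
+            # prediction back to the original image size.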
+            return im, im_info, image_path
+
+    def __len__(self):
+        return len(self.file_list)
diff --git a/dygraph/datasets/optic_disc_seg.py b/dygraph/datasets/optic_disc_seg.py
index 0a321915e90c18e99e46d0e53473e695b1ec2317..9794949a0688e1abe29212a1cd92048b6ceab622 100644
--- a/dygraph/datasets/optic_disc_seg.py
+++ b/dygraph/datasets/optic_disc_seg.py
@@ -14,8 +14,7 @@
 
 import os
 
-from paddle.fluid.io import Dataset
-
+from .dataset import Dataset
 from utils.download import download_file_and_uncompress
 
 DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset')
@@ -70,16 +69,3 @@ class OpticDiscSeg(Dataset):
             image_path = os.path.join(self.data_dir, items[0])
             grt_path = os.path.join(self.data_dir, items[1])
             self.file_list.append([image_path, grt_path])
-
-    def __getitem__(self, idx):
-        image_path, grt_path = self.file_list[idx]
-        im, im_info, label = self.transforms(im=image_path, label=grt_path)
-        if self.mode == 'train':
-            return im, label
-        elif self.mode == 'eval':
-            return im, label
-        if self.mode == 'test':
-            return im, im_info, image_path
-
-    def __len__(self):
-        return len(self.file_list)
diff --git a/dygraph/infer.py b/dygraph/infer.py
index af745a39b025e9e804c207989c939d454d7ff25f..0b25a48ff9c2c3ffbe9532d48c95564173364b2c 100644
--- a/dygraph/infer.py
+++ b/dygraph/infer.py
@@ -24,7 +24,7 @@ import tqdm
 
 from datasets import OpticDiscSeg, Cityscapes
 import transforms as T
-import models
+from models import MODELS
 import utils
 import utils.logging as logging
 from utils import get_environ_info
@@ -37,7 +37,8 @@ def parse_args():
     parser.add_argument(
         '--model_name',
         dest='model_name',
-        help="Model type for traing, which is one of ('UNet')",
+        help='Model type for testing, which is one of {}'.format(
+            str(list(MODELS.keys()))),
         type=str,
         default='UNet')
@@ -97,19 +98,20 @@ def infer(model, test_dataset=None, model_dir=None, save_dir='output'):
     logging.info("Start to predict...")
     for im, im_info, im_path in tqdm.tqdm(test_dataset):
-        im = im[np.newaxis, ...]
         im = to_variable(im)
         pred, _ = model(im, mode='test')
         pred = pred.numpy()
         pred = np.squeeze(pred).astype('uint8')
-        keys = list(im_info.keys())
-        for k in keys[::-1]:
-            if k == 'shape_before_resize':
-                h, w = im_info[k][0], im_info[k][1]
-                pred = cv2.resize(pred, (w, h), cv2.INTER_NEAREST)
-            elif k == 'shape_before_padding':
-                h, w = im_info[k][0], im_info[k][1]
+        for info in im_info[::-1]:
+            if info[0] == 'resize':
+                h, w = info[1][0], info[1][1]
+                # interpolation must be passed by keyword: a positional third
+                # argument is taken as dst and silently ignored by OpenCV.
+                pred = cv2.resize(pred, (w, h),
+                                  interpolation=cv2.INTER_NEAREST)
+            elif info[0] == 'padding':
+                h, w = info[1][0], info[1][1]
                 pred = pred[0:h, 0:w]
+            else:
+                raise Exception("Unexpected info '{}' in im_info".format(
+                    info[0]))
 
         im_file = im_path.replace(test_dataset.data_dir, '')
         if im_file[0] == '/':
@@ -146,8 +148,11 @@ def main(args):
     test_transforms = T.Compose([T.Resize(args.input_size), T.Normalize()])
     test_dataset = dataset(transforms=test_transforms, mode='test')
 
-    if args.model_name == 'UNet':
-        model = models.UNet(num_classes=test_dataset.num_classes)
+    if args.model_name not in MODELS:
+        raise Exception(
+            '--model_name is invalid. It should be one of {}'.format(
+                str(list(MODELS.keys()))))
+    model = MODELS[args.model_name](num_classes=test_dataset.num_classes)
 
     infer(
         model,
diff --git a/dygraph/models/__init__.py b/dygraph/models/__init__.py
index 792059698bdbc5f95acbd18a0f3cbc6b6ec769e5..0057d88f154f93542c00b0d3dd22a8e52005764c 100644
--- a/dygraph/models/__init__.py
+++ b/dygraph/models/__init__.py
@@ -13,3 +13,28 @@
 # limitations under the License.
 
 from .unet import UNet
+from .hrnet import *
+
+# Registry mapping the --model_name command-line strings to constructors.
+MODELS = {
+    "UNet": UNet,
+    "HRNet_W18_Small_V1": HRNet_W18_Small_V1,
+    "HRNet_W18_Small_V2": HRNet_W18_Small_V2,
+    "HRNet_W18": HRNet_W18,
+    "HRNet_W30": HRNet_W30,
+    "HRNet_W32": HRNet_W32,
+    "HRNet_W40": HRNet_W40,
+    "HRNet_W44": HRNet_W44,
+    "HRNet_W48": HRNet_W48,
+    "HRNet_W60": HRNet_W60,
+    "HRNet_W64": HRNet_W64,
+    "SE_HRNet_W18_Small_V1": SE_HRNet_W18_Small_V1,
+    "SE_HRNet_W18_Small_V2": SE_HRNet_W18_Small_V2,
+    "SE_HRNet_W18": SE_HRNet_W18,
+    "SE_HRNet_W30": SE_HRNet_W30,
+    "SE_HRNet_W32": SE_HRNet_W32,
+    "SE_HRNet_W40": SE_HRNet_W40,
+    "SE_HRNet_W44": SE_HRNet_W44,
+    "SE_HRNet_W48": SE_HRNet_W48,
+    "SE_HRNet_W60": SE_HRNet_W60,
+    "SE_HRNet_W64": SE_HRNet_W64
+}
diff --git a/dygraph/models/hrnet.py b/dygraph/models/hrnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..2dcf2ddad0aad6e83356fe884ad213660b6816f3
--- /dev/null
+++ b/dygraph/models/hrnet.py
@@ -0,0 +1,1063 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+
+import paddle
+import paddle.fluid as fluid
+from paddle.fluid.param_attr import ParamAttr
+from paddle.fluid.layer_helper import LayerHelper
+from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
+try:
+    from paddle.fluid.dygraph import SyncBatchNorm as BatchNorm
+except:
+    from paddle.fluid.dygraph import BatchNorm
+
+__all__ = [
+    "HRNet_W18_Small_V1", "HRNet_W18_Small_V2", "HRNet_W18", "HRNet_W30",
+    "HRNet_W32", "HRNet_W40", "HRNet_W44", "HRNet_W48", "HRNet_W60",
+    "HRNet_W64", "SE_HRNet_W18_Small_V1", "SE_HRNet_W18_Small_V2",
+    "SE_HRNet_W18", "SE_HRNet_W30", "SE_HRNet_W32", "SE_HRNet_W40",
+    "SE_HRNet_W44", "SE_HRNet_W48", "SE_HRNet_W60", "SE_HRNet_W64"
+]
+
+
+class HRNet(fluid.dygraph.Layer):
+    def __init__(self,
+                 num_classes,
+                 stage1_num_modules=1,
+                 stage1_num_blocks=[4],
+                 stage1_num_channels=[64],
+                 stage2_num_modules=1,
+                 stage2_num_blocks=[4, 4],
+                 stage2_num_channels=[18, 36],
+                 stage3_num_modules=4,
+                 stage3_num_blocks=[4, 4, 4],
+                 stage3_num_channels=[18, 36, 72],
+                 stage4_num_modules=3,
+                 stage4_num_blocks=[4, 4, 4, 4],
+                 stage4_num_channels=[18, 36, 72, 144],
+                 has_se=False,
+                 ignore_index=255):
+        super(HRNet, self).__init__()
+
+        self.num_classes = num_classes
+        self.stage1_num_modules = stage1_num_modules
+        self.stage1_num_blocks = stage1_num_blocks
+        self.stage1_num_channels = stage1_num_channels
+        self.stage2_num_modules = stage2_num_modules
+        self.stage2_num_blocks = stage2_num_blocks
+        self.stage2_num_channels = stage2_num_channels
+        self.stage3_num_modules = stage3_num_modules
+        self.stage3_num_blocks = stage3_num_blocks
+        self.stage3_num_channels = stage3_num_channels
+        self.stage4_num_modules = stage4_num_modules
+        self.stage4_num_blocks = stage4_num_blocks
+        self.stage4_num_channels = stage4_num_channels
+        self.has_se = has_se
+        self.ignore_index = ignore_index
+        self.EPS = 1e-5
+
+        self.conv_layer1_1 = ConvBNLayer(
+            num_channels=3,
+            num_filters=64,
+            filter_size=3,
+            stride=2,
+            act='relu',
+            name="layer1_1")
+
+        self.conv_layer1_2 = ConvBNLayer(
+            num_channels=64,
+            num_filters=64,
+            filter_size=3,
+            stride=2,
+            act='relu',
+            name="layer1_2")
+
+        self.la1 = Layer1(
+            num_channels=64,
+            num_blocks=self.stage1_num_blocks[0],
+            num_filters=self.stage1_num_channels[0],
+            has_se=has_se,
+            name="layer2")
+
+        self.tr1 = TransitionLayer(
+            in_channels=[self.stage1_num_channels[0] * 4],
+            out_channels=self.stage2_num_channels,
+            name="tr1")
+
+        self.st2 = Stage(
+            num_channels=self.stage2_num_channels,
+            num_modules=self.stage2_num_modules,
+            num_blocks=self.stage2_num_blocks,
+            num_filters=self.stage2_num_channels,
+            has_se=self.has_se,
+            name="st2")
+
+        self.tr2 = TransitionLayer(
+            in_channels=self.stage2_num_channels,
+            out_channels=self.stage3_num_channels,
+            name="tr2")
+        # has_se is forwarded to every stage so the SE_* variants apply
+        # squeeze-and-excitation beyond stage 2 as well.
+        self.st3 = Stage(
+            num_channels=self.stage3_num_channels,
+            num_modules=self.stage3_num_modules,
+            num_blocks=self.stage3_num_blocks,
+            num_filters=self.stage3_num_channels,
+            has_se=self.has_se,
+            name="st3")
+
+        self.tr3 = TransitionLayer(
+            in_channels=self.stage3_num_channels,
+            out_channels=self.stage4_num_channels,
+            name="tr3")
+        self.st4 = Stage(
+            num_channels=self.stage4_num_channels,
+            num_modules=self.stage4_num_modules,
+            num_blocks=self.stage4_num_blocks,
+            num_filters=self.stage4_num_channels,
+            has_se=self.has_se,
+            name="st4")
+
+        last_inp_channels = sum(self.stage4_num_channels)
+        self.conv_last_2 = ConvBNLayer(
+            num_channels=last_inp_channels,
+            num_filters=last_inp_channels,
+            filter_size=1,
+            stride=1,
+            name='conv-2')
+        self.conv_last_1 = Conv2D(
+            num_channels=last_inp_channels,
+            num_filters=self.num_classes,
+            filter_size=1,
+            stride=1,
+            padding=0,
+            param_attr=ParamAttr(name='conv-1_weights'))
+
+    def forward(self, x, label=None, mode='train'):
+        input_shape = x.shape[2:]
+        conv1 = self.conv_layer1_1(x)
+        conv2 = self.conv_layer1_2(conv1)
+
+        la1 = self.la1(conv2)
+
+        tr1 = self.tr1([la1])
+        st2 = self.st2(tr1)
+
+        tr2 = self.tr2(st2)
+        st3 = self.st3(tr2)
+
+        tr3 = self.tr3(st3)
+        st4 = self.st4(tr3)
+
+        x0_h, x0_w = st4[0].shape[2:]
+        x1 = fluid.layers.resize_bilinear(st4[1], out_shape=(x0_h, x0_w))
+        x2 = fluid.layers.resize_bilinear(st4[2], out_shape=(x0_h, x0_w))
+        x3 = fluid.layers.resize_bilinear(st4[3], out_shape=(x0_h, x0_w))
+        x = fluid.layers.concat([st4[0], x1, x2, x3], axis=1)
+        x = self.conv_last_2(x)
+        logit = self.conv_last_1(x)
+        logit = fluid.layers.resize_bilinear(logit, input_shape)
+
+        if mode == 'train':
+            if label is None:
+                raise Exception('Label is needed during training')
+            return self._get_loss(logit, label)
+        else:
+            score_map = fluid.layers.softmax(logit, axis=1)
+            score_map = fluid.layers.transpose(score_map, [0, 2, 3, 1])
+            pred = fluid.layers.argmax(score_map, axis=3)
+            pred = fluid.layers.unsqueeze(pred, axes=[3])
+            return pred, score_map
+
+    def _get_loss(self, logit, label):
+        logit = fluid.layers.transpose(logit, [0, 2, 3, 1])
+        label = fluid.layers.transpose(label, [0, 2, 3, 1])
+        mask = label != self.ignore_index
+        mask = fluid.layers.cast(mask, 'float32')
+        loss, probs = fluid.layers.softmax_with_cross_entropy(
+            logit,
+            label,
+            ignore_index=self.ignore_index,
+            return_softmax=True,
+            axis=-1)
+
+        loss = loss * mask
+        avg_loss = fluid.layers.mean(loss) / (
+            fluid.layers.mean(mask) + self.EPS)
+
+        label.stop_gradient = True
+        mask.stop_gradient = True
+        return avg_loss
+
+
+class ConvBNLayer(fluid.dygraph.Layer):
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 filter_size,
+                 stride=1,
+                 groups=1,
+                 act="relu",
+                 name=None):
+        super(ConvBNLayer, self).__init__()
+
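+        # The convolution is created without bias; the BatchNorm that follows
+        # supplies the learnable offset.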
self._conv = Conv2D( + num_channels=num_channels, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + bn_name = name + '_bn' + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def forward(self, input): + y = self._conv(input) + y = self._batch_norm(y) + return y + + +class Layer1(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_filters, + num_blocks, + has_se=False, + name=None): + super(Layer1, self).__init__() + + self.bottleneck_block_list = [] + + for i in range(num_blocks): + bottleneck_block = self.add_sublayer( + "bb_{}_{}".format(name, i + 1), + BottleneckBlock( + num_channels=num_channels if i == 0 else num_filters * 4, + num_filters=num_filters, + has_se=has_se, + stride=1, + downsample=True if i == 0 else False, + name=name + '_' + str(i + 1))) + self.bottleneck_block_list.append(bottleneck_block) + + def forward(self, input): + conv = input + for block_func in self.bottleneck_block_list: + conv = block_func(conv) + return conv + + +class TransitionLayer(fluid.dygraph.Layer): + def __init__(self, in_channels, out_channels, name=None): + super(TransitionLayer, self).__init__() + + num_in = len(in_channels) + num_out = len(out_channels) + self.conv_bn_func_list = [] + for i in range(num_out): + residual = None + if i < num_in: + if in_channels[i] != out_channels[i]: + residual = self.add_sublayer( + "transition_{}_layer_{}".format(name, i + 1), + ConvBNLayer( + num_channels=in_channels[i], + num_filters=out_channels[i], + filter_size=3, + name=name + '_layer_' + str(i + 1))) + else: + residual = self.add_sublayer( + "transition_{}_layer_{}".format(name, i + 1), + ConvBNLayer( + num_channels=in_channels[-1], + num_filters=out_channels[i], + filter_size=3, + stride=2, + name=name + '_layer_' + str(i + 1))) + self.conv_bn_func_list.append(residual) + + def forward(self, input): + outs = [] + for idx, conv_bn_func in enumerate(self.conv_bn_func_list): + if conv_bn_func is None: + outs.append(input[idx]) + else: + if idx < len(input): + outs.append(conv_bn_func(input[idx])) + else: + outs.append(conv_bn_func(input[-1])) + return outs + + +class Branches(fluid.dygraph.Layer): + def __init__(self, + num_blocks, + in_channels, + out_channels, + has_se=False, + name=None): + super(Branches, self).__init__() + + self.basic_block_list = [] + + for i in range(len(out_channels)): + self.basic_block_list.append([]) + for j in range(num_blocks[i]): + in_ch = in_channels[i] if j == 0 else out_channels[i] + basic_block_func = self.add_sublayer( + "bb_{}_branch_layer_{}_{}".format(name, i + 1, j + 1), + BasicBlock( + num_channels=in_ch, + num_filters=out_channels[i], + has_se=has_se, + name=name + '_branch_layer_' + str(i + 1) + '_' + + str(j + 1))) + self.basic_block_list[i].append(basic_block_func) + + def forward(self, inputs): + outs = [] + for idx, input in enumerate(inputs): + conv = input + for basic_block_func in self.basic_block_list[idx]: + conv = basic_block_func(conv) + outs.append(conv) + return outs + + +class BottleneckBlock(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_filters, + has_se, + stride=1, + downsample=False, + name=None): + super(BottleneckBlock, self).__init__() + + self.has_se = has_se + self.downsample 
= downsample + + self.conv1 = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + act="relu", + name=name + "_conv1", + ) + self.conv2 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + stride=stride, + act="relu", + name=name + "_conv2") + self.conv3 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters * 4, + filter_size=1, + act=None, + name=name + "_conv3") + + if self.downsample: + self.conv_down = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 4, + filter_size=1, + act=None, + name=name + "_downsample") + + if self.has_se: + self.se = SELayer( + num_channels=num_filters * 4, + num_filters=num_filters * 4, + reduction_ratio=16, + name=name + '_fc') + + def forward(self, input): + residual = input + conv1 = self.conv1(input) + conv2 = self.conv2(conv1) + conv3 = self.conv3(conv2) + + if self.downsample: + residual = self.conv_down(input) + + if self.has_se: + conv3 = self.se(conv3) + + y = fluid.layers.elementwise_add(x=conv3, y=residual, act="relu") + return y + + +class BasicBlock(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_filters, + stride=1, + has_se=False, + downsample=False, + name=None): + super(BasicBlock, self).__init__() + + self.has_se = has_se + self.downsample = downsample + + self.conv1 = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=3, + stride=stride, + act="relu", + name=name + "_conv1") + self.conv2 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + stride=1, + act=None, + name=name + "_conv2") + + if self.downsample: + self.conv_down = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 4, + filter_size=1, + act="relu", + name=name + "_downsample") + + if self.has_se: + self.se = SELayer( + num_channels=num_filters, + num_filters=num_filters, + reduction_ratio=16, + name=name + '_fc') + + def forward(self, input): + residual = input + conv1 = self.conv1(input) + conv2 = self.conv2(conv1) + + if self.downsample: + residual = self.conv_down(input) + + if self.has_se: + conv2 = self.se(conv2) + + y = fluid.layers.elementwise_add(x=conv2, y=residual, act="relu") + return y + + +class SELayer(fluid.dygraph.Layer): + def __init__(self, num_channels, num_filters, reduction_ratio, name=None): + super(SELayer, self).__init__() + + self.pool2d_gap = Pool2D(pool_type='avg', global_pooling=True) + + self._num_channels = num_channels + + med_ch = int(num_channels / reduction_ratio) + stdv = 1.0 / math.sqrt(num_channels * 1.0) + self.squeeze = Linear( + num_channels, + med_ch, + act="relu", + param_attr=ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=name + "_sqz_weights"), + bias_attr=ParamAttr(name=name + '_sqz_offset')) + + stdv = 1.0 / math.sqrt(med_ch * 1.0) + self.excitation = Linear( + med_ch, + num_filters, + act="sigmoid", + param_attr=ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=name + "_exc_weights"), + bias_attr=ParamAttr(name=name + '_exc_offset')) + + def forward(self, input): + pool = self.pool2d_gap(input) + pool = fluid.layers.reshape(pool, shape=[-1, self._num_channels]) + squeeze = self.squeeze(pool) + excitation = self.excitation(squeeze) + excitation = fluid.layers.reshape( + excitation, shape=[-1, self._num_channels, 1, 1]) + out = input * excitation + return out + + +class Stage(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_modules, + num_blocks, + num_filters, + 
has_se=False, + multi_scale_output=True, + name=None): + super(Stage, self).__init__() + + self._num_modules = num_modules + + self.stage_func_list = [] + for i in range(num_modules): + if i == num_modules - 1 and not multi_scale_output: + stage_func = self.add_sublayer( + "stage_{}_{}".format(name, i + 1), + HighResolutionModule( + num_channels=num_channels, + num_blocks=num_blocks, + num_filters=num_filters, + has_se=has_se, + multi_scale_output=False, + name=name + '_' + str(i + 1))) + else: + stage_func = self.add_sublayer( + "stage_{}_{}".format(name, i + 1), + HighResolutionModule( + num_channels=num_channels, + num_blocks=num_blocks, + num_filters=num_filters, + has_se=has_se, + name=name + '_' + str(i + 1))) + + self.stage_func_list.append(stage_func) + + def forward(self, input): + out = input + for idx in range(self._num_modules): + out = self.stage_func_list[idx](out) + return out + + +class HighResolutionModule(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_blocks, + num_filters, + has_se=False, + multi_scale_output=True, + name=None): + super(HighResolutionModule, self).__init__() + + self.branches_func = Branches( + num_blocks=num_blocks, + in_channels=num_channels, + out_channels=num_filters, + has_se=has_se, + name=name) + + self.fuse_func = FuseLayers( + in_channels=num_filters, + out_channels=num_filters, + multi_scale_output=multi_scale_output, + name=name) + + def forward(self, input): + out = self.branches_func(input) + out = self.fuse_func(out) + return out + + +class FuseLayers(fluid.dygraph.Layer): + def __init__(self, + in_channels, + out_channels, + multi_scale_output=True, + name=None): + super(FuseLayers, self).__init__() + + self._actual_ch = len(in_channels) if multi_scale_output else 1 + self._in_channels = in_channels + + self.residual_func_list = [] + for i in range(self._actual_ch): + for j in range(len(in_channels)): + residual_func = None + if j > i: + residual_func = self.add_sublayer( + "residual_{}_layer_{}_{}".format(name, i + 1, j + 1), + ConvBNLayer( + num_channels=in_channels[j], + num_filters=out_channels[i], + filter_size=1, + stride=1, + act=None, + name=name + '_layer_' + str(i + 1) + '_' + + str(j + 1))) + self.residual_func_list.append(residual_func) + elif j < i: + pre_num_filters = in_channels[j] + for k in range(i - j): + if k == i - j - 1: + residual_func = self.add_sublayer( + "residual_{}_layer_{}_{}_{}".format( + name, i + 1, j + 1, k + 1), + ConvBNLayer( + num_channels=pre_num_filters, + num_filters=out_channels[i], + filter_size=3, + stride=2, + act=None, + name=name + '_layer_' + str(i + 1) + '_' + + str(j + 1) + '_' + str(k + 1))) + pre_num_filters = out_channels[i] + else: + residual_func = self.add_sublayer( + "residual_{}_layer_{}_{}_{}".format( + name, i + 1, j + 1, k + 1), + ConvBNLayer( + num_channels=pre_num_filters, + num_filters=out_channels[j], + filter_size=3, + stride=2, + act="relu", + name=name + '_layer_' + str(i + 1) + '_' + + str(j + 1) + '_' + str(k + 1))) + pre_num_filters = out_channels[j] + self.residual_func_list.append(residual_func) + + def forward(self, input): + outs = [] + residual_func_idx = 0 + for i in range(self._actual_ch): + residual = input[i] + for j in range(len(self._in_channels)): + if j > i: + y = self.residual_func_list[residual_func_idx](input[j]) + residual_func_idx += 1 + + y = fluid.layers.resize_nearest(input=y, scale=2**(j - i)) + residual = fluid.layers.elementwise_add( + x=residual, y=y, act=None) + elif j < i: + y = input[j] + for k in range(i - j): + y = 
self.residual_func_list[residual_func_idx](y) + residual_func_idx += 1 + + residual = fluid.layers.elementwise_add( + x=residual, y=y, act=None) + + layer_helper = LayerHelper(self.full_name(), act='relu') + residual = layer_helper.append_activation(residual) + outs.append(residual) + + return outs + + +class LastClsOut(fluid.dygraph.Layer): + def __init__(self, + num_channel_list, + has_se, + num_filters_list=[32, 64, 128, 256], + name=None): + super(LastClsOut, self).__init__() + + self.func_list = [] + for idx in range(len(num_channel_list)): + func = self.add_sublayer( + "conv_{}_conv_{}".format(name, idx + 1), + BottleneckBlock( + num_channels=num_channel_list[idx], + num_filters=num_filters_list[idx], + has_se=has_se, + downsample=True, + name=name + 'conv_' + str(idx + 1))) + self.func_list.append(func) + + def forward(self, inputs): + outs = [] + for idx, input in enumerate(inputs): + out = self.func_list[idx](input) + outs.append(out) + return outs + + +def HRNet_W18_Small_V1(num_classes): + model = HRNet( + num_classes=num_classes, + stage1_num_modules=1, + stage1_num_blocks=[1], + stage1_num_channels=[32], + stage2_num_modules=1, + stage2_num_blocks=[2, 2], + stage2_num_channels=[16, 32], + stage3_num_modules=1, + stage3_num_blocks=[2, 2, 2], + stage3_num_channels=[16, 32, 64], + stage4_num_modules=1, + stage4_num_blocks=[2, 2, 2, 2], + stage4_num_channels=[16, 32, 64, 128]) + return model + + +def HRNet_W18_Small_V2(num_classes): + model = HRNet( + num_classes=num_classes, + stage1_num_modules=1, + stage1_num_blocks=[2], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[2, 2], + stage2_num_channels=[18, 36], + stage3_num_modules=1, + stage3_num_blocks=[2, 2, 2], + stage3_num_channels=[18, 36, 72], + stage4_num_modules=1, + stage4_num_blocks=[2, 2, 2, 2], + stage4_num_channels=[18, 36, 72, 144]) + return model + + +def HRNet_W18(num_classes): + model = HRNet( + num_classes=num_classes, + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[18, 36], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[18, 36, 72], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[18, 36, 72, 144]) + return model + + +def HRNet_W30(num_classes): + model = HRNet( + num_classes=num_classes, + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[30, 60], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[30, 60, 120], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[30, 60, 120, 240]) + return model + + +def HRNet_W32(num_classes): + model = HRNet( + num_classes=num_classes, + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[32, 64], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[32, 64, 128], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[32, 64, 128, 256]) + return model + + +def HRNet_W40(num_classes): + model = HRNet( + num_classes=num_classes, + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[40, 80], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[40, 80, 160], + 
stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[40, 80, 160, 320]) + return model + + +def HRNet_W44(num_classes): + model = HRNet( + num_classes=num_classes, + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[44, 88], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[44, 88, 176], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[44, 88, 176, 352]) + return model + + +def HRNet_W48(num_classes): + model = HRNet( + num_classes=num_classes, + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[48, 96], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[48, 96, 192], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[48, 96, 192, 384]) + return model + + +def HRNet_W60(num_classes): + model = HRNet( + num_classes=num_classes, + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[60, 120], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[60, 120, 240], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[60, 120, 240, 480]) + return model + + +def HRNet_W64(num_classes): + model = HRNet( + num_classes=num_classes, + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[64, 128], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[64, 128, 256], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[64, 128, 256, 512]) + return model + + +def SE_HRNet_W18_Small_V1(num_classes): + model = HRNet( + num_classes=num_classes, + stage1_num_modules=1, + stage1_num_blocks=[1], + stage1_num_channels=[32], + stage2_num_modules=1, + stage2_num_blocks=[2, 2], + stage2_num_channels=[16, 32], + stage3_num_modules=1, + stage3_num_blocks=[2, 2, 2], + stage3_num_channels=[16, 32, 64], + stage4_num_modules=1, + stage4_num_blocks=[2, 2, 2, 2], + stage4_num_channels=[16, 32, 64, 128], + has_se=True) + return model + + +def SE_HRNet_W18_Small_V2(num_classes): + model = HRNet( + num_classes=num_classes, + stage1_num_modules=1, + stage1_num_blocks=[2], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[2, 2], + stage2_num_channels=[18, 36], + stage3_num_modules=1, + stage3_num_blocks=[2, 2, 2], + stage3_num_channels=[18, 36, 72], + stage4_num_modules=1, + stage4_num_blocks=[2, 2, 2, 2], + stage4_num_channels=[18, 36, 72, 144], + has_se=True) + return model + + +def SE_HRNet_W18(num_classes): + model = HRNet( + num_classes=num_classes, + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[18, 36], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[18, 36, 72], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[18, 36, 72, 144], + has_se=True) + return model + + +def SE_HRNet_W30(num_classes): + model = HRNet( + num_classes=num_classes, + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[30, 60], + 
stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[30, 60, 120], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[30, 60, 120, 240], + has_se=True) + return model + + +def SE_HRNet_W32(num_classes): + model = HRNet( + num_classes=num_classes, + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[32, 64], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[32, 64, 128], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[32, 64, 128, 256], + has_se=True) + return model + + +def SE_HRNet_W40(num_classes): + model = HRNet( + num_classes=num_classes, + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[40, 80], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[40, 80, 160], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[40, 80, 160, 320], + has_se=True) + return model + + +def SE_HRNet_W44(num_classes): + model = HRNet( + num_classes=num_classes, + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[44, 88], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[44, 88, 176], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[44, 88, 176, 352], + has_se=True) + return model + + +def SE_HRNet_W48(num_classes): + model = HRNet( + num_classes=num_classes, + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[48, 96], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[48, 96, 192], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[48, 96, 192, 384], + has_se=True) + return model + + +def SE_HRNet_W60(num_classes): + model = HRNet( + num_classes=num_classes, + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[60, 120], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[60, 120, 240], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[60, 120, 240, 480], + has_se=True) + return model + + +def SE_HRNet_W64(num_classes): + model = HRNet( + num_classes=num_classes, + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[64, 128], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[64, 128, 256], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[64, 128, 256, 512], + has_se=True) + return model diff --git a/dygraph/models/unet.py b/dygraph/models/unet.py index b55e3614b6988a0102eb3e6f17093e59673eae70..970936d05e0fb67a58973879ffceef5f02816495 100644 --- a/dygraph/models/unet.py +++ b/dygraph/models/unet.py @@ -13,7 +13,11 @@ # limitations under the License. 
 import paddle.fluid as fluid
-from paddle.fluid.dygraph import Conv2D, BatchNorm, Pool2D
+from paddle.fluid.dygraph import Conv2D, Pool2D
+# Prefer the multi-GPU SyncBatchNorm when this Paddle build provides it and
+# fall back to plain BatchNorm otherwise.
+try:
+    from paddle.fluid.dygraph import SyncBatchNorm as BatchNorm
+except:
+    from paddle.fluid.dygraph import BatchNorm
 
 
 class UNet(fluid.dygraph.Layer):
@@ -39,6 +43,8 @@ class UNet(fluid.dygraph.Layer):
         return pred, score_map
 
     def _get_loss(self, logit, label):
+        logit = fluid.layers.transpose(logit, [0, 2, 3, 1])
+        label = fluid.layers.transpose(label, [0, 2, 3, 1])
         mask = label != self.ignore_index
         mask = fluid.layers.cast(mask, 'float32')
         loss, probs = fluid.layers.softmax_with_cross_entropy(
diff --git a/dygraph/train.py b/dygraph/train.py
index 52aa032c454a5cc72ddf9cc3b27cee5b415511eb..70b61aaf839af9e4a6d44046037e7db703a8abcc 100644
--- a/dygraph/train.py
+++ b/dygraph/train.py
@@ -22,7 +22,7 @@ from paddle.incubate.hapi.distributed import DistributedBatchSampler
 
 from datasets import OpticDiscSeg, Cityscapes
 import transforms as T
-import models
+from models import MODELS
 import utils.logging as logging
 from utils import get_environ_info
 from utils import load_pretrained_model
@@ -38,7 +38,8 @@ def parse_args():
     parser.add_argument(
         '--model_name',
         dest='model_name',
-        help="Model type for traing, which is one of ('UNet')",
+        help='Model type for training, which is one of {}'.format(
+            str(list(MODELS.keys()))),
         type=str,
         default='UNet')
@@ -181,7 +182,7 @@ def train(model,
     total_steps = steps_per_epoch * (num_epochs - start_epoch)
     num_steps = 0
     best_mean_iou = -1.0
-    best_model_epoch = 1
+    best_model_epoch = -1
     for epoch in range(start_epoch, num_epochs):
         for step, data in enumerate(loader):
             images = data[0]
@@ -229,10 +230,8 @@ def train(model,
                 mean_iou, mean_acc = evaluate(
                     model,
                     eval_dataset,
-                    places=places,
                     model_dir=current_save_dir,
                     num_classes=num_classes,
-                    batch_size=batch_size,
                     ignore_index=ignore_index,
                     epoch_id=epoch + 1)
                 if mean_iou > best_mean_iou:
@@ -241,9 +240,9 @@ def train(model,
                     best_model_dir = os.path.join(save_dir, "best_model")
                     fluid.save_dygraph(model.state_dict(),
                                        os.path.join(best_model_dir, 'model'))
-                    logging.info(
-                        'Current evaluated best model in eval_dataset is epoch_{}, miou={:4f}'
-                        .format(best_model_epoch, best_mean_iou))
+                logging.info(
+                    'Current evaluated best model in eval_dataset is epoch_{}, miou={:4f}'
+                    .format(best_model_epoch, best_mean_iou))
 
                 if use_vdl:
                     log_writer.add_scalar('Evaluate/mean_iou', mean_iou,
@@ -286,9 +285,11 @@ def main(args):
                                  T.Normalize()])
     eval_dataset = dataset(transforms=eval_transforms, mode='eval')
 
-    if args.model_name == 'UNet':
-        model = models.UNet(
-            num_classes=train_dataset.num_classes, ignore_index=255)
+    if args.model_name not in MODELS:
+        raise Exception(
+            '--model_name is invalid. It should be one of {}'.format(
+                str(list(MODELS.keys()))))
+    model = MODELS[args.model_name](num_classes=train_dataset.num_classes)
 
     # Creat optimizer
     # todo, may less one than len(loader)
diff --git a/dygraph/transforms/transforms.py b/dygraph/transforms/transforms.py
index 38c3be18a2ae885bfa6238304a614935401a6330..935a2c0f8670eaa24b148844aa727efe6942e666 100644
--- a/dygraph/transforms/transforms.py
+++ b/dygraph/transforms/transforms.py
@@ -1,3 +1,4 @@
+# coding: utf8
 # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -12,27 +13,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
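+# NOTE: transforms now record im_info as a list of ('resize'|'padding', (h, w))
+# tuples in application order, replacing the old OrderedDict keys; consumers
+# walk the list in reverse to undo preprocessing.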
-from .functional import * import random +from collections import OrderedDict + import numpy as np from PIL import Image import cv2 -from collections import OrderedDict +from .functional import * -class Compose: - """根据数据预处理/增强算子对输入数据进行操作。 - 所有操作的输入图像流形状均是[H, W, C],其中H为图像高,W为图像宽,C为图像通道数。 - - Args: - transforms (list): 数据预处理/增强算子。 - to_rgb (bool): 是否转化为rgb通道格式 - - Raises: - TypeError: transforms不是list对象 - ValueError: transforms元素个数小于1。 - """ +class Compose: def __init__(self, transforms, to_rgb=True): if not isinstance(transforms, list): raise TypeError('The transforms must be a list!') @@ -43,20 +34,8 @@ class Compose: self.to_rgb = to_rgb def __call__(self, im, im_info=None, label=None): - """ - Args: - im (str/np.ndarray): 图像路径/图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息,dict中的字段如下: - - shape_before_resize (tuple): 图像resize之前的大小(h, w)。 - - shape_before_padding (tuple): 图像padding之前的大小(h, w)。 - label (str/np.ndarray): 标注图像路径/标注图像np.ndarray数据。 - - Returns: - tuple: 根据网络所需字段所组成的tuple;字段由transforms中的最后一个数据预处理操作决定。 - """ - if im_info is None: - im_info = dict() + im_info = list() if isinstance(im, str): im = cv2.imread(im).astype('float32') if isinstance(label, str): @@ -80,27 +59,10 @@ class Compose: class RandomHorizontalFlip: - """以一定的概率对图像进行水平翻转。当存在标注图像时,则同步进行翻转。 - - Args: - prob (float): 随机水平翻转的概率。默认值为0.5。 - - """ def __init__(self, prob=0.5): self.prob = prob def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - """ if random.random() < self.prob: im = horizontal_flip(im) if label is not None: @@ -112,26 +74,10 @@ class RandomHorizontalFlip: class RandomVerticalFlip: - """以一定的概率对图像进行垂直翻转。当存在标注图像时,则同步进行翻转。 - - Args: - prob (float): 随机垂直翻转的概率。默认值为0.1。 - """ def __init__(self, prob=0.1): self.prob = prob def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - """ if random.random() < self.prob: im = vertical_flip(im) if label is not None: @@ -143,25 +89,6 @@ class RandomVerticalFlip: class Resize: - """调整图像大小(resize)。 - - - 当目标大小(target_size)类型为int时,根据插值方式, - 将图像resize为[target_size, target_size]。 - - 当目标大小(target_size)类型为list或tuple时,根据插值方式, - 将图像resize为target_size。 - 注意:当插值方式为“RANDOM”时,则随机选取一种插值方式进行resize。 - - Args: - target_size (int/list/tuple): 短边目标长度。默认为608。 - interp (str): resize的插值方式,与opencv的插值方式对应,取值范围为 - ['NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4', 'RANDOM']。默认为"LINEAR"。 - - Raises: - TypeError: 形参数据类型不满足需求。 - ValueError: 插值方式不在['NEAREST', 'LINEAR', 'CUBIC', - 'AREA', 'LANCZOS4', 'RANDOM']中。 - """ - # The interpolation mode interp_dict = { 'NEAREST': cv2.INTER_NEAREST, @@ -189,26 +116,9 @@ class Resize: self.target_size = target_size def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict, 可选): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - 
其中,im_info跟新字段为: - -shape_before_resize (tuple): 保存resize之前图像的形状(h, w)。 - - Raises: - TypeError: 形参数据类型不满足需求。 - ValueError: 数据长度不匹配。 - """ if im_info is None: - im_info = OrderedDict() - im_info['shape_before_resize'] = im.shape[:2] + im_info = list() + im_info.append(('resize', im.shape[:2])) if not isinstance(im, np.ndarray): raise TypeError("Resize: image type is not numpy.") if len(im.shape) != 3: @@ -228,32 +138,14 @@ class Resize: class ResizeByLong: - """对图像长边resize到固定值,短边按比例进行缩放。当存在标注图像时,则同步进行处理。 - - Args: - long_size (int): resize后图像的长边大小。 - """ def __init__(self, long_size): self.long_size = long_size def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - 其中,im_info新增字段为: - -shape_before_resize (tuple): 保存resize之前图像的形状(h, w)。 - """ if im_info is None: - im_info = OrderedDict() + im_info = list() - im_info['shape_before_resize'] = im.shape[:2] + im_info.append(('resize', im.shape[:2])) im = resize_long(im, self.long_size) if label is not None: label = resize_long(label, self.long_size, cv2.INTER_NEAREST) @@ -265,15 +157,6 @@ class ResizeByLong: class ResizeRangeScaling: - """对图像长边随机resize到指定范围内,短边按比例进行缩放。当存在标注图像时,则同步进行处理。 - - Args: - min_value (int): 图像长边resize后的最小值。默认值400。 - max_value (int): 图像长边resize后的最大值。默认值600。 - - Raises: - ValueError: min_value大于max_value - """ def __init__(self, min_value=400, max_value=600): if min_value > max_value: raise ValueError('min_value must be less than max_value, ' @@ -283,17 +166,6 @@ class ResizeRangeScaling: self.max_value = max_value def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - """ if self.min_value == self.max_value: random_size = self.max_value else: @@ -310,17 +182,6 @@ class ResizeRangeScaling: class ResizeStepScaling: - """对图像按照某一个比例resize,这个比例以scale_step_size为步长 - 在[min_scale_factor, max_scale_factor]随机变动。当存在标注图像时,则同步进行处理。 - - Args: - min_scale_factor(float), resize最小尺度。默认值0.75。 - max_scale_factor (float), resize最大尺度。默认值1.25。 - scale_step_size (float), resize尺度范围间隔。默认值0.25。 - - Raises: - ValueError: min_scale_factor大于max_scale_factor - """ def __init__(self, min_scale_factor=0.75, max_scale_factor=1.25, @@ -335,17 +196,6 @@ class ResizeStepScaling: self.scale_step_size = scale_step_size def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - """ if self.min_scale_factor == self.max_scale_factor: scale_factor = self.min_scale_factor @@ -375,17 +225,6 @@ class ResizeStepScaling: class Normalize: - """对图像进行标准化。 - 1.尺度缩放到 [0,1]。 - 2.对图像进行减均值除以标准差操作。 - - Args: - mean (list): 图像数据集的均值。默认值[0.5, 0.5, 0.5]。 - std (list): 图像数据集的标准差。默认值[0.5, 0.5, 0.5]。 - - Raises: - ValueError: mean或std不是list对象。std包含0。 - """ def __init__(self, mean=[0.5, 0.5, 0.5], 
std=[0.5, 0.5, 0.5]): self.mean = mean self.std = std @@ -396,18 +235,6 @@ class Normalize: raise ValueError('{}: std is invalid!'.format(self)) def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - """ - mean = np.array(self.mean)[np.newaxis, np.newaxis, :] std = np.array(self.std)[np.newaxis, np.newaxis, :] im = normalize(im, mean, std) @@ -419,18 +246,6 @@ class Normalize: class Padding: - """对图像或标注图像进行padding,padding方向为右和下。 - 根据提供的值对图像或标注图像进行padding操作。 - - Args: - target_size (int|list|tuple): padding后图像的大小。 - im_padding_value (list): 图像padding的值。默认为[127.5, 127.5, 127.5]。 - label_padding_value (int): 标注图像padding的值。默认值为255。 - - Raises: - TypeError: target_size不是int|list|tuple。 - ValueError: target_size为list|tuple时元素个数不等于2。 - """ def __init__(self, target_size, im_padding_value=[127.5, 127.5, 127.5], @@ -449,25 +264,9 @@ class Padding: self.label_padding_value = label_padding_value def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - 其中,im_info新增字段为: - -shape_before_padding (tuple): 保存padding之前图像的形状(h, w)。 - - Raises: - ValueError: 输入图像im或label的形状大于目标值 - """ if im_info is None: - im_info = OrderedDict() - im_info['shape_before_padding'] = im.shape[:2] + im_info = list() + im_info.append(('padding', im.shape[:2])) im_height, im_width = im.shape[0], im.shape[1] if isinstance(self.target_size, int): @@ -483,21 +282,23 @@ class Padding: 'the size of image should be less than target_size, but the size of image ({}, {}), is larger than target_size ({}, {})' .format(im_width, im_height, target_width, target_height)) else: - im = cv2.copyMakeBorder(im, - 0, - pad_height, - 0, - pad_width, - cv2.BORDER_CONSTANT, - value=self.im_padding_value) + im = cv2.copyMakeBorder( + im, + 0, + pad_height, + 0, + pad_width, + cv2.BORDER_CONSTANT, + value=self.im_padding_value) if label is not None: - label = cv2.copyMakeBorder(label, - 0, - pad_height, - 0, - pad_width, - cv2.BORDER_CONSTANT, - value=self.label_padding_value) + label = cv2.copyMakeBorder( + label, + 0, + pad_height, + 0, + pad_width, + cv2.BORDER_CONSTANT, + value=self.label_padding_value) if label is None: return (im, im_info) else: @@ -505,17 +306,6 @@ class Padding: class RandomPaddingCrop: - """对图像和标注图进行随机裁剪,当所需要的裁剪尺寸大于原图时,则进行padding操作。 - - Args: - crop_size (int|list|tuple): 裁剪图像大小。默认为512。 - im_padding_value (list): 图像padding的值。默认为[127.5, 127.5, 127.5]。 - label_padding_value (int): 标注图像padding的值。默认值为255。 - - Raises: - TypeError: crop_size不是int/list/tuple。 - ValueError: target_size为list/tuple时元素个数不等于2。 - """ def __init__(self, crop_size=512, im_padding_value=[127.5, 127.5, 127.5], @@ -534,17 +324,6 @@ class RandomPaddingCrop: self.label_padding_value = label_padding_value def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - """ if isinstance(self.crop_size, int): crop_width = self.crop_size crop_height = self.crop_size @@ -564,21 +343,23 @@ class RandomPaddingCrop: pad_height = max(crop_height - img_height, 0) pad_width = max(crop_width - img_width, 0) if (pad_height > 0 or pad_width > 0): - im = cv2.copyMakeBorder(im, - 0, - pad_height, - 0, - pad_width, - cv2.BORDER_CONSTANT, - value=self.im_padding_value) + im = cv2.copyMakeBorder( + im, + 0, + pad_height, + 0, + pad_width, + cv2.BORDER_CONSTANT, + value=self.im_padding_value) if label is not None: - label = cv2.copyMakeBorder(label, - 0, - pad_height, - 0, - pad_width, - cv2.BORDER_CONSTANT, - value=self.label_padding_value) + label = cv2.copyMakeBorder( + label, + 0, + pad_height, + 0, + pad_width, + cv2.BORDER_CONSTANT, + value=self.label_padding_value) img_height = im.shape[0] img_width = im.shape[1] @@ -586,11 +367,11 @@ class RandomPaddingCrop: h_off = np.random.randint(img_height - crop_height + 1) w_off = np.random.randint(img_width - crop_width + 1) - im = im[h_off:(crop_height + h_off), w_off:(w_off + - crop_width), :] + im = im[h_off:(crop_height + h_off), w_off:( + w_off + crop_width), :] if label is not None: - label = label[h_off:(crop_height + - h_off), w_off:(w_off + crop_width)] + label = label[h_off:(crop_height + h_off), w_off:( + w_off + crop_width)] if label is None: return (im, im_info) else: @@ -598,26 +379,10 @@ class RandomPaddingCrop: class RandomBlur: - """以一定的概率对图像进行高斯模糊。 - - Args: - prob (float): 图像模糊概率。默认为0.1。 - """ def __init__(self, prob=0.1): self.prob = prob def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - """ if self.prob <= 0: n = 0 elif self.prob >= 1: @@ -640,16 +405,6 @@ class RandomBlur: class RandomRotation: - """对图像进行随机旋转。 - 在不超过最大旋转角度的情况下,图像进行随机旋转,当存在标注图像时,同步进行, - 并对旋转后的图像和标注图像进行相应的padding。 - - Args: - max_rotation (float): 最大旋转角度。默认为15度。 - im_padding_value (list): 图像padding的值。默认为[127.5, 127.5, 127.5]。 - label_padding_value (int): 标注图像padding的值。默认为255。 - - """ def __init__(self, max_rotation=15, im_padding_value=[127.5, 127.5, 127.5], @@ -659,17 +414,6 @@ class RandomRotation: self.label_padding_value = label_padding_value def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - """ if self.max_rotation > 0: (h, w) = im.shape[:2] do_rotation = np.random.uniform(-self.max_rotation, @@ -686,18 +430,20 @@ class RandomRotation: r[0, 2] += (nw / 2) - cx r[1, 2] += (nh / 2) - cy dsize = (nw, nh) - im = cv2.warpAffine(im, - r, - dsize=dsize, - flags=cv2.INTER_LINEAR, - borderMode=cv2.BORDER_CONSTANT, - borderValue=self.im_padding_value) - label = cv2.warpAffine(label, - r, - dsize=dsize, - flags=cv2.INTER_NEAREST, - borderMode=cv2.BORDER_CONSTANT, - borderValue=self.label_padding_value) + im = cv2.warpAffine( + im, + r, + dsize=dsize, + flags=cv2.INTER_LINEAR, + borderMode=cv2.BORDER_CONSTANT, + borderValue=self.im_padding_value) + 
label = cv2.warpAffine( + label, + r, + dsize=dsize, + flags=cv2.INTER_NEAREST, + borderMode=cv2.BORDER_CONSTANT, + borderValue=self.label_padding_value) if label is None: return (im, im_info) @@ -706,29 +452,11 @@ class RandomRotation: class RandomScaleAspect: - """裁剪并resize回原始尺寸的图像和标注图像。 - 按照一定的面积比和宽高比对图像进行裁剪,并reszie回原始图像的图像,当存在标注图时,同步进行。 - - Args: - min_scale (float):裁取图像占原始图像的面积比,取值[0,1],为0时则返回原图。默认为0.5。 - aspect_ratio (float): 裁取图像的宽高比范围,非负值,为0时返回原图。默认为0.33。 - """ def __init__(self, min_scale=0.5, aspect_ratio=0.33): self.min_scale = min_scale self.aspect_ratio = aspect_ratio def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - """ if self.min_scale != 0 and self.aspect_ratio != 0: img_height = im.shape[0] img_width = im.shape[1] @@ -751,10 +479,12 @@ class RandomScaleAspect: im = im[h1:(h1 + dh), w1:(w1 + dw), :] label = label[h1:(h1 + dh), w1:(w1 + dw)] - im = cv2.resize(im, (img_width, img_height), - interpolation=cv2.INTER_LINEAR) - label = cv2.resize(label, (img_width, img_height), - interpolation=cv2.INTER_NEAREST) + im = cv2.resize( + im, (img_width, img_height), + interpolation=cv2.INTER_LINEAR) + label = cv2.resize( + label, (img_width, img_height), + interpolation=cv2.INTER_NEAREST) break if label is None: return (im, im_info) @@ -763,21 +493,6 @@ class RandomScaleAspect: class RandomDistort: - """对图像进行随机失真。 - - 1. 对变换的操作顺序进行随机化操作。 - 2. 按照1中的顺序以一定的概率对图像进行随机像素内容变换。 - - Args: - brightness_range (float): 明亮度因子的范围。默认为0.5。 - brightness_prob (float): 随机调整明亮度的概率。默认为0.5。 - contrast_range (float): 对比度因子的范围。默认为0.5。 - contrast_prob (float): 随机调整对比度的概率。默认为0.5。 - saturation_range (float): 饱和度因子的范围。默认为0.5。 - saturation_prob (float): 随机调整饱和度的概率。默认为0.5。 - hue_range (int): 色调因子的范围。默认为18。 - hue_prob (float): 随机调整色调的概率。默认为0.5。 - """ def __init__(self, brightness_range=0.5, brightness_prob=0.5, @@ -797,17 +512,6 @@ class RandomDistort: self.hue_prob = hue_prob def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - """ brightness_lower = 1 - self.brightness_range brightness_upper = 1 + self.brightness_range contrast_lower = 1 - self.contrast_range diff --git a/dygraph/utils/utils.py b/dygraph/utils/utils.py index 46e204dd2e91f319c788eb43ca50602308ce1954..fa995d27af3f78e97bc06d586fa7bb2ecf439f83 100644 --- a/dygraph/utils/utils.py +++ b/dygraph/utils/utils.py @@ -52,7 +52,11 @@ def load_pretrained_model(model, pretrained_model): logging.info('Load pretrained model from {}'.format(pretrained_model)) if os.path.exists(pretrained_model): ckpt_path = os.path.join(pretrained_model, 'model') - para_state_dict, _ = fluid.load_dygraph(ckpt_path) + try: + para_state_dict, _ = fluid.load_dygraph(ckpt_path) + except: + para_state_dict = fluid.load_program_state(pretrained_model) + model_state_dict = model.state_dict() keys = model_state_dict.keys() num_params_loaded = 0 diff --git a/dygraph/val.py b/dygraph/val.py index 77965f3f8a040d1bfa2f1c6cfaa3a838ddc937c7..ca36a6fe1ca169d30f2dbd06ff58da62b507ff4f 100644 --- 
a/dygraph/val.py
+++ b/dygraph/val.py
@@ -16,8 +16,10 @@
 import argparse
 import os
 import math
-from paddle.fluid.dygraph.base import to_variable
 import numpy as np
+import tqdm
+import cv2
+from paddle.fluid.dygraph.base import to_variable
 import paddle.fluid as fluid
 from paddle.fluid.dygraph.parallel import ParallelEnv
 from paddle.fluid.io import DataLoader
@@ -25,7 +27,7 @@ from paddle.fluid.dataloader import BatchSampler
 
 from datasets import OpticDiscSeg, Cityscapes
 import transforms as T
-import models
+from models import MODELS
 import utils.logging as logging
 from utils import get_environ_info
 from utils import ConfusionMatrix
@@ -39,7 +41,8 @@ def parse_args():
     parser.add_argument(
         '--model_name',
         dest='model_name',
-        help="Model type for evaluation, which is one of ('UNet')",
+        help='Model type for evaluation, which is one of {}'.format(
+            str(list(MODELS.keys()))),
         type=str,
         default='UNet')
@@ -60,12 +63,6 @@ def parse_args():
         nargs=2,
         default=[512, 512],
         type=int)
-    parser.add_argument(
-        '--batch_size',
-        dest='batch_size',
-        help='Mini batch size',
-        type=int,
-        default=2)
     parser.add_argument(
         '--model_dir',
         dest='model_dir',
@@ -78,10 +75,8 @@ def parse_args():
 
 def evaluate(model,
              eval_dataset=None,
-             places=None,
              model_dir=None,
              num_classes=None,
-             batch_size=2,
              ignore_index=255,
              epoch_id=None):
     ckpt_path = os.path.join(model_dir, 'model')
@@ -89,15 +84,7 @@ def evaluate(model,
     model.set_dict(para_state_dict)
     model.eval()
 
-    batch_sampler = BatchSampler(
-        eval_dataset, batch_size=batch_size, shuffle=False, drop_last=False)
-    loader = DataLoader(
-        eval_dataset,
-        batch_sampler=batch_sampler,
-        places=places,
-        return_list=True,
-    )
-    total_steps = len(batch_sampler)
+    total_steps = len(eval_dataset)
     conf_mat = ConfusionMatrix(num_classes, streaming=True)
 
     logging.info(
@@ -105,15 +92,26 @@ def evaluate(model,
         len(eval_dataset), total_steps))
     timer = Timer()
     timer.start()
-    for step, data in enumerate(loader):
-        images = data[0]
-        labels = data[1].astype('int64')
-        pred, _ = model(images, mode='eval')
-
-        pred = pred.numpy()
-        labels = labels.numpy()
-        mask = labels != ignore_index
-        conf_mat.calculate(pred=pred, label=labels, ignore=mask)
+    for step, (im, im_info, label) in enumerate(eval_dataset):
+        im = to_variable(im)
+        pred, _ = model(im, mode='eval')
+        pred = pred.numpy().astype('float32')
+        pred = np.squeeze(pred)
+        # Undo the preprocessing records in reverse order so the prediction
+        # is scored against the full-resolution label.
+        for info in im_info[::-1]:
+            if info[0] == 'resize':
+                h, w = info[1][0], info[1][1]
+                # interpolation must be a keyword argument; a positional third
+                # argument is taken as dst and silently ignored by OpenCV.
+                pred = cv2.resize(pred, (w, h),
+                                  interpolation=cv2.INTER_NEAREST)
+            elif info[0] == 'padding':
+                h, w = info[1][0], info[1][1]
+                pred = pred[0:h, 0:w]
+            else:
+                raise Exception("Unexpected info '{}' in im_info".format(
+                    info[0]))
+        pred = pred[np.newaxis, :, :, np.newaxis]
+        pred = pred.astype('int64')
+        mask = label != ignore_index
+
+        conf_mat.calculate(pred=pred, label=label, ignore=mask)
         _, iou = conf_mat.mean_iou()
 
         time_step = timer.elapsed_time()
@@ -153,16 +151,17 @@ def main(args):
     eval_transforms = T.Compose([T.Resize(args.input_size), T.Normalize()])
     eval_dataset = dataset(transforms=eval_transforms, mode='eval')
 
-    if args.model_name == 'UNet':
-        model = models.UNet(num_classes=eval_dataset.num_classes)
+    if args.model_name not in MODELS:
+        raise Exception(
+            '--model_name is invalid. It should be one of {}'.format(
+                str(list(MODELS.keys()))))
+    model = MODELS[args.model_name](num_classes=eval_dataset.num_classes)
 
     evaluate(
         model,
         eval_dataset,
-        places=places,
         model_dir=args.model_dir,
-        num_classes=eval_dataset.num_classes,
-        batch_size=args.batch_size)
+        num_classes=eval_dataset.num_classes)
 
 
 if __name__ == '__main__':
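For reviewers who want to drive the refactored pieces from Python rather than the CLI, a minimal sketch (run from inside `dygraph/`; the `HRNet_W18` choice and the `output/best_model` checkpoint path are illustrative, not part of this diff):

```
import paddle.fluid as fluid

import transforms as T
from datasets import OpticDiscSeg
from models import MODELS
from val import evaluate

with fluid.dygraph.guard():
    # Build the eval-mode pipeline; OpticDiscSeg downloads itself on first use.
    eval_transforms = T.Compose([T.Resize([192, 192]), T.Normalize()])
    eval_dataset = OpticDiscSeg(transforms=eval_transforms, mode='eval')

    # Look the constructor up in the registry, exactly as train/val/infer do.
    model = MODELS['HRNet_W18'](num_classes=eval_dataset.num_classes)

    # evaluate() loads <model_dir>/model and streams mIoU over the dataset.
    evaluate(
        model,
        eval_dataset,
        model_dir='output/best_model',
        num_classes=eval_dataset.num_classes)
```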