diff --git a/dygraph/core/__init__.py b/dygraph/core/__init__.py
deleted file mode 100644
index 202629f542f40a2741cb12022adb10d7a56861b5..0000000000000000000000000000000000000000
--- a/dygraph/core/__init__.py
+++ /dev/null
@@ -1,19 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from .train import train
-from .val import evaluate
-from .infer import infer
-
-__all__ = ['train', 'evaluate', 'infer']
diff --git a/dygraph/core/infer.py b/dygraph/core/infer.py
deleted file mode 100644
index 499890d216c173f4361ae7e5f18027add8cfb2a6..0000000000000000000000000000000000000000
--- a/dygraph/core/infer.py
+++ /dev/null
@@ -1,72 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-
-from paddle.fluid.dygraph.base import to_variable
-import numpy as np
-import paddle.fluid as fluid
-import cv2
-import tqdm
-
-from dygraph import utils
-import dygraph.utils.logger as logger
-
-
-def mkdir(path):
-    sub_dir = os.path.dirname(path)
-    if not os.path.exists(sub_dir):
-        os.makedirs(sub_dir)
-
-
-def infer(model, test_dataset=None, model_dir=None, save_dir='output'):
-    ckpt_path = os.path.join(model_dir, 'model')
-    para_state_dict, opti_state_dict = fluid.load_dygraph(ckpt_path)
-    model.set_dict(para_state_dict)
-    model.eval()
-
-    added_saved_dir = os.path.join(save_dir, 'added')
-    pred_saved_dir = os.path.join(save_dir, 'prediction')
-
-    logger.info("Start to predict...")
-    for im, im_info, im_path in tqdm.tqdm(test_dataset):
-        im = to_variable(im)
-        pred, _ = model(im)
-        pred = pred.numpy()
-        pred = np.squeeze(pred).astype('uint8')
-        for info in im_info[::-1]:
-            if info[0] == 'resize':
-                h, w = info[1][0], info[1][1]
-                pred = cv2.resize(pred, (w, h), cv2.INTER_NEAREST)
-            elif info[0] == 'padding':
-                h, w = info[1][0], info[1][1]
-                pred = pred[0:h, 0:w]
-            else:
-                raise Exception("Unexpected info '{}' in im_info".format(
-                    info[0]))
-
-        im_file = im_path.replace(test_dataset.dataset_root, '')
-        if im_file[0] == '/':
-            im_file = im_file[1:]
-        # save added image
-        added_image = utils.visualize(im_path, pred, weight=0.6)
-        added_image_path = os.path.join(added_saved_dir, im_file)
-        mkdir(added_image_path)
-        cv2.imwrite(added_image_path, added_image)
-
-        # save prediction
-        pred_im = utils.visualize(im_path, pred, weight=0.0)
-        pred_saved_path = os.path.join(pred_saved_dir, im_file)
-        mkdir(pred_saved_path)
-        cv2.imwrite(pred_saved_path, pred_im)
diff --git a/dygraph/core/train.py b/dygraph/core/train.py
deleted file mode 100644
index e7d33a1f0cbb59b39aeabc1fbeb1a4225ea2db33..0000000000000000000000000000000000000000
--- a/dygraph/core/train.py
+++ /dev/null
@@ -1,192 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-
-import paddle
-import paddle.fluid as fluid
-from paddle.fluid.dygraph.parallel import ParallelEnv
-from paddle.fluid.io import DataLoader
-# from paddle.incubate.hapi.distributed import DistributedBatchSampler
-from paddle.io import DistributedBatchSampler
-import paddle.nn.functional as F
-
-import dygraph.utils.logger as logger
-from dygraph.utils import load_pretrained_model
-from dygraph.utils import resume
-from dygraph.utils import Timer, calculate_eta
-from .val import evaluate
-
-
-def check_logits_losses(logits, losses):
-    len_logits = len(logits)
-    len_losses = len(losses['types'])
-    if len_logits != len_losses:
-        raise RuntimeError(
-            'The length of logits should equal to the types of loss config: {} != {}.'
-            .format(len_logits, len_losses))
-
-
-def loss_computation(logits, label, losses):
-    check_logits_losses(logits, losses)
-    loss = 0
-    for i in range(len(logits)):
-        logit = logits[i]
-        if logit.shape[-2:] != label.shape[-2:]:
-            logit = F.resize_bilinear(logit, label.shape[-2:])
-        loss_i = losses['types'][i](logit, label)
-        loss += losses['coef'][i] * loss_i
-    return loss
-
-
-def train(model,
-          train_dataset,
-          places=None,
-          eval_dataset=None,
-          optimizer=None,
-          save_dir='output',
-          iters=10000,
-          batch_size=2,
-          resume_model=None,
-          save_interval_iters=1000,
-          log_iters=10,
-          num_classes=None,
-          num_workers=8,
-          use_vdl=False,
-          losses=None,
-          ignore_index=255):
-    nranks = ParallelEnv().nranks
-
-    start_iter = 0
-    if resume_model is not None:
-        start_iter = resume(model, optimizer, resume_model)
-
-    if not os.path.isdir(save_dir):
-        if os.path.exists(save_dir):
-            os.remove(save_dir)
-        os.makedirs(save_dir)
-
-    if nranks > 1:
-        strategy = fluid.dygraph.prepare_context()
-        ddp_model = fluid.dygraph.DataParallel(model, strategy)
-
-    batch_sampler = DistributedBatchSampler(
-        train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
-    loader = DataLoader(
-        train_dataset,
-        batch_sampler=batch_sampler,
-        places=places,
-        num_workers=num_workers,
-        return_list=True,
-    )
-
-    if use_vdl:
-        from visualdl import LogWriter
-        log_writer = LogWriter(save_dir)
-
-    timer = Timer()
-    avg_loss = 0.0
-    iters_per_epoch = len(batch_sampler)
-    best_mean_iou = -1.0
-    best_model_iter = -1
-    train_reader_cost = 0.0
-    train_batch_cost = 0.0
-    timer.start()
-
-    iter = start_iter
-    while iter < iters:
-        for data in loader:
-            iter += 1
-            if iter > iters:
-                break
-            train_reader_cost += timer.elapsed_time()
-            images = data[0]
-            labels = data[1].astype('int64')
-            if nranks > 1:
-                logits = ddp_model(images)
-                loss = loss_computation(logits, labels, losses)
-                # loss = ddp_model(images, labels)
-                # apply_collective_grads sum grads over multiple gpus.
-                loss = ddp_model.scale_loss(loss)
-                loss.backward()
-                ddp_model.apply_collective_grads()
-            else:
-                logits = model(images)
-                loss = loss_computation(logits, labels, losses)
-                # loss = model(images, labels)
-                loss.backward()
-            optimizer.minimize(loss)
-            model.clear_gradients()
-            avg_loss += loss.numpy()[0]
-            lr = optimizer.current_step_lr()
-            train_batch_cost += timer.elapsed_time()
-            if (iter) % log_iters == 0 and ParallelEnv().local_rank == 0:
-                avg_loss /= log_iters
-                avg_train_reader_cost = train_reader_cost / log_iters
-                avg_train_batch_cost = train_batch_cost / log_iters
-                train_reader_cost = 0.0
-                train_batch_cost = 0.0
-                remain_iters = iters - iter
-                eta = calculate_eta(remain_iters, avg_train_batch_cost)
-                logger.info(
-                    "[TRAIN] epoch={}, iter={}/{}, loss={:.4f}, lr={:.6f}, batch_cost={:.4f}, reader_cost={:.4f} | ETA {}"
-                    .format((iter - 1) // iters_per_epoch + 1, iter, iters,
-                            avg_loss * nranks, lr, avg_train_batch_cost,
-                            avg_train_reader_cost, eta))
-                if use_vdl:
-                    log_writer.add_scalar('Train/loss', avg_loss * nranks, iter)
-                    log_writer.add_scalar('Train/lr', lr, iter)
-                    log_writer.add_scalar('Train/batch_cost',
-                                          avg_train_batch_cost, iter)
-                    log_writer.add_scalar('Train/reader_cost',
-                                          avg_train_reader_cost, iter)
-                avg_loss = 0.0
-
-            if (iter % save_interval_iters == 0
-                    or iter == iters) and ParallelEnv().local_rank == 0:
-                current_save_dir = os.path.join(save_dir,
-                                                "iter_{}".format(iter))
-                if not os.path.isdir(current_save_dir):
-                    os.makedirs(current_save_dir)
-                fluid.save_dygraph(model.state_dict(),
-                                   os.path.join(current_save_dir, 'model'))
-                fluid.save_dygraph(optimizer.state_dict(),
-                                   os.path.join(current_save_dir, 'model'))
-
-                if eval_dataset is not None:
-                    mean_iou, avg_acc = evaluate(
-                        model,
-                        eval_dataset,
-                        model_dir=current_save_dir,
-                        num_classes=num_classes,
-                        ignore_index=ignore_index,
-                        iter_id=iter)
-                    if mean_iou > best_mean_iou:
-                        best_mean_iou = mean_iou
-                        best_model_iter = iter
-                        best_model_dir = os.path.join(save_dir, "best_model")
-                        fluid.save_dygraph(
-                            model.state_dict(),
-                            os.path.join(best_model_dir, 'model'))
-                    logger.info(
-                        'Current evaluated best model in eval_dataset is iter_{}, miou={:4f}'
-                        .format(best_model_iter, best_mean_iou))
-
-                    if use_vdl:
-                        log_writer.add_scalar('Evaluate/mIoU', mean_iou, iter)
-                        log_writer.add_scalar('Evaluate/aAcc', avg_acc, iter)
-                    model.train()
-            timer.restart()
-    if use_vdl:
-        log_writer.close()
diff --git a/dygraph/core/val.py b/dygraph/core/val.py
deleted file mode 100644
index 22e84a314cd4ffe8093f81dad724f3d7d12a05fe..0000000000000000000000000000000000000000
--- a/dygraph/core/val.py
+++ /dev/null
@@ -1,89 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-
-import numpy as np
-import tqdm
-import cv2
-from paddle.fluid.dygraph.base import to_variable
-import paddle.fluid as fluid
-import paddle.nn.functional as F
-import paddle
-
-import dygraph.utils.logger as logger
-from dygraph.utils import ConfusionMatrix
-from dygraph.utils import Timer, calculate_eta
-
-
-def evaluate(model,
-             eval_dataset=None,
-             model_dir=None,
-             num_classes=None,
-             ignore_index=255,
-             iter_id=None):
-    ckpt_path = os.path.join(model_dir, 'model')
-    para_state_dict, opti_state_dict = fluid.load_dygraph(ckpt_path)
-    model.set_dict(para_state_dict)
-    model.eval()
-
-    total_iters = len(eval_dataset)
-    conf_mat = ConfusionMatrix(num_classes, streaming=True)
-
-    logger.info(
-        "Start to evaluating(total_samples={}, total_iters={})...".format(
-            len(eval_dataset), total_iters))
-    timer = Timer()
-    timer.start()
-    for iter, (im, im_info, label) in tqdm.tqdm(
-            enumerate(eval_dataset), total=total_iters):
-        im = to_variable(im)
-        # pred, _ = model(im)
-        logits = model(im)
-        pred = paddle.argmax(logits[0], axis=1)
-        pred = pred.numpy().astype('float32')
-        pred = np.squeeze(pred)
-        for info in im_info[::-1]:
-            if info[0] == 'resize':
-                h, w = info[1][0], info[1][1]
-                pred = cv2.resize(pred, (w, h), cv2.INTER_NEAREST)
-            elif info[0] == 'padding':
-                h, w = info[1][0], info[1][1]
-                pred = pred[0:h, 0:w]
-            else:
-                raise Exception("Unexpected info '{}' in im_info".format(
-                    info[0]))
-        pred = pred[np.newaxis, :, :, np.newaxis]
-        pred = pred.astype('int64')
-        mask = label != ignore_index
-
-        conf_mat.calculate(pred=pred, label=label, ignore=mask)
-        _, iou = conf_mat.mean_iou()
-
-        time_iter = timer.elapsed_time()
-        remain_iter = total_iters - iter - 1
-        logger.debug(
-            "[EVAL] iter_id={}, iter={}/{}, iou={:4f}, sec/iter={:.4f} | ETA {}"
-            .format(iter_id, iter + 1, total_iters, iou, time_iter,
-                    calculate_eta(remain_iter, time_iter)))
-        timer.restart()
-
-    category_iou, miou = conf_mat.mean_iou()
-    category_acc, macc = conf_mat.accuracy()
-    logger.info("[EVAL] #Images={} mAcc={:.4f} mIoU={:.4f}".format(
-        len(eval_dataset), macc, miou))
-    logger.info("[EVAL] Category IoU: " + str(category_iou))
-    logger.info("[EVAL] Category Acc: " + str(category_acc))
-    logger.info("[EVAL] Kappa:{:.4f} ".format(conf_mat.kappa()))
-    return miou, macc
diff --git a/dygraph/cvlibs/__init__.py b/dygraph/cvlibs/__init__.py
deleted file mode 100644
index 18812001388cbfd1ecf7dc4d38398ddd91711af4..0000000000000000000000000000000000000000
--- a/dygraph/cvlibs/__init__.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from . import manager
-from . import param_init
diff --git a/dygraph/cvlibs/manager.py b/dygraph/cvlibs/manager.py
deleted file mode 100644
index 339070069c7e39532ec7fe2c826851a8d0f53df6..0000000000000000000000000000000000000000
--- a/dygraph/cvlibs/manager.py
+++ /dev/null
@@ -1,118 +0,0 @@
-# -*- encoding: utf-8 -*-
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from collections.abc import Sequence
-import inspect
-
-
-class ComponentManager:
-    """
-    Implement a manager class to add the new component properly.
-    The component can be added as either class or function type.
-    For example:
-        >>> model_manager = ComponentManager()
-        >>> class AlexNet: ...
-        >>> class ResNet: ...
-        >>> model_manager.add_component(AlexNet)
-        >>> model_manager.add_component(ResNet)
-        or pass a sequence alliteratively:
-        >>> model_manager.add_component([AlexNet, ResNet])
-        >>> print(model_manager.components_dict)
-    output: {'AlexNet': <class '__main__.AlexNet'>, 'ResNet': <class '__main__.ResNet'>}
-
-    Or an easier way, using it as a Python decorator, while just add it above the class declaration.
-        >>> model_manager = ComponentManager()
-        >>> @model_manager.add_component
-        >>> class AlexNet: ...
-        >>> @model_manager.add_component
-        >>> class ResNet: ...
-        >>> print(model_manager.components_dict)
-    output: {'AlexNet': <class '__main__.AlexNet'>, 'ResNet': <class '__main__.ResNet'>}
-    """
-
-    def __init__(self):
-        self._components_dict = dict()
-
-    def __len__(self):
-        return len(self._components_dict)
-
-    def __repr__(self):
-        return "{}:{}".format(self.__class__.__name__,
-                              list(self._components_dict.keys()))
-
-    def __getitem__(self, item):
-        if item not in self._components_dict.keys():
-            raise KeyError("{} does not exist in the current {}".format(
-                item, self))
-        return self._components_dict[item]
-
-    @property
-    def components_dict(self):
-        return self._components_dict
-
-    def _add_single_component(self, component):
-        """
-        Add a single component into the corresponding manager
-
-        Args:
-        component (function | class): a new component
-
-        Returns:
-        None
-        """
-
-        # Currently only support class or function type
-        if not (inspect.isclass(component) or inspect.isfunction(component)):
-            raise TypeError(
-                "Expect class/function type, but received {}".format(
-                    type(component)))
-
-        # Obtain the internal name of the component
-        component_name = component.__name__
-
-        # Check whether the component was added already
-        if component_name in self._components_dict.keys():
-            raise KeyError("{} exists already!".format(component_name))
-        else:
-            # Take the internal name of the component as its key
-            self._components_dict[component_name] = component
-
-    def add_component(self, components):
-        """
-        Add component(s) into the corresponding manager
-
-        Args:
-        components (function | class | list | tuple): support three types of components
-
-        Returns:
-        None
-        """
-
-        # Check whether the type is a sequence
-        if isinstance(components, Sequence):
-            for component in components:
-                self._add_single_component(component)
-        else:
-            component = components
-            self._add_single_component(component)
-
-        return components
-
-
-MODELS = ComponentManager()
-BACKBONES = ComponentManager()
-DATASETS = ComponentManager()
-TRANSFORMS = ComponentManager()
-LOSSES = ComponentManager()
diff --git a/dygraph/cvlibs/param_init.py b/dygraph/cvlibs/param_init.py
deleted file mode 100644
index 567399c0a0c7d2310931b1c0ccae13cd0d5422b1..0000000000000000000000000000000000000000
--- a/dygraph/cvlibs/param_init.py
+++ /dev/null
@@ -1,25 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import paddle.fluid as fluid
-
-
-def constant_init(param, **kwargs):
-    initializer = fluid.initializer.Constant(**kwargs)
-    initializer(param, param.block)
-
-
-def normal_init(param, **kwargs):
-    initializer = fluid.initializer.Normal(**kwargs)
-    initializer(param, param.block)
diff --git a/dygraph/datasets/__init__.py b/dygraph/datasets/__init__.py
deleted file mode 100644
index 37d8da36997b3ec2a74b92199242eba126a0cefc..0000000000000000000000000000000000000000
--- a/dygraph/datasets/__init__.py
+++ /dev/null
@@ -1,26 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from .dataset import Dataset
-from .optic_disc_seg import OpticDiscSeg
-from .cityscapes import Cityscapes
-from .voc import PascalVOC
-from .ade import ADE20K
-
-DATASETS = {
-    "OpticDiscSeg": OpticDiscSeg,
-    "Cityscapes": Cityscapes,
-    "PascalVOC": PascalVOC,
-    "ADE20K": ADE20K
-}
diff --git a/dygraph/datasets/ade.py b/dygraph/datasets/ade.py
deleted file mode 100644
index 8cb8ec2cebfac98d52283ccd21796553db36bffe..0000000000000000000000000000000000000000
--- a/dygraph/datasets/ade.py
+++ /dev/null
@@ -1,100 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-
-import numpy as np
-from PIL import Image
-
-from .dataset import Dataset
-from dygraph.utils.download import download_file_and_uncompress
-from dygraph.cvlibs import manager
-from dygraph.transforms import Compose
-
-DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset')
-URL = "http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip"
-
-
-@manager.DATASETS.add_component
-class ADE20K(Dataset):
-    """ADE20K dataset `http://sceneparsing.csail.mit.edu/`.
-    Args:
-        dataset_root: The dataset directory.
-        mode: Which part of dataset to use.. it is one of ('train', 'val'). Default: 'train'.
-        transforms: Transforms for image.
-        download: Whether to download dataset if `dataset_root` is None.
-    """
-
-    def __init__(self,
-                 dataset_root=None,
-                 mode='train',
-                 transforms=None,
-                 download=True):
-        self.dataset_root = dataset_root
-        self.transforms = Compose(transforms)
-        self.mode = mode
-        self.file_list = list()
-        self.num_classes = 150
-
-        if mode.lower() not in ['train', 'val']:
-            raise Exception(
-                "`mode` should be one of ('train', 'val') in ADE20K dataset, but got {}."
-                .format(mode))
-
-        if self.transforms is None:
-            raise Exception("`transforms` is necessary, but it is None.")
-
-        if self.dataset_root is None:
-            if not download:
-                raise Exception(
-                    "`dataset_root` not set and auto download disabled.")
-            self.dataset_root = download_file_and_uncompress(
-                url=URL,
-                savepath=DATA_HOME,
-                extrapath=DATA_HOME,
-                extraname='ADEChallengeData2016')
-        elif not os.path.exists(self.dataset_root):
-            raise Exception('there is not `dataset_root`: {}.'.format(
-                self.dataset_root))
-
-        if mode == 'train':
-            img_dir = os.path.join(self.dataset_root, 'images/training')
-            grt_dir = os.path.join(self.dataset_root, 'annotations/training')
-        elif mode == 'val':
-            img_dir = os.path.join(self.dataset_root, 'images/validation')
-            grt_dir = os.path.join(self.dataset_root, 'annotations/validation')
-        img_files = os.listdir(img_dir)
-        grt_files = [i.replace('.jpg', '.png') for i in img_files]
-        for i in range(len(img_files)):
-            img_path = os.path.join(img_dir, img_files[i])
-            grt_path = os.path.join(grt_dir, grt_files[i])
-            self.file_list.append([img_path, grt_path])
-
-    def __getitem__(self, idx):
-        image_path, grt_path = self.file_list[idx]
-        if self.mode == 'test':
-            im, im_info, _ = self.transforms(im=image_path)
-            im = im[np.newaxis, ...]
-            return im, im_info, image_path
-        elif self.mode == 'val':
-            im, im_info, _ = self.transforms(im=image_path)
-            im = im[np.newaxis, ...]
-            label = np.asarray(Image.open(grt_path))
-            label = label - 1
-            label = label[np.newaxis, np.newaxis, :, :]
-            return im, im_info, label
-        else:
-            im, im_info, label = self.transforms(im=image_path, label=grt_path)
-            label = label - 1
-            return im, label
diff --git a/dygraph/datasets/cityscapes.py b/dygraph/datasets/cityscapes.py
deleted file mode 100644
index ee28754d290ec9ca0526c34d10d9b0ccaa89e6b7..0000000000000000000000000000000000000000
--- a/dygraph/datasets/cityscapes.py
+++ /dev/null
@@ -1,77 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import glob
-
-from .dataset import Dataset
-from dygraph.cvlibs import manager
-from dygraph.transforms import Compose
-
-
-@manager.DATASETS.add_component
-class Cityscapes(Dataset):
-    """Cityscapes dataset `https://www.cityscapes-dataset.com/`.
-    The folder structure is as follow:
-    cityscapes
-    |
-    |--leftImg8bit
-    |  |--train
-    |  |--val
-    |  |--test
-    |
-    |--gtFine
-    |  |--train
-    |  |--val
-    |  |--test
-    Make sure there are **labelTrainIds.png in gtFine directory. If not, please run the conver_cityscapes.py in tools.
-
-    Args:
-        dataset_root: Cityscapes dataset directory.
-        mode: Which part of dataset to use. it is one of ('train', 'val', 'test'). Default: 'train'.
-        transforms: Transforms for image.
-    """
-
-    def __init__(self, dataset_root, transforms=None, mode='train'):
-        self.dataset_root = dataset_root
-        self.transforms = Compose(transforms)
-        self.file_list = list()
-        self.mode = mode
-        self.num_classes = 19
-
-        if mode.lower() not in ['train', 'val', 'test']:
-            raise Exception(
-                "mode should be 'train', 'val' or 'test', but got {}.".format(
-                    mode))
-
-        if self.transforms is None:
-            raise Exception("`transforms` is necessary, but it is None.")
-
-        img_dir = os.path.join(self.dataset_root, 'leftImg8bit')
-        grt_dir = os.path.join(self.dataset_root, 'gtFine')
-        if self.dataset_root is None or not os.path.isdir(
-                self.dataset_root) or not os.path.isdir(
-                    img_dir) or not os.path.isdir(grt_dir):
-            raise Exception(
-                "The dataset is not Found or the folder structure is nonconfoumance."
-            )
-
-        grt_files = sorted(
-            glob.glob(
-                os.path.join(grt_dir, mode, '*', '*_gtFine_labelTrainIds.png')))
-        img_files = sorted(
-            glob.glob(os.path.join(img_dir, mode, '*', '*_leftImg8bit.png')))
-
-        self.file_list = [[img_path, grt_path]
-                          for img_path, grt_path in zip(img_files, grt_files)]
diff --git a/dygraph/datasets/dataset.py b/dygraph/datasets/dataset.py
deleted file mode 100644
index c65e20fd2e97511baf4159a3a1eaf2661927a21e..0000000000000000000000000000000000000000
--- a/dygraph/datasets/dataset.py
+++ /dev/null
@@ -1,139 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-
-import paddle.fluid as fluid
-import numpy as np
-from PIL import Image
-from dygraph.cvlibs import manager
-
-from dygraph.transforms import Compose
-
-
-@manager.DATASETS.add_component
-class Dataset(fluid.io.Dataset):
-    """Pass in a custom dataset that conforms to the format.
-
-    Args:
-        dataset_root: The dataset directory.
-        num_classes: Number of classes.
-        mode: which part of dataset to use. it is one of ('train', 'val', 'test'). Default: 'train'.
-        train_list: The train dataset file. When image_set is 'train', train_list is necessary.
-            The contents of train_list file are as follow:
-            image1.jpg ground_truth1.png
-            image2.jpg ground_truth2.png
-        val_list: The evaluation dataset file. When image_set is 'val', val_list is necessary.
-            The contents is the same as train_list
-        test_list: The test dataset file. When image_set is 'test', test_list is necessary.
-            The annotation file is not necessary in test_list file.
-        separator: The separator of dataset list. Default: ' '.
-        transforms: Transforms for image.
-
-        Examples:
-            todo
-
-    """
-
-    def __init__(self,
-                 dataset_root,
-                 num_classes,
-                 mode='train',
-                 train_list=None,
-                 val_list=None,
-                 test_list=None,
-                 separator=' ',
-                 transforms=None):
-        self.dataset_root = dataset_root
-        self.transforms = Compose(transforms)
-        self.file_list = list()
-        self.mode = mode
-        self.num_classes = num_classes
-
-        if mode.lower() not in ['train', 'val', 'test']:
-            raise Exception(
-                "mode should be 'train', 'val' or 'test', but got {}.".format(
-                    mode))
-
-        if self.transforms is None:
-            raise Exception("`transforms` is necessary, but it is None.")
-
-        self.dataset_root = dataset_root
-        if not os.path.exists(self.dataset_root):
-            raise Exception('there is not `dataset_root`: {}.'.format(
-                self.dataset_root))
-
-        if mode == 'train':
-            if train_list is None:
-                raise Exception(
-                    'When `mode` is "train", `train_list` is necessary, but it is None.'
-                )
-            elif not os.path.exists(train_list):
-                raise Exception(
-                    '`train_list` is not found: {}'.format(train_list))
-            else:
-                file_list = train_list
-        elif mode == 'val':
-            if val_list is None:
-                raise Exception(
-                    'When `mode` is "val", `val_list` is necessary, but it is None.'
-                )
-            elif not os.path.exists(val_list):
-                raise Exception('`val_list` is not found: {}'.format(val_list))
-            else:
-                file_list = val_list
-        else:
-            if test_list is None:
-                raise Exception(
-                    'When `mode` is "test", `test_list` is necessary, but it is None.'
-                )
-            elif not os.path.exists(test_list):
-                raise Exception(
-                    '`test_list` is not found: {}'.format(test_list))
-            else:
-                file_list = test_list
-
-        with open(file_list, 'r') as f:
-            for line in f:
-                items = line.strip().split(separator)
-                if len(items) != 2:
-                    if mode == 'train' or mode == 'val':
-                        raise Exception(
-                            "File list format incorrect! In training or evaluation task it should be"
-                            " image_name{}label_name\\n".format(separator))
-                    image_path = os.path.join(self.dataset_root, items[0])
-                    grt_path = None
-                else:
-                    image_path = os.path.join(self.dataset_root, items[0])
-                    grt_path = os.path.join(self.dataset_root, items[1])
-                self.file_list.append([image_path, grt_path])
-
-    def __getitem__(self, idx):
-        image_path, grt_path = self.file_list[idx]
-        if self.mode == 'test':
-            im, im_info, _ = self.transforms(im=image_path)
-            im = im[np.newaxis, ...]
-            return im, im_info, image_path
-        elif self.mode == 'val':
-            im, im_info, _ = self.transforms(im=image_path)
-            im = im[np.newaxis, ...]
-            label = np.asarray(Image.open(grt_path))
-            label = label[np.newaxis, np.newaxis, :, :]
-            return im, im_info, label
-        else:
-            im, im_info, label = self.transforms(im=image_path, label=grt_path)
-            return im, label
-
-    def __len__(self):
-        return len(self.file_list)
diff --git a/dygraph/datasets/optic_disc_seg.py b/dygraph/datasets/optic_disc_seg.py
deleted file mode 100644
index 2c6d2b2d56febbe4b45130528c970a43e53d0fd9..0000000000000000000000000000000000000000
--- a/dygraph/datasets/optic_disc_seg.py
+++ /dev/null
@@ -1,77 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-
-from .dataset import Dataset
-from dygraph.utils.download import download_file_and_uncompress
-from dygraph.cvlibs import manager
-from dygraph.transforms import Compose
-
-DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset')
-URL = "https://paddleseg.bj.bcebos.com/dataset/optic_disc_seg.zip"
-
-
-@manager.DATASETS.add_component
-class OpticDiscSeg(Dataset):
-    def __init__(self,
-                 dataset_root=None,
-                 transforms=None,
-                 mode='train',
-                 download=True):
-        self.dataset_root = dataset_root
-        self.transforms = Compose(transforms)
-        self.file_list = list()
-        self.mode = mode
-        self.num_classes = 2
-
-        if mode.lower() not in ['train', 'val', 'test']:
-            raise Exception(
-                "`mode` should be 'train', 'val' or 'test', but got {}.".format(
-                    mode))
-
-        if self.transforms is None:
-            raise Exception("`transforms` is necessary, but it is None.")
-
-        if self.dataset_root is None:
-            if not download:
-                raise Exception(
-                    "`data_root` not set and auto download disabled.")
-            self.dataset_root = download_file_and_uncompress(
-                url=URL, savepath=DATA_HOME, extrapath=DATA_HOME)
-        elif not os.path.exists(self.dataset_root):
-            raise Exception('there is not `dataset_root`: {}.'.format(
-                self.dataset_root))
-
-        if mode == 'train':
-            file_list = os.path.join(self.dataset_root, 'train_list.txt')
-        elif mode == 'val':
-            file_list = os.path.join(self.dataset_root, 'val_list.txt')
-        else:
-            file_list = os.path.join(self.dataset_root, 'test_list.txt')
-
-        with open(file_list, 'r') as f:
-            for line in f:
-                items = line.strip().split()
-                if len(items) != 2:
-                    if mode == 'train' or mode == 'val':
-                        raise Exception(
-                            "File list format incorrect! It should be"
-                            " image_name label_name\\n")
-                    image_path = os.path.join(self.dataset_root, items[0])
-                    grt_path = None
-                else:
-                    image_path = os.path.join(self.dataset_root, items[0])
-                    grt_path = os.path.join(self.dataset_root, items[1])
-                self.file_list.append([image_path, grt_path])
diff --git a/dygraph/datasets/voc.py b/dygraph/datasets/voc.py
deleted file mode 100644
index da1f9971ff440fbedf10ec2debc7ddaccd372226..0000000000000000000000000000000000000000
--- a/dygraph/datasets/voc.py
+++ /dev/null
@@ -1,106 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-
-from .dataset import Dataset
-from dygraph.utils.download import download_file_and_uncompress
-from dygraph.cvlibs import manager
-from dygraph.transforms import Compose
-
-DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset')
-URL = "http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar"
-
-
-@manager.DATASETS.add_component
-class PascalVOC(Dataset):
-    """Pascal VOC dataset `http://host.robots.ox.ac.uk/pascal/VOC/`. If you want to augment the dataset,
-    please run the voc_augment.py in tools.
-    Args:
-        dataset_root: The dataset directory.
-        mode: Which part of dataset to use.. it is one of ('train', 'val', 'test'). Default: 'train'.
-        transforms: Transforms for image.
-        download: Whether to download dataset if dataset_root is None.
-    """
-
-    def __init__(self,
-                 dataset_root=None,
-                 mode='train',
-                 transforms=None,
-                 download=True):
-        self.dataset_root = dataset_root
-        self.transforms = Compose(transforms)
-        self.mode = mode
-        self.file_list = list()
-        self.num_classes = 21
-
-        if mode.lower() not in ['train', 'trainval', 'trainaug', 'val']:
-            raise Exception(
-                "`mode` should be one of ('train', 'trainval', 'trainaug', 'val') in PascalVOC dataset, but got {}."
-                .format(mode))
-
-        if self.transforms is None:
-            raise Exception("`transforms` is necessary, but it is None.")
-
-        if self.dataset_root is None:
-            if not download:
-                raise Exception(
-                    "`dataset_root` not set and auto download disabled.")
-            self.dataset_root = download_file_and_uncompress(
-                url=URL,
-                savepath=DATA_HOME,
-                extrapath=DATA_HOME,
-                extraname='VOCdevkit')
-        elif not os.path.exists(self.dataset_root):
-            raise Exception('there is not `dataset_root`: {}.'.format(
-                self.dataset_root))
-
-        image_set_dir = os.path.join(self.dataset_root, 'VOC2012', 'ImageSets',
-                                     'Segmentation')
-        if mode == 'train':
-            file_list = os.path.join(image_set_dir, 'train.txt')
-        elif mode == 'val':
-            file_list = os.path.join(image_set_dir, 'val.txt')
-        elif mode == 'trainval':
-            file_list = os.path.join(image_set_dir, 'trainval.txt')
-        elif mode == 'trainaug':
-            file_list = os.path.join(image_set_dir, 'train.txt')
-            file_list_aug = os.path.join(image_set_dir, 'aug.txt')
-
-            if not os.path.exists(file_list_aug):
-                raise Exception(
-                    "When `mode` is 'trainaug', Pascal Voc dataset should be augmented, "
-                    "Please make sure voc_augment.py has been properly run when using this mode."
-                )
-
-        img_dir = os.path.join(self.dataset_root, 'VOC2012', 'JPEGImages')
-        grt_dir = os.path.join(self.dataset_root, 'VOC2012',
-                               'SegmentationClass')
-        grt_dir_aug = os.path.join(self.dataset_root, 'VOC2012',
-                                   'SegmentationClassAug')
-
-        with open(file_list, 'r') as f:
-            for line in f:
-                line = line.strip()
-                image_path = os.path.join(img_dir, ''.join([line, '.jpg']))
-                grt_path = os.path.join(grt_dir, ''.join([line, '.png']))
-                self.file_list.append([image_path, grt_path])
-        if mode == 'trainaug':
-            with open(file_list_aug, 'r') as f:
-                for line in f:
-                    line = line.strip()
-                    image_path = os.path.join(img_dir, ''.join([line, '.jpg']))
-                    grt_path = os.path.join(grt_dir_aug, ''.join([line,
-                                                                  '.png']))
-                    self.file_list.append([image_path, grt_path])
diff --git a/dygraph/models/__init__.py b/dygraph/models/__init__.py
deleted file mode 100644
index f3a62e3b39c80b47bb4d50e54f7dae4018cd2d32..0000000000000000000000000000000000000000
--- a/dygraph/models/__init__.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from .architectures import *
-from .losses import *
-from .unet import UNet
-from .deeplab import *
-from .fcn import *
-from .pspnet import *
-from .ocrnet import *
diff --git a/dygraph/models/architectures/__init__.py b/dygraph/models/architectures/__init__.py
deleted file mode 100644
index 730c8f97b44c4f85429e200ebf43d13b7439c5cf..0000000000000000000000000000000000000000
--- a/dygraph/models/architectures/__init__.py
+++ /dev/null
@@ -1,19 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from . import layer_utils
-from .hrnet import *
-from .resnet_vd import *
-from .xception_deeplab import *
-from .mobilenetv3 import *
diff --git a/dygraph/models/architectures/hrnet.py b/dygraph/models/architectures/hrnet.py
deleted file mode 100644
index dda718f5315efb45753aed4f13ea28d4e322e60a..0000000000000000000000000000000000000000
--- a/dygraph/models/architectures/hrnet.py
+++ /dev/null
@@ -1,850 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import math
-import os
-
-import paddle
-import paddle.fluid as fluid
-from paddle.fluid.param_attr import ParamAttr
-from paddle.fluid.layer_helper import LayerHelper
-from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
-from paddle.fluid.initializer import Normal
-from paddle.nn import SyncBatchNorm as BatchNorm
-
-from dygraph.cvlibs import manager
-from dygraph.utils import utils
-from dygraph.cvlibs import param_init
-
-__all__ = [
-    "HRNet_W18_Small_V1", "HRNet_W18_Small_V2", "HRNet_W18", "HRNet_W30",
-    "HRNet_W32", "HRNet_W40", "HRNet_W44", "HRNet_W48", "HRNet_W60", "HRNet_W64"
-]
-
-
-class HRNet(fluid.dygraph.Layer):
-    """
-    HRNet：Deep High-Resolution Representation Learning for Visual Recognition
-    https://arxiv.org/pdf/1908.07919.pdf.
-
-    Args:
-        backbone_pretrained (str): the path of pretrained model.
-        stage1_num_modules (int): number of modules for stage1. Default 1.
-        stage1_num_blocks (list): number of blocks per module for stage1. Default [4].
-        stage1_num_channels (list): number of channels per branch for stage1. Default [64].
-        stage2_num_modules (int): number of modules for stage2. Default 1.
-        stage2_num_blocks (list): number of blocks per module for stage2. Default [4, 4]
-        stage2_num_channels (list): number of channels per branch for stage2. Default [18, 36].
-        stage3_num_modules (int): number of modules for stage3. Default 4.
-        stage3_num_blocks (list): number of blocks per module for stage3. Default [4, 4, 4]
-        stage3_num_channels (list): number of channels per branch for stage3. Default [18, 36, 72].
-        stage4_num_modules (int): number of modules for stage4. Default 3.
-        stage4_num_blocks (list): number of blocks per module for stage4. Default [4, 4, 4, 4]
-        stage4_num_channels (list): number of channels per branch for stage4. Default [18, 36, 72. 144].
-        has_se (bool): whether to use Squeeze-and-Excitation module. Default False.
-    """
-
-    def __init__(self,
-                 stage1_num_modules=1,
-                 stage1_num_blocks=[4],
-                 stage1_num_channels=[64],
-                 stage2_num_modules=1,
-                 stage2_num_blocks=[4, 4],
-                 stage2_num_channels=[18, 36],
-                 stage3_num_modules=4,
-                 stage3_num_blocks=[4, 4, 4],
-                 stage3_num_channels=[18, 36, 72],
-                 stage4_num_modules=3,
-                 stage4_num_blocks=[4, 4, 4, 4],
-                 stage4_num_channels=[18, 36, 72, 144],
-                 has_se=False):
-        super(HRNet, self).__init__()
-
-        self.stage1_num_modules = stage1_num_modules
-        self.stage1_num_blocks = stage1_num_blocks
-        self.stage1_num_channels = stage1_num_channels
-        self.stage2_num_modules = stage2_num_modules
-        self.stage2_num_blocks = stage2_num_blocks
-        self.stage2_num_channels = stage2_num_channels
-        self.stage3_num_modules = stage3_num_modules
-        self.stage3_num_blocks = stage3_num_blocks
-        self.stage3_num_channels = stage3_num_channels
-        self.stage4_num_modules = stage4_num_modules
-        self.stage4_num_blocks = stage4_num_blocks
-        self.stage4_num_channels = stage4_num_channels
-        self.has_se = has_se
-
-        self.conv_layer1_1 = ConvBNLayer(
-            num_channels=3,
-            num_filters=64,
-            filter_size=3,
-            stride=2,
-            act='relu',
-            name="layer1_1")
-
-        self.conv_layer1_2 = ConvBNLayer(
-            num_channels=64,
-            num_filters=64,
-            filter_size=3,
-            stride=2,
-            act='relu',
-            name="layer1_2")
-
-        self.la1 = Layer1(
-            num_channels=64,
-            num_blocks=self.stage1_num_blocks[0],
-            num_filters=self.stage1_num_channels[0],
-            has_se=has_se,
-            name="layer2")
-
-        self.tr1 = TransitionLayer(
-            in_channels=[self.stage1_num_channels[0] * 4],
-            out_channels=self.stage2_num_channels,
-            name="tr1")
-
-        self.st2 = Stage(
-            num_channels=self.stage2_num_channels,
-            num_modules=self.stage2_num_modules,
-            num_blocks=self.stage2_num_blocks,
-            num_filters=self.stage2_num_channels,
-            has_se=self.has_se,
-            name="st2")
-
-        self.tr2 = TransitionLayer(
-            in_channels=self.stage2_num_channels,
-            out_channels=self.stage3_num_channels,
-            name="tr2")
-        self.st3 = Stage(
-            num_channels=self.stage3_num_channels,
-            num_modules=self.stage3_num_modules,
-            num_blocks=self.stage3_num_blocks,
-            num_filters=self.stage3_num_channels,
-            has_se=self.has_se,
-            name="st3")
-
-        self.tr3 = TransitionLayer(
-            in_channels=self.stage3_num_channels,
-            out_channels=self.stage4_num_channels,
-            name="tr3")
-        self.st4 = Stage(
-            num_channels=self.stage4_num_channels,
-            num_modules=self.stage4_num_modules,
-            num_blocks=self.stage4_num_blocks,
-            num_filters=self.stage4_num_channels,
-            has_se=self.has_se,
-            name="st4")
-
-    def forward(self, x, label=None, mode='train'):
-        input_shape = x.shape[2:]
-        conv1 = self.conv_layer1_1(x)
-        conv2 = self.conv_layer1_2(conv1)
-
-        la1 = self.la1(conv2)
-
-        tr1 = self.tr1([la1])
-        st2 = self.st2(tr1)
-
-        tr2 = self.tr2(st2)
-        st3 = self.st3(tr2)
-
-        tr3 = self.tr3(st3)
-        st4 = self.st4(tr3)
-
-        x0_h, x0_w = st4[0].shape[2:]
-        x1 = fluid.layers.resize_bilinear(st4[1], out_shape=(x0_h, x0_w))
-        x2 = fluid.layers.resize_bilinear(st4[2], out_shape=(x0_h, x0_w))
-        x3 = fluid.layers.resize_bilinear(st4[3], out_shape=(x0_h, x0_w))
-        x = fluid.layers.concat([st4[0], x1, x2, x3], axis=1)
-
-        return [x]
-
-
-class ConvBNLayer(fluid.dygraph.Layer):
-    def __init__(self,
-                 num_channels,
-                 num_filters,
-                 filter_size,
-                 stride=1,
-                 groups=1,
-                 act="relu",
-                 name=None):
-        super(ConvBNLayer, self).__init__()
-
-        self._conv = Conv2D(
-            num_channels=num_channels,
-            num_filters=num_filters,
-            filter_size=filter_size,
-            stride=stride,
-            padding=(filter_size - 1) // 2,
-            groups=groups,
-            bias_attr=False)
-        self._batch_norm = BatchNorm(num_filters)
-        self.act = act
-
-    def forward(self, input):
-        y = self._conv(input)
-        y = self._batch_norm(y)
-        if self.act == 'relu':
-            y = fluid.layers.relu(y)
-        return y
-
-
-class Layer1(fluid.dygraph.Layer):
-    def __init__(self,
-                 num_channels,
-                 num_filters,
-                 num_blocks,
-                 has_se=False,
-                 name=None):
-        super(Layer1, self).__init__()
-
-        self.bottleneck_block_list = []
-
-        for i in range(num_blocks):
-            bottleneck_block = self.add_sublayer(
-                "bb_{}_{}".format(name, i + 1),
-                BottleneckBlock(
-                    num_channels=num_channels if i == 0 else num_filters * 4,
-                    num_filters=num_filters,
-                    has_se=has_se,
-                    stride=1,
-                    downsample=True if i == 0 else False,
-                    name=name + '_' + str(i + 1)))
-            self.bottleneck_block_list.append(bottleneck_block)
-
-    def forward(self, input):
-        conv = input
-        for block_func in self.bottleneck_block_list:
-            conv = block_func(conv)
-        return conv
-
-
-class TransitionLayer(fluid.dygraph.Layer):
-    def __init__(self, in_channels, out_channels, name=None):
-        super(TransitionLayer, self).__init__()
-
-        num_in = len(in_channels)
-        num_out = len(out_channels)
-        self.conv_bn_func_list = []
-        for i in range(num_out):
-            residual = None
-            if i < num_in:
-                if in_channels[i] != out_channels[i]:
-                    residual = self.add_sublayer(
-                        "transition_{}_layer_{}".format(name, i + 1),
-                        ConvBNLayer(
-                            num_channels=in_channels[i],
-                            num_filters=out_channels[i],
-                            filter_size=3,
-                            name=name + '_layer_' + str(i + 1)))
-            else:
-                residual = self.add_sublayer(
-                    "transition_{}_layer_{}".format(name, i + 1),
-                    ConvBNLayer(
-                        num_channels=in_channels[-1],
-                        num_filters=out_channels[i],
-                        filter_size=3,
-                        stride=2,
-                        name=name + '_layer_' + str(i + 1)))
-            self.conv_bn_func_list.append(residual)
-
-    def forward(self, input):
-        outs = []
-        for idx, conv_bn_func in enumerate(self.conv_bn_func_list):
-            if conv_bn_func is None:
-                outs.append(input[idx])
-            else:
-                if idx < len(input):
-                    outs.append(conv_bn_func(input[idx]))
-                else:
-                    outs.append(conv_bn_func(input[-1]))
-        return outs
-
-
-class Branches(fluid.dygraph.Layer):
-    def __init__(self,
-                 num_blocks,
-                 in_channels,
-                 out_channels,
-                 has_se=False,
-                 name=None):
-        super(Branches, self).__init__()
-
-        self.basic_block_list = []
-
-        for i in range(len(out_channels)):
-            self.basic_block_list.append([])
-            for j in range(num_blocks[i]):
-                in_ch = in_channels[i] if j == 0 else out_channels[i]
-                basic_block_func = self.add_sublayer(
-                    "bb_{}_branch_layer_{}_{}".format(name, i + 1, j + 1),
-                    BasicBlock(
-                        num_channels=in_ch,
-                        num_filters=out_channels[i],
-                        has_se=has_se,
-                        name=name + '_branch_layer_' + str(i + 1) + '_' +
-                        str(j + 1)))
-                self.basic_block_list[i].append(basic_block_func)
-
-    def forward(self, inputs):
-        outs = []
-        for idx, input in enumerate(inputs):
-            conv = input
-            for basic_block_func in self.basic_block_list[idx]:
-                conv = basic_block_func(conv)
-            outs.append(conv)
-        return outs
-
-
-class BottleneckBlock(fluid.dygraph.Layer):
-    def __init__(self,
-                 num_channels,
-                 num_filters,
-                 has_se,
-                 stride=1,
-                 downsample=False,
-                 name=None):
-        super(BottleneckBlock, self).__init__()
-
-        self.has_se = has_se
-        self.downsample = downsample
-
-        self.conv1 = ConvBNLayer(
-            num_channels=num_channels,
-            num_filters=num_filters,
-            filter_size=1,
-            act="relu",
-            name=name + "_conv1",
-        )
-        self.conv2 = ConvBNLayer(
-            num_channels=num_filters,
-            num_filters=num_filters,
-            filter_size=3,
-            stride=stride,
-            act="relu",
-            name=name + "_conv2")
-        self.conv3 = ConvBNLayer(
-            num_channels=num_filters,
-            num_filters=num_filters * 4,
-            filter_size=1,
-            act=None,
-            name=name + "_conv3")
-
-        if self.downsample:
-            self.conv_down = ConvBNLayer(
-                num_channels=num_channels,
-                num_filters=num_filters * 4,
-                filter_size=1,
-                act=None,
-                name=name + "_downsample")
-
-        if self.has_se:
-            self.se = SELayer(
-                num_channels=num_filters * 4,
-                num_filters=num_filters * 4,
-                reduction_ratio=16,
-                name=name + '_fc')
-
-    def forward(self, input):
-        residual = input
-        conv1 = self.conv1(input)
-        conv2 = self.conv2(conv1)
-        conv3 = self.conv3(conv2)
-
-        if self.downsample:
-            residual = self.conv_down(input)
-
-        if self.has_se:
-            conv3 = self.se(conv3)
-
-        y = fluid.layers.elementwise_add(x=conv3, y=residual, act="relu")
-        return y
-
-
-class BasicBlock(fluid.dygraph.Layer):
-    def __init__(self,
-                 num_channels,
-                 num_filters,
-                 stride=1,
-                 has_se=False,
-                 downsample=False,
-                 name=None):
-        super(BasicBlock, self).__init__()
-
-        self.has_se = has_se
-        self.downsample = downsample
-
-        self.conv1 = ConvBNLayer(
-            num_channels=num_channels,
-            num_filters=num_filters,
-            filter_size=3,
-            stride=stride,
-            act="relu",
-            name=name + "_conv1")
-        self.conv2 = ConvBNLayer(
-            num_channels=num_filters,
-            num_filters=num_filters,
-            filter_size=3,
-            stride=1,
-            act=None,
-            name=name + "_conv2")
-
-        if self.downsample:
-            self.conv_down = ConvBNLayer(
-                num_channels=num_channels,
-                num_filters=num_filters * 4,
-                filter_size=1,
-                act="relu",
-                name=name + "_downsample")
-
-        if self.has_se:
-            self.se = SELayer(
-                num_channels=num_filters,
-                num_filters=num_filters,
-                reduction_ratio=16,
-                name=name + '_fc')
-
-    def forward(self, input):
-        residual = input
-        conv1 = self.conv1(input)
-        conv2 = self.conv2(conv1)
-
-        if self.downsample:
-            residual = self.conv_down(input)
-
-        if self.has_se:
-            conv2 = self.se(conv2)
-
-        y = fluid.layers.elementwise_add(x=conv2, y=residual, act="relu")
-        return y
-
-
-class SELayer(fluid.dygraph.Layer):
-    def __init__(self, num_channels, num_filters, reduction_ratio, name=None):
-        super(SELayer, self).__init__()
-
-        self.pool2d_gap = Pool2D(pool_type='avg', global_pooling=True)
-
-        self._num_channels = num_channels
-
-        med_ch = int(num_channels / reduction_ratio)
-        stdv = 1.0 / math.sqrt(num_channels * 1.0)
-        self.squeeze = Linear(
-            num_channels,
-            med_ch,
-            act="relu",
-            param_attr=ParamAttr(
-                initializer=fluid.initializer.Uniform(-stdv, stdv),
-                name=name + "_sqz_weights"),
-            bias_attr=ParamAttr(name=name + '_sqz_offset'))
-
-        stdv = 1.0 / math.sqrt(med_ch * 1.0)
-        self.excitation = Linear(
-            med_ch,
-            num_filters,
-            act="sigmoid",
-            param_attr=ParamAttr(
-                initializer=fluid.initializer.Uniform(-stdv, stdv),
-                name=name + "_exc_weights"),
-            bias_attr=ParamAttr(name=name + '_exc_offset'))
-
-    def forward(self, input):
-        pool = self.pool2d_gap(input)
-        pool = fluid.layers.reshape(pool, shape=[-1, self._num_channels])
-        squeeze = self.squeeze(pool)
-        excitation = self.excitation(squeeze)
-        excitation = fluid.layers.reshape(
-            excitation, shape=[-1, self._num_channels, 1, 1])
-        out = input * excitation
-        return out
-
-
-class Stage(fluid.dygraph.Layer):
-    def __init__(self,
-                 num_channels,
-                 num_modules,
-                 num_blocks,
-                 num_filters,
-                 has_se=False,
-                 multi_scale_output=True,
-                 name=None):
-        super(Stage, self).__init__()
-
-        self._num_modules = num_modules
-
-        self.stage_func_list = []
-        for i in range(num_modules):
-            if i == num_modules - 1 and not multi_scale_output:
-                stage_func = self.add_sublayer(
-                    "stage_{}_{}".format(name, i + 1),
-                    HighResolutionModule(
-                        num_channels=num_channels,
-                        num_blocks=num_blocks,
-                        num_filters=num_filters,
-                        has_se=has_se,
-                        multi_scale_output=False,
-                        name=name + '_' + str(i + 1)))
-            else:
-                stage_func = self.add_sublayer(
-                    "stage_{}_{}".format(name, i + 1),
-                    HighResolutionModule(
-                        num_channels=num_channels,
-                        num_blocks=num_blocks,
-                        num_filters=num_filters,
-                        has_se=has_se,
-                        name=name + '_' + str(i + 1)))
-
-            self.stage_func_list.append(stage_func)
-
-    def forward(self, input):
-        out = input
-        for idx in range(self._num_modules):
-            out = self.stage_func_list[idx](out)
-        return out
-
-
-class HighResolutionModule(fluid.dygraph.Layer):
-    def __init__(self,
-                 num_channels,
-                 num_blocks,
-                 num_filters,
-                 has_se=False,
-                 multi_scale_output=True,
-                 name=None):
-        super(HighResolutionModule, self).__init__()
-
-        self.branches_func = Branches(
-            num_blocks=num_blocks,
-            in_channels=num_channels,
-            out_channels=num_filters,
-            has_se=has_se,
-            name=name)
-
-        self.fuse_func = FuseLayers(
-            in_channels=num_filters,
-            out_channels=num_filters,
-            multi_scale_output=multi_scale_output,
-            name=name)
-
-    def forward(self, input):
-        out = self.branches_func(input)
-        out = self.fuse_func(out)
-        return out
-
-
-class FuseLayers(fluid.dygraph.Layer):
-    def __init__(self,
-                 in_channels,
-                 out_channels,
-                 multi_scale_output=True,
-                 name=None):
-        super(FuseLayers, self).__init__()
-
-        self._actual_ch = len(in_channels) if multi_scale_output else 1
-        self._in_channels = in_channels
-
-        self.residual_func_list = []
-        for i in range(self._actual_ch):
-            for j in range(len(in_channels)):
-                residual_func = None
-                if j > i:
-                    residual_func = self.add_sublayer(
-                        "residual_{}_layer_{}_{}".format(name, i + 1, j + 1),
-                        ConvBNLayer(
-                            num_channels=in_channels[j],
-                            num_filters=out_channels[i],
-                            filter_size=1,
-                            stride=1,
-                            act=None,
-                            name=name + '_layer_' + str(i + 1) + '_' +
-                            str(j + 1)))
-                    self.residual_func_list.append(residual_func)
-                elif j < i:
-                    pre_num_filters = in_channels[j]
-                    for k in range(i - j):
-                        if k == i - j - 1:
-                            residual_func = self.add_sublayer(
-                                "residual_{}_layer_{}_{}_{}".format(
-                                    name, i + 1, j + 1, k + 1),
-                                ConvBNLayer(
-                                    num_channels=pre_num_filters,
-                                    num_filters=out_channels[i],
-                                    filter_size=3,
-                                    stride=2,
-                                    act=None,
-                                    name=name + '_layer_' + str(i + 1) + '_' +
-                                    str(j + 1) + '_' + str(k + 1)))
-                            pre_num_filters = out_channels[i]
-                        else:
-                            residual_func = self.add_sublayer(
-                                "residual_{}_layer_{}_{}_{}".format(
-                                    name, i + 1, j + 1, k + 1),
-                                ConvBNLayer(
-                                    num_channels=pre_num_filters,
-                                    num_filters=out_channels[j],
-                                    filter_size=3,
-                                    stride=2,
-                                    act="relu",
-                                    name=name + '_layer_' + str(i + 1) + '_' +
-                                    str(j + 1) + '_' + str(k + 1)))
-                            pre_num_filters = out_channels[j]
-                        self.residual_func_list.append(residual_func)
-
-    def forward(self, input):
-        outs = []
-        residual_func_idx = 0
-        for i in range(self._actual_ch):
-            residual = input[i]
-            residual_shape = residual.shape[-2:]
-            for j in range(len(self._in_channels)):
-                if j > i:
-                    y = self.residual_func_list[residual_func_idx](input[j])
-                    residual_func_idx += 1
-
-                    y = fluid.layers.resize_bilinear(
-                        input=y, out_shape=residual_shape)
-                    residual = fluid.layers.elementwise_add(
-                        x=residual, y=y, act=None)
-                elif j < i:
-                    y = input[j]
-                    for k in range(i - j):
-                        y = self.residual_func_list[residual_func_idx](y)
-                        residual_func_idx += 1
-
-                    residual = fluid.layers.elementwise_add(
-                        x=residual, y=y, act=None)
-
-            layer_helper = LayerHelper(self.full_name(), act='relu')
-            residual = layer_helper.append_activation(residual)
-            outs.append(residual)
-
-        return outs
-
-
-class LastClsOut(fluid.dygraph.Layer):
-    def __init__(self,
-                 num_channel_list,
-                 has_se,
-                 num_filters_list=[32, 64, 128, 256],
-                 name=None):
-        super(LastClsOut, self).__init__()
-
-        self.func_list = []
-        for idx in range(len(num_channel_list)):
-            func = self.add_sublayer(
-                "conv_{}_conv_{}".format(name, idx + 1),
-                BottleneckBlock(
-                    num_channels=num_channel_list[idx],
-                    num_filters=num_filters_list[idx],
-                    has_se=has_se,
-                    downsample=True,
-                    name=name + 'conv_' + str(idx + 1)))
-            self.func_list.append(func)
-
-    def forward(self, inputs):
-        outs = []
-        for idx, input in enumerate(inputs):
-            out = self.func_list[idx](input)
-            outs.append(out)
-        return outs
-
-
-@manager.BACKBONES.add_component
-def HRNet_W18_Small_V1(**kwargs):
-    model = HRNet(
-        stage1_num_modules=1,
-        stage1_num_blocks=[1],
-        stage1_num_channels=[32],
-        stage2_num_modules=1,
-        stage2_num_blocks=[2, 2],
-        stage2_num_channels=[16, 32],
-        stage3_num_modules=1,
-        stage3_num_blocks=[2, 2, 2],
-        stage3_num_channels=[16, 32, 64],
-        stage4_num_modules=1,
-        stage4_num_blocks=[2, 2, 2, 2],
-        stage4_num_channels=[16, 32, 64, 128],
-        **kwargs)
-    return model
-
-
-@manager.BACKBONES.add_component
-def HRNet_W18_Small_V2(**kwargs):
-    model = HRNet(
-        stage1_num_modules=1,
-        stage1_num_blocks=[2],
-        stage1_num_channels=[64],
-        stage2_num_modules=1,
-        stage2_num_blocks=[2, 2],
-        stage2_num_channels=[18, 36],
-        stage3_num_modules=1,
-        stage3_num_blocks=[2, 2, 2],
-        stage3_num_channels=[18, 36, 72],
-        stage4_num_modules=1,
-        stage4_num_blocks=[2, 2, 2, 2],
-        stage4_num_channels=[18, 36, 72, 144],
-        **kwargs)
-    return model
-
-
-@manager.BACKBONES.add_component
-def HRNet_W18(**kwargs):
-    model = HRNet(
-        stage1_num_modules=1,
-        stage1_num_blocks=[4],
-        stage1_num_channels=[64],
-        stage2_num_modules=1,
-        stage2_num_blocks=[4, 4],
-        stage2_num_channels=[18, 36],
-        stage3_num_modules=4,
-        stage3_num_blocks=[4, 4, 4],
-        stage3_num_channels=[18, 36, 72],
-        stage4_num_modules=3,
-        stage4_num_blocks=[4, 4, 4, 4],
-        stage4_num_channels=[18, 36, 72, 144],
-        **kwargs)
-    return model
-
-
-@manager.BACKBONES.add_component
-def HRNet_W30(**kwargs):
-    model = HRNet(
-        stage1_num_modules=1,
-        stage1_num_blocks=[4],
-        stage1_num_channels=[64],
-        stage2_num_modules=1,
-        stage2_num_blocks=[4, 4],
-        stage2_num_channels=[30, 60],
-        stage3_num_modules=4,
-        stage3_num_blocks=[4, 4, 4],
-        stage3_num_channels=[30, 60, 120],
-        stage4_num_modules=3,
-        stage4_num_blocks=[4, 4, 4, 4],
-        stage4_num_channels=[30, 60, 120, 240],
-        **kwargs)
-    return model
-
-
-@manager.BACKBONES.add_component
-def HRNet_W32(**kwargs):
-    model = HRNet(
-        stage1_num_modules=1,
-        stage1_num_blocks=[4],
-        stage1_num_channels=[64],
-        stage2_num_modules=1,
-        stage2_num_blocks=[4, 4],
-        stage2_num_channels=[32, 64],
-        stage3_num_modules=4,
-        stage3_num_blocks=[4, 4, 4],
-        stage3_num_channels=[32, 64, 128],
-        stage4_num_modules=3,
-        stage4_num_blocks=[4, 4, 4, 4],
-        stage4_num_channels=[32, 64, 128, 256],
-        **kwargs)
-    return model
-
-
-@manager.BACKBONES.add_component
-def HRNet_W40(**kwargs):
-    model = HRNet(
-        stage1_num_modules=1,
-        stage1_num_blocks=[4],
-        stage1_num_channels=[64],
-        stage2_num_modules=1,
-        stage2_num_blocks=[4, 4],
-        stage2_num_channels=[40, 80],
-        stage3_num_modules=4,
-        stage3_num_blocks=[4, 4, 4],
-        stage3_num_channels=[40, 80, 160],
-        stage4_num_modules=3,
-        stage4_num_blocks=[4, 4, 4, 4],
-        stage4_num_channels=[40, 80, 160, 320],
-        **kwargs)
-    return model
-
-
-@manager.BACKBONES.add_component
-def HRNet_W44(**kwargs):
-    model = HRNet(
-        stage1_num_modules=1,
-        stage1_num_blocks=[4],
-        stage1_num_channels=[64],
-        stage2_num_modules=1,
-        stage2_num_blocks=[4, 4],
-        stage2_num_channels=[44, 88],
-        stage3_num_modules=4,
-        stage3_num_blocks=[4, 4, 4],
-        stage3_num_channels=[44, 88, 176],
-        stage4_num_modules=3,
-        stage4_num_blocks=[4, 4, 4, 4],
-        stage4_num_channels=[44, 88, 176, 352],
-        **kwargs)
-    return model
-
-
-@manager.BACKBONES.add_component
-def HRNet_W48(**kwargs):
-    model = HRNet(
-        stage1_num_modules=1,
-        stage1_num_blocks=[4],
-        stage1_num_channels=[64],
-        stage2_num_modules=1,
-        stage2_num_blocks=[4, 4],
-        stage2_num_channels=[48, 96],
-        stage3_num_modules=4,
-        stage3_num_blocks=[4, 4, 4],
-        stage3_num_channels=[48, 96, 192],
-        stage4_num_modules=3,
-        stage4_num_blocks=[4, 4, 4, 4],
-        stage4_num_channels=[48, 96, 192, 384],
-        **kwargs)
-    return model
-
-
-@manager.BACKBONES.add_component
-def HRNet_W60(**kwargs):
-    model = HRNet(
-        stage1_num_modules=1,
-        stage1_num_blocks=[4],
-        stage1_num_channels=[64],
-        stage2_num_modules=1,
-        stage2_num_blocks=[4, 4],
-        stage2_num_channels=[60, 120],
-        stage3_num_modules=4,
-        stage3_num_blocks=[4, 4, 4],
-        stage3_num_channels=[60, 120, 240],
-        stage4_num_modules=3,
-        stage4_num_blocks=[4, 4, 4, 4],
-        stage4_num_channels=[60, 120, 240, 480],
-        **kwargs)
-    return model
-
-
-@manager.BACKBONES.add_component
-def HRNet_W64(**kwargs):
-    model = HRNet(
-        stage1_num_modules=1,
-        stage1_num_blocks=[4],
-        stage1_num_channels=[64],
-        stage2_num_modules=1,
-        stage2_num_blocks=[4, 4],
-        stage2_num_channels=[64, 128],
-        stage3_num_modules=4,
-        stage3_num_blocks=[4, 4, 4],
-        stage3_num_channels=[64, 128, 256],
-        stage4_num_modules=3,
-        stage4_num_blocks=[4, 4, 4, 4],
-        stage4_num_channels=[64, 128, 256, 512],
-        **kwargs)
-    return model
diff --git a/dygraph/models/architectures/layer_utils.py b/dygraph/models/architectures/layer_utils.py
deleted file mode 100644
index a9842f188276b6347f4f2ced100ff8c6c00f2715..0000000000000000000000000000000000000000
--- a/dygraph/models/architectures/layer_utils.py
+++ /dev/null
@@ -1,162 +0,0 @@
-# -*- encoding: utf-8 -*-
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import paddle.nn.functional as F
-from paddle import fluid
-from paddle.fluid import dygraph
-from paddle.fluid.dygraph import Conv2D
-from paddle.nn import SyncBatchNorm as BatchNorm
-from paddle.nn.layer import activation
-
-
-class ConvBnRelu(dygraph.Layer):
-    def __init__(self,
-                 num_channels,
-                 num_filters,
-                 filter_size,
-                 using_sep_conv=False,
-                 **kwargs):
-
-        super(ConvBnRelu, self).__init__()
-
-        if using_sep_conv:
-            self.conv = DepthwiseConvBnRelu(num_channels,
-                                            num_filters,
-                                            filter_size,
-                                            **kwargs)
-        else:
-
-            self.conv = Conv2D(num_channels,
-                               num_filters,
-                               filter_size,
-                               **kwargs)
-
-        self.batch_norm = BatchNorm(num_filters)
-
-    def forward(self, x):
-        x = self.conv(x)
-        x = self.batch_norm(x)
-        x = F.relu(x)
-        return x
-
-
-class ConvBn(dygraph.Layer):
-    def __init__(self, num_channels, num_filters, filter_size, **kwargs):
-        super(ConvBn, self).__init__()
-        self.conv = Conv2D(num_channels,
-                           num_filters,
-                           filter_size,
-                           **kwargs)
-        self.batch_norm = BatchNorm(num_filters)
-
-    def forward(self, x):
-        x = self.conv(x)
-        x = self.batch_norm(x)
-        return x
-
-
-class ConvReluPool(dygraph.Layer):
-    def __init__(self, num_channels, num_filters):
-        super(ConvReluPool, self).__init__()
-        self.conv = Conv2D(num_channels,
-                           num_filters,
-                           filter_size=3,
-                           stride=1,
-                           padding=1,
-                           dilation=1)
-
-    def forward(self, x):
-        x = self.conv(x)
-        x = F.relu(x)
-        x = fluid.layers.pool2d(x, pool_size=2, pool_type="max", pool_stride=2)
-        return x
-
-
-class ConvBnReluUpsample(dygraph.Layer):
-    def __init__(self, num_channels, num_filters):
-        super(ConvBnReluUpsample, self).__init__()
-        self.conv_bn_relu = ConvBnRelu(num_channels, num_filters)
-
-    def forward(self, x, upsample_scale=2):
-        x = self.conv_bn_relu(x)
-        new_shape = [x.shape[2] * upsample_scale, x.shape[3] * upsample_scale]
-        x = fluid.layers.resize_bilinear(x, new_shape)
-        return x
-
-
-class DepthwiseConvBnRelu(dygraph.Layer):
-    def __init__(self,
-                 num_channels,
-                 num_filters,
-                 filter_size,
-                 **kwargs):
-        super(DepthwiseConvBnRelu, self).__init__()
-        self.depthwise_conv = ConvBn(num_channels,
-                                     num_filters=num_channels,
-                                     filter_size=filter_size,
-                                     groups=num_channels,
-                                     use_cudnn=False,
-                                     **kwargs)
-        self.piontwise_conv = ConvBnRelu(num_channels,
-                                         num_filters,
-                                         filter_size=1,
-                                         groups=1)
-
-    def forward(self, x):
-        x = self.depthwise_conv(x)
-        x = self.piontwise_conv(x)
-        return x
-
-
-class Activation(fluid.dygraph.Layer):
-    """
-    The wrapper of activations
-    For example:
-        >>> relu = Activation("relu")
-        >>> print(relu)
-        <class 'paddle.nn.layer.activation.ReLU'>
-        >>> sigmoid = Activation("sigmoid")
-        >>> print(sigmoid)
-        <class 'paddle.nn.layer.activation.Sigmoid'>
-        >>> not_exit_one = Activation("not_exit_one")
-        KeyError: "not_exit_one does not exist in the current dict_keys(['elu', 'gelu', 'hardshrink', 
-        'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', 'softmax', 
-        'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', 'hsigmoid'])"
-
-    Args:
-        act (str): the activation name in lowercase
-    """
-
-    def __init__(self, act=None):
-        super(Activation, self).__init__()
-
-        self._act = act
-        upper_act_names = activation.__all__
-        lower_act_names = [act.lower() for act in upper_act_names]
-        act_dict = dict(zip(lower_act_names, upper_act_names))
-
-        if act is not None:
-            if act in act_dict.keys():
-                act_name = act_dict[act]
-                self.act_func = eval("activation.{}()".format(act_name))
-            else:
-                raise KeyError("{} does not exist in the current {}".format(act, act_dict.keys()))
-
-    def forward(self, x):
-
-        if self._act is not None:
-            return self.act_func(x)
-        else:
-            return x
\ No newline at end of file
diff --git a/dygraph/models/architectures/mobilenetv3.py b/dygraph/models/architectures/mobilenetv3.py
deleted file mode 100644
index 07805c1b806d18f47d96b8ae1a35c734625f67b3..0000000000000000000000000000000000000000
--- a/dygraph/models/architectures/mobilenetv3.py
+++ /dev/null
@@ -1,452 +0,0 @@
-# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import math
-import os
-
-import numpy as np
-import paddle
-import paddle.fluid as fluid
-from paddle.fluid.param_attr import ParamAttr
-from paddle.fluid.layer_helper import LayerHelper
-from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear, Dropout
-from paddle.nn import SyncBatchNorm as BatchNorm
-
-from dygraph.models.architectures import layer_utils
-from dygraph.cvlibs import manager
-from dygraph.utils import utils
-
-__all__ = [
-    "MobileNetV3_small_x0_35", "MobileNetV3_small_x0_5",
-    "MobileNetV3_small_x0_75", "MobileNetV3_small_x1_0",
-    "MobileNetV3_small_x1_25", "MobileNetV3_large_x0_35",
-    "MobileNetV3_large_x0_5", "MobileNetV3_large_x0_75",
-    "MobileNetV3_large_x1_0", "MobileNetV3_large_x1_25"
-]
-
-
-def make_divisible(v, divisor=8, min_value=None):
-    if min_value is None:
-        min_value = divisor
-    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
-    if new_v < 0.9 * v:
-        new_v += divisor
-    return new_v
-
-
-def get_padding_same(kernel_size, dilation_rate):
-    """
-    SAME padding implementation given kernel_size and dilation_rate.
-    The calculation formula as following:
-        (F-(k+(k -1)*(r-1))+2*p)/s + 1 = F_new
-        where F: a feature map
-              k: kernel size, r: dilation rate, p: padding value, s: stride
-              F_new: new feature map
-    Args:
-        kernel_size (int)
-        dilation_rate (int)
-
-    Returns:
-        padding_same (int): padding value
-    """
-    k = kernel_size
-    r = dilation_rate
-    padding_same = (k + (k - 1) * (r - 1) - 1) // 2
-
-    return padding_same
-
-
-class MobileNetV3(fluid.dygraph.Layer):
-    def __init__(self,
-                 backbone_pretrained=None,
-                 scale=1.0,
-                 model_name="small",
-                 class_dim=1000,
-                 output_stride=None,
-                 **kwargs):
-        super(MobileNetV3, self).__init__()
-
-        inplanes = 16
-        if model_name == "large":
-            self.cfg = [
-                # k, exp, c,  se,     nl,  s,
-                [3, 16, 16, False, "relu", 1],
-                [3, 64, 24, False, "relu", 2],
-                [3, 72, 24, False, "relu", 1],  # output 1 -> out_index=2
-                [5, 72, 40, True, "relu", 2],
-                [5, 120, 40, True, "relu", 1],
-                [5, 120, 40, True, "relu", 1],  # output 2 -> out_index=5
-                [3, 240, 80, False, "hard_swish", 2],
-                [3, 200, 80, False, "hard_swish", 1],
-                [3, 184, 80, False, "hard_swish", 1],
-                [3, 184, 80, False, "hard_swish", 1],
-                [3, 480, 112, True, "hard_swish", 1],
-                [3, 672, 112, True, "hard_swish",
-                 1],  # output 3 -> out_index=11
-                [5, 672, 160, True, "hard_swish", 2],
-                [5, 960, 160, True, "hard_swish", 1],
-                [5, 960, 160, True, "hard_swish",
-                 1],  # output 3 -> out_index=14
-            ]
-            self.out_indices = [2, 5, 11, 14]
-
-            self.cls_ch_squeeze = 960
-            self.cls_ch_expand = 1280
-        elif model_name == "small":
-            self.cfg = [
-                # k, exp, c,  se,     nl,  s,
-                [3, 16, 16, True, "relu", 2],  # output 1 -> out_index=0
-                [3, 72, 24, False, "relu", 2],
-                [3, 88, 24, False, "relu", 1],  # output 2 -> out_index=3
-                [5, 96, 40, True, "hard_swish", 2],
-                [5, 240, 40, True, "hard_swish", 1],
-                [5, 240, 40, True, "hard_swish", 1],
-                [5, 120, 48, True, "hard_swish", 1],
-                [5, 144, 48, True, "hard_swish", 1],  # output 3 -> out_index=7
-                [5, 288, 96, True, "hard_swish", 2],
-                [5, 576, 96, True, "hard_swish", 1],
-                [5, 576, 96, True, "hard_swish", 1],  # output 4 -> out_index=10
-            ]
-            self.out_indices = [0, 3, 7, 10]
-
-            self.cls_ch_squeeze = 576
-            self.cls_ch_expand = 1280
-        else:
-            raise NotImplementedError(
-                "mode[{}_model] is not implemented!".format(model_name))
-
-        ###################################################
-        # modify stride and dilation based on output_stride
-        self.dilation_cfg = [1] * len(self.cfg)
-        self.modify_bottle_params(output_stride=output_stride)
-        ###################################################
-
-        self.conv1 = ConvBNLayer(
-            in_c=3,
-            out_c=make_divisible(inplanes * scale),
-            filter_size=3,
-            stride=2,
-            padding=1,
-            num_groups=1,
-            if_act=True,
-            act="hard_swish",
-            name="conv1")
-
-        self.block_list = []
-
-        inplanes = make_divisible(inplanes * scale)
-        for i, (k, exp, c, se, nl, s) in enumerate(self.cfg):
-            ######################################
-            # add dilation rate
-            dilation_rate = self.dilation_cfg[i]
-            ######################################
-            self.block_list.append(
-                ResidualUnit(
-                    in_c=inplanes,
-                    mid_c=make_divisible(scale * exp),
-                    out_c=make_divisible(scale * c),
-                    filter_size=k,
-                    stride=s,
-                    dilation=dilation_rate,
-                    use_se=se,
-                    act=nl,
-                    name="conv" + str(i + 2)))
-            self.add_sublayer(
-                sublayer=self.block_list[-1], name="conv" + str(i + 2))
-            inplanes = make_divisible(scale * c)
-
-        self.last_second_conv = ConvBNLayer(
-            in_c=inplanes,
-            out_c=make_divisible(scale * self.cls_ch_squeeze),
-            filter_size=1,
-            stride=1,
-            padding=0,
-            num_groups=1,
-            if_act=True,
-            act="hard_swish",
-            name="conv_last")
-
-        self.pool = Pool2D(
-            pool_type="avg", global_pooling=True, use_cudnn=False)
-
-        self.last_conv = Conv2D(
-            num_channels=make_divisible(scale * self.cls_ch_squeeze),
-            num_filters=self.cls_ch_expand,
-            filter_size=1,
-            stride=1,
-            padding=0,
-            act=None,
-            param_attr=ParamAttr(name="last_1x1_conv_weights"),
-            bias_attr=False)
-
-        self.out = Linear(
-            input_dim=self.cls_ch_expand,
-            output_dim=class_dim,
-            param_attr=ParamAttr("fc_weights"),
-            bias_attr=ParamAttr(name="fc_offset"))
-
-        self.init_weight(backbone_pretrained)
-
-    def modify_bottle_params(self, output_stride=None):
-
-        if output_stride is not None and output_stride % 2 != 0:
-            raise Exception("output stride must to be even number")
-        if output_stride is not None:
-            stride = 2
-            rate = 1
-            for i, _cfg in enumerate(self.cfg):
-                stride = stride * _cfg[-1]
-                if stride > output_stride:
-                    rate = rate * _cfg[-1]
-                    self.cfg[i][-1] = 1
-
-                self.dilation_cfg[i] = rate
-
-    def forward(self, inputs, label=None, dropout_prob=0.2):
-        x = self.conv1(inputs)
-        # A feature list saves each downsampling feature.
-        feat_list = []
-        for i, block in enumerate(self.block_list):
-            x = block(x)
-            if i in self.out_indices:
-                feat_list.append(x)
-            #print("block {}:".format(i),x.shape, self.dilation_cfg[i])
-        x = self.last_second_conv(x)
-        x = self.pool(x)
-        x = self.last_conv(x)
-        x = fluid.layers.hard_swish(x)
-        x = fluid.layers.dropout(x=x, dropout_prob=dropout_prob)
-        x = fluid.layers.reshape(x, shape=[x.shape[0], x.shape[1]])
-        x = self.out(x)
-
-        return x, feat_list
-
-    def init_weight(self, pretrained_model=None):
-        """
-        Initialize the parameters of model parts.
-        Args:
-            pretrained_model ([str], optional): the path of pretrained model. Defaults to None.
-        """
-        if pretrained_model is not None:
-            if os.path.exists(pretrained_model):
-                utils.load_pretrained_model(self, pretrained_model)
-            else:
-                raise Exception('Pretrained model is not found: {}'.format(
-                    pretrained_model))
-
-
-class ConvBNLayer(fluid.dygraph.Layer):
-    def __init__(self,
-                 in_c,
-                 out_c,
-                 filter_size,
-                 stride,
-                 padding,
-                 dilation=1,
-                 num_groups=1,
-                 if_act=True,
-                 act=None,
-                 use_cudnn=True,
-                 name=""):
-        super(ConvBNLayer, self).__init__()
-        self.if_act = if_act
-        self.act = act
-
-        self.conv = fluid.dygraph.Conv2D(
-            num_channels=in_c,
-            num_filters=out_c,
-            filter_size=filter_size,
-            stride=stride,
-            padding=padding,
-            dilation=dilation,
-            groups=num_groups,
-            param_attr=ParamAttr(name=name + "_weights"),
-            bias_attr=False,
-            use_cudnn=use_cudnn,
-            act=None)
-        self.bn = BatchNorm(
-            num_features=out_c,
-            weight_attr=ParamAttr(
-                name=name + "_bn_scale",
-                regularizer=fluid.regularizer.L2DecayRegularizer(
-                    regularization_coeff=0.0)),
-            bias_attr=ParamAttr(
-                name=name + "_bn_offset",
-                regularizer=fluid.regularizer.L2DecayRegularizer(
-                    regularization_coeff=0.0)))
-
-        self._act_op = layer_utils.Activation(act=None)
-
-    def forward(self, x):
-        x = self.conv(x)
-        x = self.bn(x)
-        if self.if_act:
-            if self.act == "relu":
-                x = fluid.layers.relu(x)
-            elif self.act == "hard_swish":
-                x = fluid.layers.hard_swish(x)
-            else:
-                print("The activation function is selected incorrectly.")
-                exit()
-        return x
-
-
-class ResidualUnit(fluid.dygraph.Layer):
-    def __init__(self,
-                 in_c,
-                 mid_c,
-                 out_c,
-                 filter_size,
-                 stride,
-                 use_se,
-                 dilation=1,
-                 act=None,
-                 name=''):
-        super(ResidualUnit, self).__init__()
-        self.if_shortcut = stride == 1 and in_c == out_c
-        self.if_se = use_se
-
-        self.expand_conv = ConvBNLayer(
-            in_c=in_c,
-            out_c=mid_c,
-            filter_size=1,
-            stride=1,
-            padding=0,
-            if_act=True,
-            act=act,
-            name=name + "_expand")
-
-        self.bottleneck_conv = ConvBNLayer(
-            in_c=mid_c,
-            out_c=mid_c,
-            filter_size=filter_size,
-            stride=stride,
-            padding=get_padding_same(
-                filter_size,
-                dilation),  #int((filter_size - 1) // 2) + (dilation - 1),
-            dilation=dilation,
-            num_groups=mid_c,
-            if_act=True,
-            act=act,
-            name=name + "_depthwise")
-        if self.if_se:
-            self.mid_se = SEModule(mid_c, name=name + "_se")
-        self.linear_conv = ConvBNLayer(
-            in_c=mid_c,
-            out_c=out_c,
-            filter_size=1,
-            stride=1,
-            padding=0,
-            if_act=False,
-            act=None,
-            name=name + "_linear")
-        self.dilation = dilation
-
-    def forward(self, inputs):
-        x = self.expand_conv(inputs)
-        x = self.bottleneck_conv(x)
-        if self.if_se:
-            x = self.mid_se(x)
-        x = self.linear_conv(x)
-        if self.if_shortcut:
-            x = fluid.layers.elementwise_add(inputs, x)
-        return x
-
-
-class SEModule(fluid.dygraph.Layer):
-    def __init__(self, channel, reduction=4, name=""):
-        super(SEModule, self).__init__()
-        self.avg_pool = fluid.dygraph.Pool2D(
-            pool_type="avg", global_pooling=True, use_cudnn=False)
-        self.conv1 = fluid.dygraph.Conv2D(
-            num_channels=channel,
-            num_filters=channel // reduction,
-            filter_size=1,
-            stride=1,
-            padding=0,
-            act="relu",
-            param_attr=ParamAttr(name=name + "_1_weights"),
-            bias_attr=ParamAttr(name=name + "_1_offset"))
-        self.conv2 = fluid.dygraph.Conv2D(
-            num_channels=channel // reduction,
-            num_filters=channel,
-            filter_size=1,
-            stride=1,
-            padding=0,
-            act=None,
-            param_attr=ParamAttr(name + "_2_weights"),
-            bias_attr=ParamAttr(name=name + "_2_offset"))
-
-    def forward(self, inputs):
-        outputs = self.avg_pool(inputs)
-        outputs = self.conv1(outputs)
-        outputs = self.conv2(outputs)
-        outputs = fluid.layers.hard_sigmoid(outputs)
-        return fluid.layers.elementwise_mul(x=inputs, y=outputs, axis=0)
-
-
-def MobileNetV3_small_x0_35(**kwargs):
-    model = MobileNetV3(model_name="small", scale=0.35, **kwargs)
-    return model
-
-
-def MobileNetV3_small_x0_5(**kwargs):
-    model = MobileNetV3(model_name="small", scale=0.5, **kwargs)
-    return model
-
-
-def MobileNetV3_small_x0_75(**kwargs):
-    model = MobileNetV3(model_name="small", scale=0.75, **kwargs)
-    return model
-
-
-@manager.BACKBONES.add_component
-def MobileNetV3_small_x1_0(**kwargs):
-    model = MobileNetV3(model_name="small", scale=1.0, **kwargs)
-    return model
-
-
-def MobileNetV3_small_x1_25(**kwargs):
-    model = MobileNetV3(model_name="small", scale=1.25, **kwargs)
-    return model
-
-
-def MobileNetV3_large_x0_35(**kwargs):
-    model = MobileNetV3(model_name="large", scale=0.35, **kwargs)
-    return model
-
-
-def MobileNetV3_large_x0_5(**kwargs):
-    model = MobileNetV3(model_name="large", scale=0.5, **kwargs)
-    return model
-
-
-def MobileNetV3_large_x0_75(**kwargs):
-    model = MobileNetV3(model_name="large", scale=0.75, **kwargs)
-    return model
-
-
-@manager.BACKBONES.add_component
-def MobileNetV3_large_x1_0(**kwargs):
-    model = MobileNetV3(model_name="large", scale=1.0, **kwargs)
-    return model
-
-
-def MobileNetV3_large_x1_25(**kwargs):
-    model = MobileNetV3(model_name="large", scale=1.25, **kwargs)
-    return model
diff --git a/dygraph/models/architectures/resnet_vd.py b/dygraph/models/architectures/resnet_vd.py
deleted file mode 100644
index 582934505385872c60ff92204fd862836e6ae7fb..0000000000000000000000000000000000000000
--- a/dygraph/models/architectures/resnet_vd.py
+++ /dev/null
@@ -1,419 +0,0 @@
-# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-import math
-
-import numpy as np
-import paddle
-import paddle.fluid as fluid
-from paddle.fluid.param_attr import ParamAttr
-from paddle.fluid.layer_helper import LayerHelper
-from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear, Dropout
-from paddle.nn import SyncBatchNorm as BatchNorm
-
-from dygraph.utils import utils
-from dygraph.models.architectures import layer_utils
-from dygraph.cvlibs import manager
-from dygraph.utils import utils
-
-__all__ = [
-    "ResNet18_vd", "ResNet34_vd", "ResNet50_vd", "ResNet101_vd", "ResNet152_vd"
-]
-
-
-class ConvBNLayer(fluid.dygraph.Layer):
-    def __init__(
-            self,
-            num_channels,
-            num_filters,
-            filter_size,
-            stride=1,
-            dilation=1,
-            groups=1,
-            is_vd_mode=False,
-            act=None,
-            name=None,
-    ):
-        super(ConvBNLayer, self).__init__()
-
-        self.is_vd_mode = is_vd_mode
-        self._pool2d_avg = Pool2D(
-            pool_size=2,
-            pool_stride=2,
-            pool_padding=0,
-            pool_type='avg',
-            ceil_mode=True)
-        self._conv = Conv2D(
-            num_channels=num_channels,
-            num_filters=num_filters,
-            filter_size=filter_size,
-            stride=stride,
-            padding=(filter_size - 1) // 2 if dilation == 1 else 0,
-            dilation=dilation,
-            groups=groups,
-            act=None,
-            param_attr=ParamAttr(name=name + "_weights"),
-            bias_attr=False)
-        if name == "conv1":
-            bn_name = "bn_" + name
-        else:
-            bn_name = "bn" + name[3:]
-        self._batch_norm = BatchNorm(
-            num_filters,
-            weight_attr=ParamAttr(name=bn_name + '_scale'),
-            bias_attr=ParamAttr(bn_name + '_offset'))
-        self._act_op = layer_utils.Activation(act=act)
-
-    def forward(self, inputs):
-        if self.is_vd_mode:
-            inputs = self._pool2d_avg(inputs)
-        y = self._conv(inputs)
-        y = self._batch_norm(y)
-        y = self._act_op(y)
-
-        return y
-
-
-class BottleneckBlock(fluid.dygraph.Layer):
-    def __init__(self,
-                 num_channels,
-                 num_filters,
-                 stride,
-                 shortcut=True,
-                 if_first=False,
-                 dilation=1,
-                 name=None):
-        super(BottleneckBlock, self).__init__()
-
-        self.conv0 = ConvBNLayer(
-            num_channels=num_channels,
-            num_filters=num_filters,
-            filter_size=1,
-            act='relu',
-            name=name + "_branch2a")
-
-        self.dilation = dilation
-
-        self.conv1 = ConvBNLayer(
-            num_channels=num_filters,
-            num_filters=num_filters,
-            filter_size=3,
-            stride=stride,
-            act='relu',
-            dilation=dilation,
-            name=name + "_branch2b")
-        self.conv2 = ConvBNLayer(
-            num_channels=num_filters,
-            num_filters=num_filters * 4,
-            filter_size=1,
-            act=None,
-            name=name + "_branch2c")
-
-        if not shortcut:
-            self.short = ConvBNLayer(
-                num_channels=num_channels,
-                num_filters=num_filters * 4,
-                filter_size=1,
-                stride=1,
-                is_vd_mode=False if if_first or stride == 1 else True,
-                name=name + "_branch1")
-
-        self.shortcut = shortcut
-
-    def forward(self, inputs):
-        y = self.conv0(inputs)
-
-        ####################################################################
-        # If given dilation rate > 1, using corresponding padding
-        if self.dilation > 1:
-            padding = self.dilation
-            y = fluid.layers.pad(
-                y, [0, 0, 0, 0, padding, padding, padding, padding])
-        #####################################################################
-        conv1 = self.conv1(y)
-        conv2 = self.conv2(conv1)
-
-        if self.shortcut:
-            short = inputs
-        else:
-            short = self.short(inputs)
-
-        y = fluid.layers.elementwise_add(x=short, y=conv2)
-        layer_helper = LayerHelper(self.full_name(), act='relu')
-        return layer_helper.append_activation(y)
-
-
-class BasicBlock(fluid.dygraph.Layer):
-    def __init__(self,
-                 num_channels,
-                 num_filters,
-                 stride,
-                 shortcut=True,
-                 if_first=False,
-                 name=None):
-        super(BasicBlock, self).__init__()
-        self.stride = stride
-        self.conv0 = ConvBNLayer(
-            num_channels=num_channels,
-            num_filters=num_filters,
-            filter_size=3,
-            stride=stride,
-            act='relu',
-            name=name + "_branch2a")
-        self.conv1 = ConvBNLayer(
-            num_channels=num_filters,
-            num_filters=num_filters,
-            filter_size=3,
-            act=None,
-            name=name + "_branch2b")
-
-        if not shortcut:
-            self.short = ConvBNLayer(
-                num_channels=num_channels,
-                num_filters=num_filters,
-                filter_size=1,
-                stride=1,
-                is_vd_mode=False if if_first else True,
-                name=name + "_branch1")
-
-        self.shortcut = shortcut
-
-    def forward(self, inputs):
-        y = self.conv0(inputs)
-        conv1 = self.conv1(y)
-
-        if self.shortcut:
-            short = inputs
-        else:
-            short = self.short(inputs)
-        y = fluid.layers.elementwise_add(x=short, y=conv1)
-
-        layer_helper = LayerHelper(self.full_name(), act='relu')
-        return layer_helper.append_activation(y)
-
-
-class ResNet_vd(fluid.dygraph.Layer):
-    def __init__(self,
-                 backbone_pretrained=None,
-                 layers=50,
-                 class_dim=1000,
-                 output_stride=None,
-                 multi_grid=(1, 2, 4),
-                 **kwargs):
-        super(ResNet_vd, self).__init__()
-
-        self.layers = layers
-        supported_layers = [18, 34, 50, 101, 152, 200]
-        assert layers in supported_layers, \
-            "supported layers are {} but input layer is {}".format(
-                supported_layers, layers)
-
-        if layers == 18:
-            depth = [2, 2, 2, 2]
-        elif layers == 34 or layers == 50:
-            depth = [3, 4, 6, 3]
-        elif layers == 101:
-            depth = [3, 4, 23, 3]
-        elif layers == 152:
-            depth = [3, 8, 36, 3]
-        elif layers == 200:
-            depth = [3, 12, 48, 3]
-        num_channels = [64, 256, 512, 1024
-                        ] if layers >= 50 else [64, 64, 128, 256]
-        num_filters = [64, 128, 256, 512]
-
-        dilation_dict = None
-        if output_stride == 8:
-            dilation_dict = {2: 2, 3: 4}
-        elif output_stride == 16:
-            dilation_dict = {3: 2}
-
-        self.conv1_1 = ConvBNLayer(
-            num_channels=3,
-            num_filters=32,
-            filter_size=3,
-            stride=2,
-            act='relu',
-            name="conv1_1")
-        self.conv1_2 = ConvBNLayer(
-            num_channels=32,
-            num_filters=32,
-            filter_size=3,
-            stride=1,
-            act='relu',
-            name="conv1_2")
-        self.conv1_3 = ConvBNLayer(
-            num_channels=32,
-            num_filters=64,
-            filter_size=3,
-            stride=1,
-            act='relu',
-            name="conv1_3")
-        self.pool2d_max = Pool2D(
-            pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
-
-        # self.block_list = []
-        self.stage_list = []
-        if layers >= 50:
-            for block in range(len(depth)):
-                shortcut = False
-                block_list = []
-                for i in range(depth[block]):
-                    if layers in [101, 152] and block == 2:
-                        if i == 0:
-                            conv_name = "res" + str(block + 2) + "a"
-                        else:
-                            conv_name = "res" + str(block + 2) + "b" + str(i)
-                    else:
-                        conv_name = "res" + str(block + 2) + chr(97 + i)
-
-                    ###############################################################################
-                    # Add dilation rate for some segmentation tasks, if dilation_dict is not None.
-                    dilation_rate = dilation_dict[
-                        block] if dilation_dict and block in dilation_dict else 1
-
-                    # Actually block here is 'stage', and i is 'block' in 'stage'
-                    # At the stage 4, expand the the dilation_rate using multi_grid, default (1, 2, 4)
-                    if block == 3:
-                        dilation_rate = dilation_rate * multi_grid[i]
-                    #print("stage {}, block {}: dilation rate".format(block, i), dilation_rate)
-                    ###############################################################################
-
-                    bottleneck_block = self.add_sublayer(
-                        'bb_%d_%d' % (block, i),
-                        BottleneckBlock(
-                            num_channels=num_channels[block]
-                            if i == 0 else num_filters[block] * 4,
-                            num_filters=num_filters[block],
-                            stride=2 if i == 0 and block != 0
-                            and dilation_rate == 1 else 1,
-                            shortcut=shortcut,
-                            if_first=block == i == 0,
-                            name=conv_name,
-                            dilation=dilation_rate))
-
-                    block_list.append(bottleneck_block)
-                    shortcut = True
-                self.stage_list.append(block_list)
-        else:
-            for block in range(len(depth)):
-                shortcut = False
-                block_list = []
-                for i in range(depth[block]):
-                    conv_name = "res" + str(block + 2) + chr(97 + i)
-                    basic_block = self.add_sublayer(
-                        'bb_%d_%d' % (block, i),
-                        BasicBlock(
-                            num_channels=num_channels[block]
-                            if i == 0 else num_filters[block],
-                            num_filters=num_filters[block],
-                            stride=2 if i == 0 and block != 0 else 1,
-                            shortcut=shortcut,
-                            if_first=block == i == 0,
-                            name=conv_name))
-                    block_list.append(basic_block)
-                    shortcut = True
-                self.stage_list.append(block_list)
-
-        self.pool2d_avg = Pool2D(
-            pool_size=7, pool_type='avg', global_pooling=True)
-
-        self.pool2d_avg_channels = num_channels[-1] * 2
-
-        stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0)
-
-        self.out = Linear(
-            self.pool2d_avg_channels,
-            class_dim,
-            param_attr=ParamAttr(
-                initializer=fluid.initializer.Uniform(-stdv, stdv),
-                name="fc_0.w_0"),
-            bias_attr=ParamAttr(name="fc_0.b_0"))
-
-        self.init_weight(backbone_pretrained)
-
-    def forward(self, inputs):
-        y = self.conv1_1(inputs)
-        y = self.conv1_2(y)
-        y = self.conv1_3(y)
-        y = self.pool2d_max(y)
-
-        # A feature list saves the output feature map of each stage.
-        feat_list = []
-        for i, stage in enumerate(self.stage_list):
-            for j, block in enumerate(stage):
-                y = block(y)
-                #print("stage {} block {}".format(i+1, j+1), y.shape)
-            feat_list.append(y)
-
-        y = self.pool2d_avg(y)
-        y = fluid.layers.reshape(y, shape=[-1, self.pool2d_avg_channels])
-        y = self.out(y)
-        return y, feat_list
-
-    # def init_weight(self, pretrained_model=None):
-
-    #     if pretrained_model is not None:
-    #         if os.path.exists(pretrained_model):
-    #             utils.load_pretrained_model(self, pretrained_model)
-
-    def init_weight(self, pretrained_model=None):
-        """
-        Initialize the parameters of model parts.
-        Args:
-            pretrained_model ([str], optional): the path of pretrained model. Defaults to None.
-        """
-        if pretrained_model is not None:
-            if os.path.exists(pretrained_model):
-                utils.load_pretrained_model(self, pretrained_model)
-            else:
-                raise Exception('Pretrained model is not found: {}'.format(
-                    pretrained_model))
-
-
-def ResNet18_vd(**args):
-    model = ResNet_vd(layers=18, **args)
-    return model
-
-
-def ResNet34_vd(**args):
-    model = ResNet_vd(layers=34, **args)
-    return model
-
-
-@manager.BACKBONES.add_component
-def ResNet50_vd(**args):
-    model = ResNet_vd(layers=50, **args)
-    return model
-
-
-@manager.BACKBONES.add_component
-def ResNet101_vd(**args):
-    model = ResNet_vd(layers=101, **args)
-    return model
-
-
-def ResNet152_vd(**args):
-    model = ResNet_vd(layers=152, **args)
-    return model
-
-
-def ResNet200_vd(**args):
-    model = ResNet_vd(layers=200, **args)
-    return model
diff --git a/dygraph/models/architectures/xception_deeplab.py b/dygraph/models/architectures/xception_deeplab.py
deleted file mode 100644
index 4f7d97f837fcc2b7394be3ceef15b06387a5844a..0000000000000000000000000000000000000000
--- a/dygraph/models/architectures/xception_deeplab.py
+++ /dev/null
@@ -1,453 +0,0 @@
-# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-
-import paddle
-import paddle.fluid as fluid
-from paddle.fluid.param_attr import ParamAttr
-from paddle.fluid.layer_helper import LayerHelper
-from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear, Dropout
-from paddle.nn import SyncBatchNorm as BatchNorm
-
-from dygraph.models.architectures import layer_utils
-from dygraph.cvlibs import manager
-from dygraph.utils import utils
-
-__all__ = ["Xception41_deeplab", "Xception65_deeplab", "Xception71_deeplab"]
-
-
-def check_data(data, number):
-    if type(data) == int:
-        return [data] * number
-    assert len(data) == number
-    return data
-
-
-def check_stride(s, os):
-    if s <= os:
-        return True
-    else:
-        return False
-
-
-def check_points(count, points):
-    if points is None:
-        return False
-    else:
-        if isinstance(points, list):
-            return (True if count in points else False)
-        else:
-            return (True if count == points else False)
-
-
-def gen_bottleneck_params(backbone='xception_65'):
-    if backbone == 'xception_65':
-        bottleneck_params = {
-            "entry_flow": (3, [2, 2, 2], [128, 256, 728]),
-            "middle_flow": (16, 1, 728),
-            "exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536, 2048]])
-        }
-    elif backbone == 'xception_41':
-        bottleneck_params = {
-            "entry_flow": (3, [2, 2, 2], [128, 256, 728]),
-            "middle_flow": (8, 1, 728),
-            "exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536, 2048]])
-        }
-    elif backbone == 'xception_71':
-        bottleneck_params = {
-            "entry_flow": (5, [2, 1, 2, 1, 2], [128, 256, 256, 728, 728]),
-            "middle_flow": (16, 1, 728),
-            "exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536, 2048]])
-        }
-    else:
-        raise Exception(
-            "xception backbont only support xception_41/xception_65/xception_71"
-        )
-    return bottleneck_params
-
-
-class ConvBNLayer(fluid.dygraph.Layer):
-    def __init__(self,
-                 input_channels,
-                 output_channels,
-                 filter_size,
-                 stride=1,
-                 padding=0,
-                 act=None,
-                 name=None):
-        super(ConvBNLayer, self).__init__()
-
-        self._conv = Conv2D(
-            num_channels=input_channels,
-            num_filters=output_channels,
-            filter_size=filter_size,
-            stride=stride,
-            padding=padding,
-            param_attr=ParamAttr(name=name + "/weights"),
-            bias_attr=False)
-        self._bn = BatchNorm(
-            num_features=output_channels,
-            epsilon=1e-3,
-            momentum=0.99,
-            weight_attr=ParamAttr(name=name + "/BatchNorm/gamma"),
-            bias_attr=ParamAttr(name=name + "/BatchNorm/beta"))
-
-        self._act_op = layer_utils.Activation(act=act)
-
-    def forward(self, inputs):
-
-        return self._act_op(self._bn(self._conv(inputs)))
-
-
-class Seperate_Conv(fluid.dygraph.Layer):
-    def __init__(self,
-                 input_channels,
-                 output_channels,
-                 stride,
-                 filter,
-                 dilation=1,
-                 act=None,
-                 name=None):
-        super(Seperate_Conv, self).__init__()
-
-        self._conv1 = Conv2D(
-            num_channels=input_channels,
-            num_filters=input_channels,
-            filter_size=filter,
-            stride=stride,
-            groups=input_channels,
-            padding=(filter) // 2 * dilation,
-            dilation=dilation,
-            param_attr=ParamAttr(name=name + "/depthwise/weights"),
-            bias_attr=False)
-        self._bn1 = BatchNorm(
-            input_channels,
-            epsilon=1e-3,
-            momentum=0.99,
-            weight_attr=ParamAttr(name=name + "/depthwise/BatchNorm/gamma"),
-            bias_attr=ParamAttr(name=name + "/depthwise/BatchNorm/beta"))
-
-        self._act_op1 = layer_utils.Activation(act=act)
-
-        self._conv2 = Conv2D(
-            input_channels,
-            output_channels,
-            1,
-            stride=1,
-            groups=1,
-            padding=0,
-            param_attr=ParamAttr(name=name + "/pointwise/weights"),
-            bias_attr=False)
-        self._bn2 = BatchNorm(
-            output_channels,
-            epsilon=1e-3,
-            momentum=0.99,
-            weight_attr=ParamAttr(name=name + "/pointwise/BatchNorm/gamma"),
-            bias_attr=ParamAttr(name=name + "/pointwise/BatchNorm/beta"))
-
-        self._act_op2 = layer_utils.Activation(act=act)
-
-    def forward(self, inputs):
-        x = self._conv1(inputs)
-        x = self._bn1(x)
-        x = self._act_op1(x)
-        x = self._conv2(x)
-        x = self._bn2(x)
-        x = self._act_op2(x)
-        return x
-
-
-class Xception_Block(fluid.dygraph.Layer):
-    def __init__(self,
-                 input_channels,
-                 output_channels,
-                 strides=1,
-                 filter_size=3,
-                 dilation=1,
-                 skip_conv=True,
-                 has_skip=True,
-                 activation_fn_in_separable_conv=False,
-                 name=None):
-        super(Xception_Block, self).__init__()
-
-        repeat_number = 3
-        output_channels = check_data(output_channels, repeat_number)
-        filter_size = check_data(filter_size, repeat_number)
-        strides = check_data(strides, repeat_number)
-
-        self.has_skip = has_skip
-        self.skip_conv = skip_conv
-        self.activation_fn_in_separable_conv = activation_fn_in_separable_conv
-        if not activation_fn_in_separable_conv:
-            self._conv1 = Seperate_Conv(
-                input_channels,
-                output_channels[0],
-                stride=strides[0],
-                filter=filter_size[0],
-                dilation=dilation,
-                name=name + "/separable_conv1")
-            self._conv2 = Seperate_Conv(
-                output_channels[0],
-                output_channels[1],
-                stride=strides[1],
-                filter=filter_size[1],
-                dilation=dilation,
-                name=name + "/separable_conv2")
-            self._conv3 = Seperate_Conv(
-                output_channels[1],
-                output_channels[2],
-                stride=strides[2],
-                filter=filter_size[2],
-                dilation=dilation,
-                name=name + "/separable_conv3")
-        else:
-            self._conv1 = Seperate_Conv(
-                input_channels,
-                output_channels[0],
-                stride=strides[0],
-                filter=filter_size[0],
-                act="relu",
-                dilation=dilation,
-                name=name + "/separable_conv1")
-            self._conv2 = Seperate_Conv(
-                output_channels[0],
-                output_channels[1],
-                stride=strides[1],
-                filter=filter_size[1],
-                act="relu",
-                dilation=dilation,
-                name=name + "/separable_conv2")
-            self._conv3 = Seperate_Conv(
-                output_channels[1],
-                output_channels[2],
-                stride=strides[2],
-                filter=filter_size[2],
-                act="relu",
-                dilation=dilation,
-                name=name + "/separable_conv3")
-
-        if has_skip and skip_conv:
-            self._short = ConvBNLayer(
-                input_channels,
-                output_channels[-1],
-                1,
-                stride=strides[-1],
-                padding=0,
-                name=name + "/shortcut")
-
-    def forward(self, inputs):
-        layer_helper = LayerHelper(self.full_name(), act='relu')
-        if not self.activation_fn_in_separable_conv:
-            x = layer_helper.append_activation(inputs)
-            x = self._conv1(x)
-            x = layer_helper.append_activation(x)
-            x = self._conv2(x)
-            x = layer_helper.append_activation(x)
-            x = self._conv3(x)
-        else:
-            x = self._conv1(inputs)
-            x = self._conv2(x)
-            x = self._conv3(x)
-        if self.has_skip is False:
-            return x
-        if self.skip_conv:
-            skip = self._short(inputs)
-        else:
-            skip = inputs
-        return fluid.layers.elementwise_add(x, skip)
-
-
-class XceptionDeeplab(fluid.dygraph.Layer):
-
-    #def __init__(self, backbone, class_dim=1000):
-    # add output_stride
-    def __init__(self,
-                 backbone,
-                 backbone_pretrained=None,
-                 output_stride=16,
-                 class_dim=1000,
-                 **kwargs):
-
-        super(XceptionDeeplab, self).__init__()
-
-        bottleneck_params = gen_bottleneck_params(backbone)
-        self.backbone = backbone
-
-        self._conv1 = ConvBNLayer(
-            3,
-            32,
-            3,
-            stride=2,
-            padding=1,
-            act="relu",
-            name=self.backbone + "/entry_flow/conv1")
-        self._conv2 = ConvBNLayer(
-            32,
-            64,
-            3,
-            stride=1,
-            padding=1,
-            act="relu",
-            name=self.backbone + "/entry_flow/conv2")
-        """
-            bottleneck_params = {
-            "entry_flow": (3, [2, 2, 2], [128, 256, 728]),
-            "middle_flow": (16, 1, 728),
-            "exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536, 2048]])
-        }
-
-        if output_stride == 16:
-            entry_block3_stride = 2
-            middle_block_dilation = 1
-            exit_block_dilations = (1, 2)
-        elif output_stride == 8:
-            entry_block3_stride = 1
-            middle_block_dilation = 2
-            exit_block_dilations = (2, 4)
-
-        """
-        self.block_num = bottleneck_params["entry_flow"][0]
-        self.strides = bottleneck_params["entry_flow"][1]
-        self.chns = bottleneck_params["entry_flow"][2]
-        self.strides = check_data(self.strides, self.block_num)
-        self.chns = check_data(self.chns, self.block_num)
-
-        self.entry_flow = []
-        self.middle_flow = []
-
-        self.stride = 2
-        self.output_stride = output_stride
-        s = self.stride
-
-        for i in range(self.block_num):
-            stride = self.strides[i] if check_stride(s * self.strides[i],
-                                                     self.output_stride) else 1
-            xception_block = self.add_sublayer(
-                self.backbone + "/entry_flow/block" + str(i + 1),
-                Xception_Block(
-                    input_channels=64 if i == 0 else self.chns[i - 1],
-                    output_channels=self.chns[i],
-                    strides=[1, 1, self.stride],
-                    name=self.backbone + "/entry_flow/block" + str(i + 1)))
-            self.entry_flow.append(xception_block)
-            s = s * stride
-        self.stride = s
-
-        self.block_num = bottleneck_params["middle_flow"][0]
-        self.strides = bottleneck_params["middle_flow"][1]
-        self.chns = bottleneck_params["middle_flow"][2]
-        self.strides = check_data(self.strides, self.block_num)
-        self.chns = check_data(self.chns, self.block_num)
-        s = self.stride
-
-        for i in range(self.block_num):
-            stride = self.strides[i] if check_stride(s * self.strides[i],
-                                                     self.output_stride) else 1
-            xception_block = self.add_sublayer(
-                self.backbone + "/middle_flow/block" + str(i + 1),
-                Xception_Block(
-                    input_channels=728,
-                    output_channels=728,
-                    strides=[1, 1, self.strides[i]],
-                    skip_conv=False,
-                    name=self.backbone + "/middle_flow/block" + str(i + 1)))
-            self.middle_flow.append(xception_block)
-            s = s * stride
-        self.stride = s
-
-        self.block_num = bottleneck_params["exit_flow"][0]
-        self.strides = bottleneck_params["exit_flow"][1]
-        self.chns = bottleneck_params["exit_flow"][2]
-        self.strides = check_data(self.strides, self.block_num)
-        self.chns = check_data(self.chns, self.block_num)
-        s = self.stride
-        stride = self.strides[0] if check_stride(s * self.strides[0],
-                                                 self.output_stride) else 1
-        self._exit_flow_1 = Xception_Block(
-            728,
-            self.chns[0], [1, 1, stride],
-            name=self.backbone + "/exit_flow/block1")
-        s = s * stride
-        stride = self.strides[1] if check_stride(s * self.strides[1],
-                                                 self.output_stride) else 1
-        self._exit_flow_2 = Xception_Block(
-            self.chns[0][-1],
-            self.chns[1], [1, 1, stride],
-            dilation=2,
-            has_skip=False,
-            activation_fn_in_separable_conv=True,
-            name=self.backbone + "/exit_flow/block2")
-        s = s * stride
-
-        self.stride = s
-
-        self._drop = Dropout(p=0.5)
-        self._pool = Pool2D(pool_type="avg", global_pooling=True)
-        self._fc = Linear(
-            self.chns[1][-1],
-            class_dim,
-            param_attr=ParamAttr(name="fc_weights"),
-            bias_attr=ParamAttr(name="fc_bias"))
-
-        self.init_weight(backbone_pretrained)
-
-    def forward(self, inputs):
-        x = self._conv1(inputs)
-        x = self._conv2(x)
-        feat_list = []
-        for i, ef in enumerate(self.entry_flow):
-            x = ef(x)
-            if i == 0:
-                feat_list.append(x)
-        for mf in self.middle_flow:
-            x = mf(x)
-        x = self._exit_flow_1(x)
-        x = self._exit_flow_2(x)
-        feat_list.append(x)
-
-        x = self._drop(x)
-        x = self._pool(x)
-        x = fluid.layers.squeeze(x, axes=[2, 3])
-        x = self._fc(x)
-        return x, feat_list
-
-    def init_weight(self, pretrained_model=None):
-        """
-        Initialize the parameters of model parts.
-        Args:
-            pretrained_model ([str], optional): the path of pretrained model. Defaults to None.
-        """
-        if pretrained_model is not None:
-            if os.path.exists(pretrained_model):
-                utils.load_pretrained_model(self, pretrained_model)
-            else:
-                raise Exception('Pretrained model is not found: {}'.format(
-                    pretrained_model))
-
-
-def Xception41_deeplab(**args):
-    model = XceptionDeeplab('xception_41', **args)
-    return model
-
-
-@manager.BACKBONES.add_component
-def Xception65_deeplab(**args):
-    model = XceptionDeeplab("xception_65", **args)
-    return model
-
-
-def Xception71_deeplab(**args):
-    model = XceptionDeeplab("xception_71", **args)
-    return model
diff --git a/dygraph/models/deeplab.py b/dygraph/models/deeplab.py
deleted file mode 100644
index 6911b63900d62b427e94a2b22e4919f6b664f250..0000000000000000000000000000000000000000
--- a/dygraph/models/deeplab.py
+++ /dev/null
@@ -1,363 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-
-from dygraph.cvlibs import manager
-from dygraph.models.architectures import layer_utils
-from paddle import fluid
-from paddle.fluid import dygraph
-from paddle.fluid.dygraph import Conv2D
-
-from dygraph.utils import utils
-
-__all__ = [
-    'DeepLabV3P', "deeplabv3p_resnet101_vd", "deeplabv3p_resnet101_vd_os8",
-    "deeplabv3p_resnet50_vd", "deeplabv3p_resnet50_vd_os8",
-    "deeplabv3p_xception65_deeplab", "deeplabv3p_mobilenetv3_large",
-    "deeplabv3p_mobilenetv3_small"
-]
-
-
-class ImageAverage(dygraph.Layer):
-    """
-    Global average pooling
-
-    Args:
-        num_channels (int): the number of input channels.
-
-    """
-
-    def __init__(self, num_channels):
-        super(ImageAverage, self).__init__()
-        self.conv_bn_relu = layer_utils.ConvBnRelu(
-            num_channels, num_filters=256, filter_size=1)
-
-    def forward(self, input):
-        x = fluid.layers.reduce_mean(input, dim=[2, 3], keep_dim=True)
-        x = self.conv_bn_relu(x)
-        x = fluid.layers.resize_bilinear(x, out_shape=input.shape[2:])
-        return x
-
-
-class ASPP(dygraph.Layer):
-    """
-     Decoder module of DeepLabV3P model
-
-    Args:
-        output_stride (int): the ratio of input size and final feature size. Support 16 or 8.
-        in_channels (int): the number of input channels in decoder module.
-        using_sep_conv (bool): whether use separable conv or not. Default to True.
-    """
-
-    def __init__(self, output_stride, in_channels, using_sep_conv=True):
-        super(ASPP, self).__init__()
-
-        if output_stride == 16:
-            aspp_ratios = (6, 12, 18)
-        elif output_stride == 8:
-            aspp_ratios = (12, 24, 36)
-        else:
-            raise NotImplementedError(
-                "Only support output_stride is 8 or 16, but received{}".format(
-                    output_stride))
-
-        self.image_average = ImageAverage(num_channels=in_channels)
-
-        # The first aspp using 1*1 conv
-        self.aspp1 = layer_utils.ConvBnRelu(
-            num_channels=in_channels,
-            num_filters=256,
-            filter_size=1,
-            using_sep_conv=False)
-
-        # The second aspp using 3*3 (separable) conv at dilated rate aspp_ratios[0]
-        self.aspp2 = layer_utils.ConvBnRelu(
-            num_channels=in_channels,
-            num_filters=256,
-            filter_size=3,
-            using_sep_conv=using_sep_conv,
-            dilation=aspp_ratios[0],
-            padding=aspp_ratios[0])
-
-        # The Third aspp using 3*3 (separable) conv at dilated rate aspp_ratios[1]
-        self.aspp3 = layer_utils.ConvBnRelu(
-            num_channels=in_channels,
-            num_filters=256,
-            filter_size=3,
-            using_sep_conv=using_sep_conv,
-            dilation=aspp_ratios[1],
-            padding=aspp_ratios[1])
-
-        # The Third aspp using 3*3 (separable) conv at dilated rate aspp_ratios[2]
-        self.aspp4 = layer_utils.ConvBnRelu(
-            num_channels=in_channels,
-            num_filters=256,
-            filter_size=3,
-            using_sep_conv=using_sep_conv,
-            dilation=aspp_ratios[2],
-            padding=aspp_ratios[2])
-
-        # After concat op, using 1*1 conv
-        self.conv_bn_relu = layer_utils.ConvBnRelu(
-            num_channels=1280, num_filters=256, filter_size=1)
-
-    def forward(self, x):
-
-        x1 = self.image_average(x)
-        x2 = self.aspp1(x)
-        x3 = self.aspp2(x)
-        x4 = self.aspp3(x)
-        x5 = self.aspp4(x)
-        x = fluid.layers.concat([x1, x2, x3, x4, x5], axis=1)
-
-        x = self.conv_bn_relu(x)
-        x = fluid.layers.dropout(x, dropout_prob=0.1)
-        return x
-
-
-class Decoder(dygraph.Layer):
-    """
-    Decoder module of DeepLabV3P model
-
-    Args:
-        num_classes (int): the number of classes.
-        in_channels (int): the number of input channels in decoder module.
-        using_sep_conv (bool): whether use separable conv or not. Default to True.
-
-    """
-
-    def __init__(self, num_classes, in_channels, using_sep_conv=True):
-        super(Decoder, self).__init__()
-
-        self.conv_bn_relu1 = layer_utils.ConvBnRelu(
-            num_channels=in_channels, num_filters=48, filter_size=1)
-
-        self.conv_bn_relu2 = layer_utils.ConvBnRelu(
-            num_channels=304,
-            num_filters=256,
-            filter_size=3,
-            using_sep_conv=using_sep_conv,
-            padding=1)
-        self.conv_bn_relu3 = layer_utils.ConvBnRelu(
-            num_channels=256,
-            num_filters=256,
-            filter_size=3,
-            using_sep_conv=using_sep_conv,
-            padding=1)
-        self.conv = Conv2D(
-            num_channels=256, num_filters=num_classes, filter_size=1)
-
-    def forward(self, x, low_level_feat):
-        low_level_feat = self.conv_bn_relu1(low_level_feat)
-        x = fluid.layers.resize_bilinear(x, low_level_feat.shape[2:])
-        x = fluid.layers.concat([x, low_level_feat], axis=1)
-        x = self.conv_bn_relu2(x)
-        x = self.conv_bn_relu3(x)
-        x = self.conv(x)
-        return x
-
-
-@manager.MODELS.add_component
-class DeepLabV3P(dygraph.Layer):
-    """
-    The DeepLabV3P consists of three main components, Backbone, ASPP and Decoder
-    The orginal artile refers to
-    "Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation"
-     Liang-Chieh Chen, Yukun Zhu, George Papandreou, Florian Schroff, Hartwig Adam.
-     (https://arxiv.org/abs/1802.02611)
-
-    Args:
-        num_classes (int): the unique number of target classes.
-
-        backbone (paddle.nn.Layer): backbone networks, currently support Xception65, Resnet101_vd. Default Resnet101_vd.
-
-        model_pretrained (str): the path of pretrained model.
-
-        output_stride (int): the ratio of input size and final feature size. Default 16.
-
-        backbone_indices (tuple): two values in the tuple indicte the indices of output of backbone.
-                        the first index will be taken as a low-level feature in Deconder component;
-                        the second one will be taken as input of ASPP component.
-                        Usually backbone consists of four downsampling stage, and return an output of
-                        each stage, so we set default (0, 3), which means taking feature map of the first
-                        stage in backbone as low-level feature used in Decoder, and feature map of the fourth
-                        stage as input of ASPP.
-
-        backbone_channels (tuple): the same length with "backbone_indices". It indicates the channels of corresponding index.
-
-        ignore_index (int): the value of ground-truth mask would be ignored while doing evaluation. Default 255.
-
-        using_sep_conv (bool): a bool value indicates whether using separable convolutions
-                        in ASPP and Decoder components. Default True.
-    """
-
-    def __init__(self,
-                 num_classes,
-                 backbone,
-                 model_pretrained=None,
-                 output_stride=16,
-                 backbone_indices=(0, 3),
-                 backbone_channels=(256, 2048),
-                 ignore_index=255,
-                 using_sep_conv=True):
-
-        super(DeepLabV3P, self).__init__()
-
-        # self.backbone = manager.BACKBONES[backbone](output_stride=output_stride)
-        self.backbone = backbone
-        self.aspp = ASPP(output_stride, backbone_channels[1], using_sep_conv)
-        self.decoder = Decoder(num_classes, backbone_channels[0],
-                               using_sep_conv)
-        self.ignore_index = ignore_index
-        self.EPS = 1e-5
-        self.backbone_indices = backbone_indices
-        self.init_weight(model_pretrained)
-
-    def forward(self, input, label=None):
-
-        _, feat_list = self.backbone(input)
-        low_level_feat = feat_list[self.backbone_indices[0]]
-        x = feat_list[self.backbone_indices[1]]
-        x = self.aspp(x)
-        logit = self.decoder(x, low_level_feat)
-        logit = fluid.layers.resize_bilinear(logit, input.shape[2:])
-
-        if self.training:
-            return self._get_loss(logit, label)
-        else:
-            score_map = fluid.layers.softmax(logit, axis=1)
-            score_map = fluid.layers.transpose(score_map, [0, 2, 3, 1])
-            pred = fluid.layers.argmax(score_map, axis=3)
-            pred = fluid.layers.unsqueeze(pred, axes=[3])
-            return pred, score_map
-
-    def init_weight(self, pretrained_model=None):
-        """
-        Initialize the parameters of model parts.
-        Args:
-            pretrained_model ([str], optional): the path of pretrained model. Defaults to None.
-        """
-        if pretrained_model is not None:
-            if os.path.exists(pretrained_model):
-                utils.load_pretrained_model(self, pretrained_model)
-            else:
-                raise Exception('Pretrained model is not found: {}'.format(
-                    pretrained_model))
-
-    def _get_loss(self, logit, label):
-        """
-        compute forward loss of the model
-
-        Args:
-            logit (tensor): the logit of model output
-            label (tensor): ground truth
-
-        Returns:
-            avg_loss (tensor): forward loss
-        """
-        logit = fluid.layers.transpose(logit, [0, 2, 3, 1])
-        label = fluid.layers.transpose(label, [0, 2, 3, 1])
-        mask = label != self.ignore_index
-        mask = fluid.layers.cast(mask, 'float32')
-        loss, probs = fluid.layers.softmax_with_cross_entropy(
-            logit,
-            label,
-            ignore_index=self.ignore_index,
-            return_softmax=True,
-            axis=-1)
-
-        loss = loss * mask
-        avg_loss = fluid.layers.mean(loss) / (
-            fluid.layers.mean(mask) + self.EPS)
-
-        label.stop_gradient = True
-        mask.stop_gradient = True
-
-        return avg_loss
-
-
-def build_aspp(output_stride, using_sep_conv):
-    return ASPP(output_stride=output_stride, using_sep_conv=using_sep_conv)
-
-
-def build_decoder(num_classes, using_sep_conv):
-    return Decoder(num_classes, using_sep_conv=using_sep_conv)
-
-
-@manager.MODELS.add_component
-def deeplabv3p_resnet101_vd(*args, **kwargs):
-    pretrained_model = None
-    return DeepLabV3P(
-        backbone='ResNet101_vd', pretrained_model=pretrained_model, **kwargs)
-
-
-@manager.MODELS.add_component
-def deeplabv3p_resnet101_vd_os8(*args, **kwargs):
-    pretrained_model = None
-    return DeepLabV3P(
-        backbone='ResNet101_vd',
-        output_stride=8,
-        pretrained_model=pretrained_model,
-        **kwargs)
-
-
-@manager.MODELS.add_component
-def deeplabv3p_resnet50_vd(*args, **kwargs):
-    pretrained_model = None
-    return DeepLabV3P(
-        backbone='ResNet50_vd', pretrained_model=pretrained_model, **kwargs)
-
-
-@manager.MODELS.add_component
-def deeplabv3p_resnet50_vd_os8(*args, **kwargs):
-    pretrained_model = None
-    return DeepLabV3P(
-        backbone='ResNet50_vd',
-        output_stride=8,
-        pretrained_model=pretrained_model,
-        **kwargs)
-
-
-@manager.MODELS.add_component
-def deeplabv3p_xception65_deeplab(*args, **kwargs):
-    pretrained_model = None
-    return DeepLabV3P(
-        backbone='Xception65_deeplab',
-        pretrained_model=pretrained_model,
-        backbone_indices=(0, 1),
-        backbone_channels=(128, 2048),
-        **kwargs)
-
-
-@manager.MODELS.add_component
-def deeplabv3p_mobilenetv3_large(*args, **kwargs):
-    pretrained_model = None
-    return DeepLabV3P(
-        backbone='MobileNetV3_large_x1_0',
-        pretrained_model=pretrained_model,
-        backbone_indices=(0, 3),
-        backbone_channels=(24, 160),
-        **kwargs)
-
-
-@manager.MODELS.add_component
-def deeplabv3p_mobilenetv3_small(*args, **kwargs):
-    pretrained_model = None
-    return DeepLabV3P(
-        backbone='MobileNetV3_small_x1_0',
-        pretrained_model=pretrained_model,
-        backbone_indices=(0, 3),
-        backbone_channels=(16, 96),
-        **kwargs)
diff --git a/dygraph/models/fast_scnn.py b/dygraph/models/fast_scnn.py
deleted file mode 100644
index 6bd9b4d6e656bcb3530c50be120293b4f3fb05c6..0000000000000000000000000000000000000000
--- a/dygraph/models/fast_scnn.py
+++ /dev/null
@@ -1,302 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle import fluid, nn
-
-from dygraph.cvlibs import manager
-from dygraph.models import model_utils, pspnet
-from dygraph.models.architectures import layer_utils
-
-
-@manager.MODELS.add_component
-class FastSCNN(fluid.dygraph.Layer):
-    """
-    The FastSCNN implementation.
-
-    As mentioned in original paper, FastSCNN is a real-time segmentation algorithm (123.5fps) 
-    even for high resolution images (1024x2048).
-
-    The orginal artile refers to 
-        Poudel, Rudra PK, et al. "Fast-scnn: Fast semantic segmentation network."
-        (https://arxiv.org/pdf/1902.04502.pdf)
-
-    Args:
-
-        num_classes (int): the unique number of target classes. Default to 2.
-
-        enable_auxiliary_loss (bool): a bool values indictes whether adding auxiliary loss.
-        if true, auxiliary loss will be added after LearningToDownsample module, where the weight is 0.4. Default to False.
-
-        ignore_index (int): the value of ground-truth mask would be ignored while doing evaluation. Default to 255.
-    """
-
-    def __init__(self,
-                 num_classes=2,
-                 enable_auxiliary_loss=False,
-                 ignore_index=255):
-
-        super(FastSCNN, self).__init__()
-
-        self.learning_to_downsample = LearningToDownsample(32, 48, 64)
-        self.global_feature_extractor = GlobalFeatureExtractor(64, [64, 96, 128], 128, 6, [3, 3, 3])
-        self.feature_fusion = FeatureFusionModule(64, 128, 128)
-        self.classifier = Classifier(128, num_classes)
-
-        if enable_auxiliary_loss:
-            self.auxlayer = model_utils.AuxLayer(64, 32, num_classes)
-
-        self.enable_auxiliary_loss = enable_auxiliary_loss
-        self.ignore_index = ignore_index
-
-    def forward(self, input, label=None):
-
-        higher_res_features = self.learning_to_downsample(input)
-        x = self.global_feature_extractor(higher_res_features)
-        x = self.feature_fusion(higher_res_features, x)
-        logit = self.classifier(x)
-        logit = fluid.layers.resize_bilinear(logit, input.shape[2:])
-
-        if self.enable_auxiliary_loss:
-            auxiliary_logit = self.auxlayer(higher_res_features)
-            auxiliary_logit = fluid.layers.resize_bilinear(auxiliary_logit, input.shape[2:])
-
-        if self.training:
-            loss = model_utils.get_loss(logit, label)
-            if self.enable_auxiliary_loss:
-                auxiliary_loss = model_utils.get_loss(auxiliary_logit, label)
-                loss += (0.4 * auxiliary_loss)
-            return loss
-        else:
-            pred, score_map = model_utils.get_pred_score_map(logit)
-            return pred, score_map
-
-
-class LearningToDownsample(fluid.dygraph.Layer):
-    """
-    Learning to downsample module.
-
-    This module consists of three downsampling blocks (one Conv and two separable Conv)
-
-    Args:
-        dw_channels1 (int): the input channels of the first sep conv. Default to 32.
-
-        dw_channels2 (int): the input channels of the second sep conv. Default to 48.
-
-        out_channels (int): the output channels of LearningToDownsample module. Default to 64.
-    """
-
-    def __init__(self, dw_channels1=32, dw_channels2=48, out_channels=64):
-        super(LearningToDownsample, self).__init__()
-
-        self.conv_bn_relu = layer_utils.ConvBnRelu(num_channels=3,
-                                                   num_filters=dw_channels1,
-                                                   filter_size=3,
-                                                   stride=2)
-        self.dsconv_bn_relu1 = layer_utils.ConvBnRelu(num_channels=dw_channels1,
-                                                      num_filters=dw_channels2,
-                                                      filter_size=3,
-                                                      using_sep_conv=True,  # using sep conv
-                                                      stride=2,
-                                                      padding=1)
-        self.dsconv_bn_relu2 = layer_utils.ConvBnRelu(num_channels=dw_channels2,
-                                                      num_filters=out_channels,
-                                                      filter_size=3,
-                                                      using_sep_conv=True,  # using sep conv
-                                                      stride=2,
-                                                      padding=1)
-
-    def forward(self, x):
-        x = self.conv_bn_relu(x)
-        x = self.dsconv_bn_relu1(x)
-        x = self.dsconv_bn_relu2(x)
-        return x
-
-
-class GlobalFeatureExtractor(fluid.dygraph.Layer):
-    """
-    Global feature extractor module
-
-    This module consists of three LinearBottleneck blocks (like inverted residual introduced by MobileNetV2) and 
-    a PPModule (introduced by PSPNet).
-
-    Args:
-        in_channels (int): the number of input channels to the module. Default to 64.
-        block_channels (tuple): a tuple represents output channels of each bottleneck block. Default to (64, 96, 128).
-        out_channels (int): the number of output channels of the module. Default to 128.
-        expansion (int): the expansion factor in bottleneck. Default to 6.
-        num_blocks (tuple): it indicates the repeat time of each bottleneck. Default to (3, 3, 3).
-    """
-
-    def __init__(self, in_channels=64, block_channels=(64, 96, 128),
-                 out_channels=128, expansion=6, num_blocks=(3, 3, 3)):
-        super(GlobalFeatureExtractor, self).__init__()
-
-        self.bottleneck1 = self._make_layer(LinearBottleneck, in_channels, block_channels[0], num_blocks[0], expansion,
-                                            2)
-        self.bottleneck2 = self._make_layer(LinearBottleneck, block_channels[0], block_channels[1], num_blocks[1],
-                                            expansion, 2)
-        self.bottleneck3 = self._make_layer(LinearBottleneck, block_channels[1], block_channels[2], num_blocks[2],
-                                            expansion, 1)
-
-        self.ppm = pspnet.PPModule(block_channels[2], out_channels, dim_reduction=True)
-
-    def _make_layer(self, block, in_channels, out_channels, blocks, expansion=6, stride=1):
-        layers = []
-        layers.append(block(in_channels, out_channels, expansion, stride))
-        for i in range(1, blocks):
-            layers.append(block(out_channels, out_channels, expansion, 1))
-        return nn.Sequential(*layers)
-
-    def forward(self, x):
-        x = self.bottleneck1(x)
-        x = self.bottleneck2(x)
-        x = self.bottleneck3(x)
-        x = self.ppm(x)
-        return x
-
-
-class LinearBottleneck(fluid.dygraph.Layer):
-    """
-    Single bottleneck implementation.
-
-    Args:
-        in_channels (int): the number of input channels to bottleneck block.
-
-        out_channels (int): the number of output channels of bottleneck block.
-
-        expansion (int). the expansion factor in bottleneck. Default to 6.
-
-        stride (int). the stride used in depth-wise conv.
-    """
-
-    def __init__(self, in_channels, out_channels, expansion=6, stride=2, **kwargs):
-        super(LinearBottleneck, self).__init__()
-
-        self.use_shortcut = stride == 1 and in_channels == out_channels
-
-        expand_channels = in_channels * expansion
-        self.block = nn.Sequential(
-            # pw
-            layer_utils.ConvBnRelu(num_channels=in_channels,
-                                   num_filters=expand_channels,
-                                   filter_size=1,
-                                   bias_attr=False),
-            # dw
-            layer_utils.ConvBnRelu(num_channels=expand_channels,
-                                   num_filters=expand_channels,
-                                   filter_size=3,
-                                   stride=stride,
-                                   padding=1,
-                                   groups=expand_channels,
-                                   bias_attr=False),
-            # pw-linear
-            nn.Conv2D(num_channels=expand_channels,
-                      num_filters=out_channels,
-                      filter_size=1,
-                      bias_attr=False),
-
-            nn.BatchNorm(out_channels)
-        )
-
-    def forward(self, x):
-        out = self.block(x)
-        if self.use_shortcut:
-            out = x + out
-        return out
-
-
-class FeatureFusionModule(fluid.dygraph.Layer):
-    """
-    Feature Fusion Module Implememtation.
-
-    This module fuses high-resolution feature and low-resolution feature.
-
-    Args:
-        high_in_channels (int): the channels of high-resolution feature (output of LearningToDownsample).
-
-        low_in_channels (int). the channels of low-resolution feature (output of GlobalFeatureExtractor).
-
-        out_channels (int). the output channels of this module.
-    """
-
-    def __init__(self, high_in_channels, low_in_channels, out_channels):
-        super(FeatureFusionModule, self).__init__()
-
-        # There only depth-wise conv is used WITHOUT point-sied conv
-        self.dwconv = layer_utils.ConvBnRelu(num_channels=low_in_channels,
-                                             num_filters=out_channels,
-                                             filter_size=3,
-                                             padding=1,
-                                             groups=128)
-
-        self.conv_low_res = nn.Sequential(
-            nn.Conv2D(num_channels=out_channels, num_filters=out_channels, filter_size=1),
-            nn.BatchNorm(out_channels))
-
-        self.conv_high_res = nn.Sequential(
-            nn.Conv2D(num_channels=high_in_channels, num_filters=out_channels, filter_size=1),
-            nn.BatchNorm(out_channels))
-
-        self.relu = nn.ReLU(True)
-
-    def forward(self, high_res_input, low_res_input):
-        low_res_input = fluid.layers.resize_bilinear(input=low_res_input, scale=4)
-        low_res_input = self.dwconv(low_res_input)
-        low_res_input = self.conv_low_res(low_res_input)
-
-        high_res_input = self.conv_high_res(high_res_input)
-
-        x = high_res_input + low_res_input
-
-        return self.relu(x)
-
-
-class Classifier(fluid.dygraph.Layer):
-    """
-    The Classifier module implemetation.
-
-    This module consists of two depth-wsie conv and one conv.
-
-    Args:
-        input_channels (int): the input channels to this module.
-
-        num_classes (int). the unique number of target classes.
-
-    """
-
-    def __init__(self, input_channels, num_classes):
-        super(Classifier, self).__init__()
-
-        self.dsconv1 = layer_utils.ConvBnRelu(num_channels=input_channels,
-                                              num_filters=input_channels,
-                                              filter_size=3,
-                                              using_sep_conv=True  # using sep conv
-                                              )
-
-        self.dsconv2 = layer_utils.ConvBnRelu(num_channels=input_channels,
-                                              num_filters=input_channels,
-                                              filter_size=3,
-                                              using_sep_conv=True  # using sep conv
-                                              )
-
-        self.conv = nn.Conv2D(num_channels=input_channels,
-                              num_filters=num_classes,
-                              filter_size=1)
-
-    def forward(self, x):
-        x = self.dsconv1(x)
-        x = self.dsconv2(x)
-        x = fluid.layers.dropout(x, dropout_prob=0.1)
-        x = self.conv(x)
-        return x
diff --git a/dygraph/models/fcn.py b/dygraph/models/fcn.py
deleted file mode 100644
index 5201cc98b090cc6e0fd48e093a8b43c277aca631..0000000000000000000000000000000000000000
--- a/dygraph/models/fcn.py
+++ /dev/null
@@ -1,204 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import math
-import os
-
-import paddle
-import paddle.fluid as fluid
-from paddle.fluid.param_attr import ParamAttr
-from paddle.fluid.layer_helper import LayerHelper
-from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
-from paddle.fluid.initializer import Normal
-from paddle.nn import SyncBatchNorm as BatchNorm
-
-from dygraph.cvlibs import manager
-from dygraph import utils
-from dygraph.cvlibs import param_init
-from dygraph.utils import logger
-
-__all__ = [
-    "fcn_hrnet_w18_small_v1", "fcn_hrnet_w18_small_v2", "fcn_hrnet_w18",
-    "fcn_hrnet_w30", "fcn_hrnet_w32", "fcn_hrnet_w40", "fcn_hrnet_w44",
-    "fcn_hrnet_w48", "fcn_hrnet_w60", "fcn_hrnet_w64"
-]
-
-
-@manager.MODELS.add_component
-class FCN(fluid.dygraph.Layer):
-    """
-    Fully Convolutional Networks for Semantic Segmentation.
-    https://arxiv.org/abs/1411.4038
-
-    Args:
-        num_classes (int): the unique number of target classes.
-
-        backbone (paddle.nn.Layer): backbone networks.
-
-        model_pretrained (str): the path of pretrained model.
-
-        backbone_indices (tuple): one values in the tuple indicte the indices of output of backbone.Default -1.
-
-        backbone_channels (tuple): the same length with "backbone_indices". It indicates the channels of corresponding index.
-
-        channels (int): channels after conv layer before the last one.
-    """
-
-    def __init__(self,
-                 num_classes,
-                 backbone,
-                 backbone_pretrained=None,
-                 model_pretrained=None,
-                 backbone_indices=(-1, ),
-                 backbone_channels=(270, ),
-                 channels=None):
-        super(FCN, self).__init__()
-
-        self.num_classes = num_classes
-        self.backbone_pretrained = backbone_pretrained
-        self.model_pretrained = model_pretrained
-        self.backbone_indices = backbone_indices
-        if channels is None:
-            channels = backbone_channels[backbone_indices[0]]
-
-        self.backbone = backbone
-        self.conv_last_2 = ConvBNLayer(
-            num_channels=backbone_channels[backbone_indices[0]],
-            num_filters=channels,
-            filter_size=1,
-            stride=1)
-        self.conv_last_1 = Conv2D(
-            num_channels=channels,
-            num_filters=self.num_classes,
-            filter_size=1,
-            stride=1,
-            padding=0)
-        if self.training:
-            self.init_weight()
-
-    def forward(self, x):
-        input_shape = x.shape[2:]
-        fea_list = self.backbone(x)
-        x = fea_list[self.backbone_indices[0]]
-        x = self.conv_last_2(x)
-        logit = self.conv_last_1(x)
-        logit = fluid.layers.resize_bilinear(logit, input_shape)
-        return [logit]
-
-    def init_weight(self):
-        params = self.parameters()
-        for param in params:
-            param_name = param.name
-            if 'batch_norm' in param_name:
-                if 'w_0' in param_name:
-                    param_init.constant_init(param, value=1.0)
-                elif 'b_0' in param_name:
-                    param_init.constant_init(param, value=0.0)
-            if 'conv' in param_name and 'w_0' in param_name:
-                param_init.normal_init(param, scale=0.001)
-
-        if self.model_pretrained is not None:
-            if os.path.exists(self.model_pretrained):
-                utils.load_pretrained_model(self, self.model_pretrained)
-            else:
-                raise Exception('Pretrained model is not found: {}'.format(
-                    self.model_pretrained))
-        elif self.backbone_pretrained is not None:
-            if os.path.exists(self.backbone_pretrained):
-                utils.load_pretrained_model(self.backbone,
-                                            self.backbone_pretrained)
-            else:
-                raise Exception('Pretrained model is not found: {}'.format(
-                    self.backbone_pretrained))
-        else:
-            logger.warning('No pretrained model to load, train from scratch')
-
-
-class ConvBNLayer(fluid.dygraph.Layer):
-    def __init__(self,
-                 num_channels,
-                 num_filters,
-                 filter_size,
-                 stride=1,
-                 groups=1,
-                 act="relu"):
-        super(ConvBNLayer, self).__init__()
-
-        self._conv = Conv2D(
-            num_channels=num_channels,
-            num_filters=num_filters,
-            filter_size=filter_size,
-            stride=stride,
-            padding=(filter_size - 1) // 2,
-            groups=groups,
-            bias_attr=False)
-        self._batch_norm = BatchNorm(num_filters)
-        self.act = act
-
-    def forward(self, input):
-        y = self._conv(input)
-        y = self._batch_norm(y)
-        if self.act == 'relu':
-            y = fluid.layers.relu(y)
-        return y
-
-
-@manager.MODELS.add_component
-def fcn_hrnet_w18_small_v1(*args, **kwargs):
-    return FCN(backbone='HRNet_W18_Small_V1', backbone_channels=(240), **kwargs)
-
-
-@manager.MODELS.add_component
-def fcn_hrnet_w18_small_v2(*args, **kwargs):
-    return FCN(backbone='HRNet_W18_Small_V2', backbone_channels=(270), **kwargs)
-
-
-@manager.MODELS.add_component
-def fcn_hrnet_w18(*args, **kwargs):
-    return FCN(backbone='HRNet_W18', backbone_channels=(270), **kwargs)
-
-
-@manager.MODELS.add_component
-def fcn_hrnet_w30(*args, **kwargs):
-    return FCN(backbone='HRNet_W30', backbone_channels=(450), **kwargs)
-
-
-@manager.MODELS.add_component
-def fcn_hrnet_w32(*args, **kwargs):
-    return FCN(backbone='HRNet_W32', backbone_channels=(480), **kwargs)
-
-
-@manager.MODELS.add_component
-def fcn_hrnet_w40(*args, **kwargs):
-    return FCN(backbone='HRNet_W40', backbone_channels=(600), **kwargs)
-
-
-@manager.MODELS.add_component
-def fcn_hrnet_w44(*args, **kwargs):
-    return FCN(backbone='HRNet_W44', backbone_channels=(660), **kwargs)
-
-
-@manager.MODELS.add_component
-def fcn_hrnet_w48(*args, **kwargs):
-    return FCN(backbone='HRNet_W48', backbone_channels=(720), **kwargs)
-
-
-@manager.MODELS.add_component
-def fcn_hrnet_w60(*args, **kwargs):
-    return FCN(backbone='HRNet_W60', backbone_channels=(900), **kwargs)
-
-
-@manager.MODELS.add_component
-def fcn_hrnet_w64(*args, **kwargs):
-    return FCN(backbone='HRNet_W64', backbone_channels=(960), **kwargs)
diff --git a/dygraph/models/losses/__init__.py b/dygraph/models/losses/__init__.py
deleted file mode 100644
index f58a9fe1dccce025fa5ee9dec8887fbfc3b9deb8..0000000000000000000000000000000000000000
--- a/dygraph/models/losses/__init__.py
+++ /dev/null
@@ -1,15 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from .cross_entroy_loss import CrossEntropyLoss
diff --git a/dygraph/models/losses/cross_entroy_loss.py b/dygraph/models/losses/cross_entroy_loss.py
deleted file mode 100644
index a3de4eb68e4b13cdc66d182218433308c0902398..0000000000000000000000000000000000000000
--- a/dygraph/models/losses/cross_entroy_loss.py
+++ /dev/null
@@ -1,141 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import paddle
-from paddle import nn
-import paddle.nn.functional as F
-import paddle.fluid as fluid
-
-from dygraph.cvlibs import manager
-'''
-@manager.LOSSES.add_component
-class CrossEntropyLoss(nn.CrossEntropyLoss):
-    """
-    Implements the cross entropy loss function.
-
-    Args:
-        weight (Tensor): Weight tensor, a manual rescaling weight given
-            to each class and the shape is (C). It has the same dimensions as class
-	        number and the data type is float32, float64. Default ``'None'``.
-        ignore_index (int64): Specifies a target value that is ignored
-            and does not contribute to the input gradient. Default ``255``.
-        reduction (str): Indicate how to average the loss by batch_size,
-            the candicates are ``'none'`` | ``'mean'`` | ``'sum'``.
-            If :attr:`reduction` is ``'mean'``, the reduced mean loss is returned;
-            If :attr:`size_average` is ``'sum'``, the reduced sum loss is returned.
-            If :attr:`reduction` is ``'none'``, the unreduced loss is returned.
-            Default ``'mean'``.
-
-    """
-
-    def __init__(self, weight=None, ignore_index=255, reduction='mean'):
-        self.weight = weight
-        self.ignore_index = ignore_index
-        self.reduction = reduction
-        self.EPS = 1e-5
-        if self.reduction not in ['sum', 'mean', 'none']:
-            raise ValueError(
-                "The value of 'reduction' in cross_entropy_loss should be 'sum', 'mean' or"
-                " 'none', but received %s, which is not allowed." %
-                self.reduction)
-
-    def forward(self, logit, label):
-        """
-        Forward computation.
-        Args:
-            logit (Tensor): logit tensor, the data type is float32, float64. Shape is
-	            (N, C), where C is number of classes, and if shape is more than 2D, this
-	            is (N, C, D1, D2,..., Dk), k >= 1.
-            label (Variable): label tensor, the data type is int64. Shape is (N), where each
-	            value is 0 <= label[i] <= C-1, and if shape is more than 2D, this is
-	            (N, D1, D2,..., Dk), k >= 1.
-        """
-        loss = paddle.nn.functional.cross_entropy(
-            logit,
-            label,
-            weight=self.weight,
-            ignore_index=self.ignore_index,
-            reduction=self.reduction)
-
-        mask = label != self.ignore_index
-        mask = paddle.cast(mask, 'float32')
-        avg_loss = loss / (paddle.mean(mask) + self.EPS)
-
-        label.stop_gradient = True
-        mask.stop_gradient = True
-        return avg_loss
-'''
-
-
-@manager.LOSSES.add_component
-class CrossEntropyLoss(nn.Layer):
-    """
-    Implements the cross entropy loss function.
-
-    Args:
-        ignore_index (int64): Specifies a target value that is ignored
-            and does not contribute to the input gradient. Default ``255``.
-    """
-
-    def __init__(self, ignore_index=255):
-        super(CrossEntropyLoss, self).__init__()
-        self.ignore_index = ignore_index
-        self.EPS = 1e-5
-
-    def forward(self, logit, label):
-        """
-        Forward computation.
-        Args:
-            logit (Tensor): logit tensor, the data type is float32, float64. Shape is
-	            (N, C), where C is number of classes, and if shape is more than 2D, this
-	            is (N, C, D1, D2,..., Dk), k >= 1.
-            label (Variable): label tensor, the data type is int64. Shape is (N), where each
-	            value is 0 <= label[i] <= C-1, and if shape is more than 2D, this is
-	            (N, D1, D2,..., Dk), k >= 1.
-        """
-        if len(label.shape) != len(logit.shape):
-            label = paddle.unsqueeze(label, 1)
-
-        # logit = paddle.transpose(logit, [0, 2, 3, 1])
-        # label = paddle.transpose(label, [0, 2, 3, 1])
-        # loss = F.softmax_with_cross_entropy(
-        #     logit, label, ignore_index=self.ignore_index, axis=-1)
-        # loss = paddle.reduce_mean(loss)
-
-        # mask = label != self.ignore_index
-        # mask = paddle.cast(mask, 'float32')
-        # avg_loss = loss / (paddle.mean(mask) + self.EPS)
-
-        # label.stop_gradient = True
-        # mask.stop_gradient = True
-        # return avg_loss
-
-        logit = fluid.layers.transpose(logit, [0, 2, 3, 1])
-        label = fluid.layers.transpose(label, [0, 2, 3, 1])
-        mask = label != self.ignore_index
-        mask = fluid.layers.cast(mask, 'float32')
-        loss, probs = fluid.layers.softmax_with_cross_entropy(
-            logit,
-            label,
-            ignore_index=self.ignore_index,
-            return_softmax=True,
-            axis=-1)
-
-        loss = loss * mask
-        avg_loss = fluid.layers.mean(loss) / (
-            fluid.layers.mean(mask) + self.EPS)
-
-        label.stop_gradient = True
-        mask.stop_gradient = True
-        return avg_loss
diff --git a/dygraph/models/model_utils.py b/dygraph/models/model_utils.py
deleted file mode 100644
index 7f52919915faf3fa2cca6b567e0c6b8a105e7e0b..0000000000000000000000000000000000000000
--- a/dygraph/models/model_utils.py
+++ /dev/null
@@ -1,130 +0,0 @@
-# -*- encoding: utf-8 -*-
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import paddle
-import paddle.nn.functional as F
-from paddle import fluid
-from paddle.fluid import dygraph
-from paddle.fluid.dygraph import Conv2D
-#from paddle.nn import SyncBatchNorm as BatchNorm
-from paddle.fluid.dygraph import SyncBatchNorm as BatchNorm
-
-from dygraph.models.architectures import layer_utils
-
-
-class FCNHead(fluid.dygraph.Layer):
-    """
-    The FCNHead implementation used in auxilary layer
-
-    Args:
-        in_channels (int): the number of input channels
-        out_channels (int): the number of output channels
-    """
-
-    def __init__(self, in_channels, out_channels):
-        super(FCNHead, self).__init__()
-
-        inter_channels = in_channels // 4
-        self.conv_bn_relu = layer_utils.ConvBnRelu(num_channels=in_channels,
-                                                   num_filters=inter_channels,
-                                                   filter_size=3,
-                                                   padding=1)
-
-        self.conv = Conv2D(num_channels=inter_channels,
-                           num_filters=out_channels,
-                           filter_size=1)
-
-    def forward(self, x):
-        x = self.conv_bn_relu(x)
-        x = F.dropout(x, dropout_prob=0.1)
-        x = self.conv(x)
-        return x
-
-class AuxLayer(fluid.dygraph.Layer):
-    """
-    The auxilary layer implementation for auxilary loss
-
-    Args:
-        in_channels (int): the number of input channels.
-        inter_channels (int): intermediate channels.
-        out_channels (int): the number of output channels, which is usually num_classes.
-    """
-
-    def __init__(self, in_channels, inter_channels, out_channels):
-        super(AuxLayer, self).__init__()
-
-        self.conv_bn_relu = layer_utils.ConvBnRelu(num_channels=in_channels,
-                                                   num_filters=inter_channels,
-                                                   filter_size=3,
-                                                   padding=1)
-
-        self.conv = Conv2D(num_channels=inter_channels,
-                           num_filters=out_channels,
-                           filter_size=1)
-
-    def forward(self, x):
-        x = self.conv_bn_relu(x)
-        x = F.dropout(x, dropout_prob=0.1)
-        x = self.conv(x)
-        return x
-
-def get_loss(logit, label, ignore_index=255, EPS=1e-5):
-    """
-    compute forward loss of the model
-
-    Args:
-        logit (tensor): the logit of model output
-        label (tensor): ground truth
-
-    Returns:
-        avg_loss (tensor): forward loss
-    """
-    logit = fluid.layers.transpose(logit, [0, 2, 3, 1])
-    label = fluid.layers.transpose(label, [0, 2, 3, 1])
-    mask = label != ignore_index
-    mask = fluid.layers.cast(mask, 'float32')
-    loss, probs = fluid.layers.softmax_with_cross_entropy(
-        logit,
-        label,
-        ignore_index=ignore_index,
-        return_softmax=True,
-        axis=-1)
-
-    loss = loss * mask
-    avg_loss = paddle.mean(loss) / (paddle.mean(mask) + EPS)
-
-    label.stop_gradient = True
-    mask.stop_gradient = True
-
-    return avg_loss
-
-
-def get_pred_score_map(logit):
-    """
-    Get prediction and score map output in inference phase.
-
-    Args:
-        logit (tensor): output logit of network
-
-    Returns:
-        pred (tensor): predition map
-        score_map (tensor): score map
-    """
-    score_map = F.softmax(logit, axis=1)
-    score_map = fluid.layers.transpose(score_map, [0, 2, 3, 1])
-    pred = fluid.layers.argmax(score_map, axis=3)
-    pred = fluid.layers.unsqueeze(pred, axes=[3])
-
-    return pred, score_map
\ No newline at end of file
diff --git a/dygraph/models/ocrnet.py b/dygraph/models/ocrnet.py
deleted file mode 100644
index bdadd6d5b2a1e1946a9207eaa166705fb51da06e..0000000000000000000000000000000000000000
--- a/dygraph/models/ocrnet.py
+++ /dev/null
@@ -1,215 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-
-import paddle.fluid as fluid
-from paddle.fluid.dygraph import Sequential, Conv2D
-
-from dygraph.cvlibs import manager
-from dygraph.models.architectures.layer_utils import ConvBnRelu
-from dygraph import utils
-
-
-class SpatialGatherBlock(fluid.dygraph.Layer):
-    def forward(self, pixels, regions):
-        n, c, h, w = pixels.shape
-        _, k, _, _ = regions.shape
-
-        # pixels: from (n, c, h, w) to (n, h*w, c)
-        pixels = fluid.layers.reshape(pixels, (n, c, h * w))
-        pixels = fluid.layers.transpose(pixels, (0, 2, 1))
-
-        # regions: from (n, k, h, w) to (n, k, h*w)
-        regions = fluid.layers.reshape(regions, (n, k, h * w))
-        regions = fluid.layers.softmax(regions, axis=2)
-
-        # feats: from (n, k, c) to (n, c, k, 1)
-        feats = fluid.layers.matmul(regions, pixels)
-        feats = fluid.layers.transpose(feats, (0, 2, 1))
-        feats = fluid.layers.unsqueeze(feats, axes=[-1])
-
-        return feats
-
-
-class SpatialOCRModule(fluid.dygraph.Layer):
-    def __init__(self,
-                 in_channels,
-                 key_channels,
-                 out_channels,
-                 dropout_rate=0.1):
-        super(SpatialOCRModule, self).__init__()
-
-        self.attention_block = ObjectAttentionBlock(in_channels, key_channels)
-        self.dropout_rate = dropout_rate
-        self.conv1x1 = Conv2D(2 * in_channels, out_channels, 1)
-
-    def forward(self, pixels, regions):
-        context = self.attention_block(pixels, regions)
-        feats = fluid.layers.concat([context, pixels], axis=1)
-
-        feats = self.conv1x1(feats)
-        feats = fluid.layers.dropout(feats, self.dropout_rate)
-
-        return feats
-
-
-class ObjectAttentionBlock(fluid.dygraph.Layer):
-    def __init__(self, in_channels, key_channels):
-        super(ObjectAttentionBlock, self).__init__()
-
-        self.in_channels = in_channels
-        self.key_channels = key_channels
-
-        self.f_pixel = Sequential(
-            ConvBnRelu(in_channels, key_channels, 1),
-            ConvBnRelu(key_channels, key_channels, 1))
-
-        self.f_object = Sequential(
-            ConvBnRelu(in_channels, key_channels, 1),
-            ConvBnRelu(key_channels, key_channels, 1))
-
-        self.f_down = ConvBnRelu(in_channels, key_channels, 1)
-
-        self.f_up = ConvBnRelu(key_channels, in_channels, 1)
-
-    def forward(self, x, proxy):
-        n, _, h, w = x.shape
-
-        # query : from (n, c1, h1, w1) to (n, h1*w1, key_channels)
-        query = self.f_pixel(x)
-        query = fluid.layers.reshape(query, (n, self.key_channels, -1))
-        query = fluid.layers.transpose(query, (0, 2, 1))
-
-        # key : from (n, c2, h2, w2) to (n, key_channels, h2*w2)
-        key = self.f_object(proxy)
-        key = fluid.layers.reshape(key, (n, self.key_channels, -1))
-
-        # value : from (n, c2, h2, w2) to (n, h2*w2, key_channels)
-        value = self.f_down(proxy)
-        value = fluid.layers.reshape(value, (n, self.key_channels, -1))
-        value = fluid.layers.transpose(value, (0, 2, 1))
-
-        # sim_map (n, h1*w1, h2*w2)
-        sim_map = fluid.layers.matmul(query, key)
-        sim_map = (self.key_channels**-.5) * sim_map
-        sim_map = fluid.layers.softmax(sim_map, axis=-1)
-
-        # context from (n, h1*w1, key_channels) to (n , out_channels, h1, w1)
-        context = fluid.layers.matmul(sim_map, value)
-        context = fluid.layers.transpose(context, (0, 2, 1))
-        context = fluid.layers.reshape(context, (n, self.key_channels, h, w))
-        context = self.f_up(context)
-
-        return context
-
-
-@manager.MODELS.add_component
-class OCRNet(fluid.dygraph.Layer):
-    def __init__(self,
-                 num_classes,
-                 backbone,
-                 model_pretrained=None,
-                 in_channels=None,
-                 ocr_mid_channels=512,
-                 ocr_key_channels=256,
-                 ignore_index=255):
-        super(OCRNet, self).__init__()
-
-        self.ignore_index = ignore_index
-        self.num_classes = num_classes
-        self.EPS = 1e-5
-
-        self.backbone = backbone
-        self.spatial_gather = SpatialGatherBlock()
-        self.spatial_ocr = SpatialOCRModule(ocr_mid_channels, ocr_key_channels,
-                                            ocr_mid_channels)
-        self.conv3x3_ocr = ConvBnRelu(
-            in_channels, ocr_mid_channels, 3, padding=1)
-        self.cls_head = Conv2D(ocr_mid_channels, self.num_classes, 1)
-
-        self.aux_head = Sequential(
-            ConvBnRelu(in_channels, in_channels, 3, padding=1),
-            Conv2D(in_channels, self.num_classes, 1))
-
-        self.init_weight(model_pretrained)
-
-    def forward(self, x, label=None):
-        feats = self.backbone(x)
-
-        soft_regions = self.aux_head(feats)
-        pixels = self.conv3x3_ocr(feats)
-
-        object_regions = self.spatial_gather(pixels, soft_regions)
-        ocr = self.spatial_ocr(pixels, object_regions)
-
-        logit = self.cls_head(ocr)
-        logit = fluid.layers.resize_bilinear(logit, x.shape[2:])
-
-        if self.training:
-            soft_regions = fluid.layers.resize_bilinear(soft_regions,
-                                                        x.shape[2:])
-            cls_loss = self._get_loss(logit, label)
-            aux_loss = self._get_loss(soft_regions, label)
-            return cls_loss + 0.4 * aux_loss
-
-        score_map = fluid.layers.softmax(logit, axis=1)
-        score_map = fluid.layers.transpose(score_map, [0, 2, 3, 1])
-        pred = fluid.layers.argmax(score_map, axis=3)
-        pred = fluid.layers.unsqueeze(pred, axes=[3])
-        return pred, score_map
-
-    def init_weight(self, pretrained_model=None):
-        """
-        Initialize the parameters of model parts.
-        Args:
-            pretrained_model ([str], optional): the path of pretrained model.. Defaults to None.
-        """
-        if pretrained_model is not None:
-            if os.path.exists(pretrained_model):
-                utils.load_pretrained_model(self, pretrained_model)
-            else:
-                raise Exception('Pretrained model is not found: {}'.format(
-                    pretrained_model))
-
-    def _get_loss(self, logit, label):
-        """
-        compute forward loss of the model
-
-        Args:
-            logit (tensor): the logit of model output
-            label (tensor): ground truth
-
-        Returns:
-            avg_loss (tensor): forward loss
-        """
-        logit = fluid.layers.transpose(logit, [0, 2, 3, 1])
-        label = fluid.layers.transpose(label, [0, 2, 3, 1])
-        mask = label != self.ignore_index
-        mask = fluid.layers.cast(mask, 'float32')
-        loss, probs = fluid.layers.softmax_with_cross_entropy(
-            logit,
-            label,
-            ignore_index=self.ignore_index,
-            return_softmax=True,
-            axis=-1)
-
-        loss = loss * mask
-        avg_loss = fluid.layers.mean(loss) / (
-            fluid.layers.mean(mask) + self.EPS)
-
-        label.stop_gradient = True
-        mask.stop_gradient = True
-
-        return avg_loss
diff --git a/dygraph/models/pspnet.py b/dygraph/models/pspnet.py
deleted file mode 100644
index 0e376e21ca7d6c57b2d0b121e82a3ca0f5a57c10..0000000000000000000000000000000000000000
--- a/dygraph/models/pspnet.py
+++ /dev/null
@@ -1,246 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-
-import paddle.nn.functional as F
-from paddle import fluid
-from paddle.fluid.dygraph import Conv2D
-
-from dygraph.cvlibs import manager
-from dygraph.models import model_utils
-from dygraph.models.architectures import layer_utils
-from dygraph.utils import utils
-
-
-class PSPNet(fluid.dygraph.Layer):
-    """
-    The PSPNet implementation
-
-    The orginal artile refers to
-        Zhao, Hengshuang, et al. "Pyramid scene parsing network."
-        Proceedings of the IEEE conference on computer vision and pattern recognition. 2017.
-        (https://openaccess.thecvf.com/content_cvpr_2017/papers/Zhao_Pyramid_Scene_Parsing_CVPR_2017_paper.pdf)
-
-    Args:
-        num_classes (int): the unique number of target classes.
-
-        backbone (Paddle.nn.Layer): backbone name, currently support Resnet50/101.
-
-        model_pretrained (str): the path of pretrained model.
-
-        output_stride (int): the ratio of input size and final feature size. Default 16.
-
-        backbone_indices (tuple): two values in the tuple indicte the indices of output of backbone.
-                        the first index will be taken as a deep-supervision feature in auxiliary layer;
-                        the second one will be taken as input of Pyramid Pooling Module (PPModule).
-                        Usually backbone consists of four downsampling stage, and return an output of
-                        each stage, so we set default (2, 3), which means taking feature map of the third
-                        stage (res4b22) in backbone, and feature map of the fourth stage (res5c) as input of PPModule.
-
-        backbone_channels (tuple): the same length with "backbone_indices". It indicates the channels of corresponding index.
-
-        pp_out_channels (int): output channels after Pyramid Pooling Module. Default to 1024.
-
-        bin_sizes (tuple): the out size of pooled feature maps. Default to (1,2,3,6).
-
-        enable_auxiliary_loss (bool): a bool values indictes whether adding auxiliary loss. Default to True.
-
-        ignore_index (int): the value of ground-truth mask would be ignored while doing evaluation. Default to 255.
-    """
-
-    def __init__(self,
-                 num_classes,
-                 backbone,
-                 model_pretrained=None,
-                 output_stride=16,
-                 backbone_indices=(2, 3),
-                 backbone_channels=(1024, 2048),
-                 pp_out_channels=1024,
-                 bin_sizes=(1, 2, 3, 6),
-                 enable_auxiliary_loss=True,
-                 ignore_index=255):
-
-        super(PSPNet, self).__init__()
-        # self.backbone = manager.BACKBONES[backbone](output_stride=output_stride,
-        #                                             multi_grid=(1, 1, 1))
-        self.backbone = backbone
-        self.backbone_indices = backbone_indices
-
-        self.psp_module = PPModule(
-            in_channels=backbone_channels[1],
-            out_channels=pp_out_channels,
-            bin_sizes=bin_sizes)
-
-        self.conv = Conv2D(
-            num_channels=pp_out_channels,
-            num_filters=num_classes,
-            filter_size=1)
-
-        if enable_auxiliary_loss:
-            self.fcn_head = model_utils.FCNHead(
-                in_channels=backbone_channels[0], out_channels=num_classes)
-
-        self.enable_auxiliary_loss = enable_auxiliary_loss
-        self.ignore_index = ignore_index
-
-        self.init_weight(model_pretrained)
-
-    def forward(self, input, label=None):
-
-        _, feat_list = self.backbone(input)
-
-        x = feat_list[self.backbone_indices[1]]
-        x = self.psp_module(x)
-        x = F.dropout(x, dropout_prob=0.1)
-        logit = self.conv(x)
-        logit = fluid.layers.resize_bilinear(logit, input.shape[2:])
-
-        if self.enable_auxiliary_loss:
-            auxiliary_feat = feat_list[self.backbone_indices[0]]
-            auxiliary_logit = self.fcn_head(auxiliary_feat)
-            auxiliary_logit = fluid.layers.resize_bilinear(
-                auxiliary_logit, input.shape[2:])
-
-        if self.training:
-            loss = model_utils.get_loss(logit, label)
-            if self.enable_auxiliary_loss:
-                auxiliary_loss = model_utils.get_loss(auxiliary_logit, label)
-                loss += (0.4 * auxiliary_loss)
-            return loss
-
-        else:
-            pred, score_map = model_utils.get_pred_score_map(logit)
-            return pred, score_map
-
-    def init_weight(self, pretrained_model=None):
-        """
-        Initialize the parameters of model parts.
-        Args:
-            pretrained_model ([str], optional): the path of pretrained model. Defaults to None.
-        """
-        if pretrained_model is not None:
-            if os.path.exists(pretrained_model):
-                utils.load_pretrained_model(self, pretrained_model)
-            else:
-                raise Exception('Pretrained model is not found: {}'.format(
-                    pretrained_model))
-
-
-class PPModule(fluid.dygraph.Layer):
-    """
-    Pyramid pooling module
-
-    Args:
-        in_channels (int): the number of intput channels to pyramid pooling module.
-
-        out_channels (int): the number of output channels after pyramid pooling module.
-
-        bin_sizes (tuple): the out size of pooled feature maps. Default to (1,2,3,6).
-
-        dim_reduction (bool): a bool value represent if reduing dimention after pooling. Default to True.
-    """
-
-    def __init__(self, in_channels, out_channels, bin_sizes=(1, 2, 3, 6), dim_reduction=True):
-        super(PPModule, self).__init__()
-        self.bin_sizes = bin_sizes
-
-        inter_channels = in_channels
-        if dim_reduction:
-            inter_channels = in_channels // len(bin_sizes)
-        
-        # we use dimension reduction after pooling mentioned in original implementation.
-        self.stages = fluid.dygraph.LayerList([self._make_stage(in_channels, inter_channels, size) for size in bin_sizes])
-
-        self.conv_bn_relu2 = layer_utils.ConvBnRelu(num_channels=in_channels + inter_channels * len(bin_sizes),
-                                                    num_filters=out_channels,
-                                                    filter_size=3,
-                                                    padding=1)
-
-    def _make_stage(self, in_channels, out_channels, size):
-        """
-        Create one pooling layer.
-
-        In our implementation, we adopt the same dimention reduction as the original paper that might be
-        slightly different with other implementations.
-
-        After pooling, the channels are reduced to 1/len(bin_sizes) immediately, while some other implementations
-        keep the channels to be same.
-
-
-        Args:
-            in_channels (int): the number of intput channels to pyramid pooling module.
-
-            size (int): the out size of the pooled layer.
-
-        Returns:
-            conv (tensor): a tensor after Pyramid Pooling Module
-        """
-
-        # this paddle version does not support AdaptiveAvgPool2d, so skip it here.
-        # prior = nn.AdaptiveAvgPool2d(output_size=(size, size))
-        conv = layer_utils.ConvBnRelu(num_channels=in_channels,
-                                      num_filters=out_channels,
-                                      filter_size=1)
-
-        return conv
-
-    def forward(self, input):
-        cat_layers = []
-        for i, stage in enumerate(self.stages):
-            size = self.bin_sizes[i]
-            x = fluid.layers.adaptive_pool2d(
-                input, pool_size=(size, size), pool_type="max")
-            x = stage(x)
-            x = fluid.layers.resize_bilinear(x, out_shape=input.shape[2:])
-            cat_layers.append(x)
-        cat_layers = [input] + cat_layers[::-1]
-        cat = fluid.layers.concat(cat_layers, axis=1)
-        out = self.conv_bn_relu2(cat)
-
-        return out
-
-
-@manager.MODELS.add_component
-def pspnet_resnet101_vd(*args, **kwargs):
-    pretrained_model = None
-    return PSPNet(
-        backbone='ResNet101_vd', pretrained_model=pretrained_model, **kwargs)
-
-
-@manager.MODELS.add_component
-def pspnet_resnet101_vd_os8(*args, **kwargs):
-    pretrained_model = None
-    return PSPNet(
-        backbone='ResNet101_vd',
-        output_stride=8,
-        pretrained_model=pretrained_model,
-        **kwargs)
-
-
-@manager.MODELS.add_component
-def pspnet_resnet50_vd(*args, **kwargs):
-    pretrained_model = None
-    return PSPNet(
-        backbone='ResNet50_vd', pretrained_model=pretrained_model, **kwargs)
-
-
-@manager.MODELS.add_component
-def pspnet_resnet50_vd_os8(*args, **kwargs):
-    pretrained_model = None
-    return PSPNet(
-        backbone='ResNet50_vd',
-        output_stride=8,
-        pretrained_model=pretrained_model,
-        **kwargs)
diff --git a/dygraph/models/unet.py b/dygraph/models/unet.py
deleted file mode 100644
index e2a7c007caa68a74deb322cc4d4d8b66a1b75035..0000000000000000000000000000000000000000
--- a/dygraph/models/unet.py
+++ /dev/null
@@ -1,203 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-
-import paddle.fluid as fluid
-from paddle.fluid.dygraph import Conv2D, Pool2D
-from paddle.nn import SyncBatchNorm as BatchNorm
-
-from dygraph.cvlibs import manager
-from dygraph import utils
-
-
-class UNet(fluid.dygraph.Layer):
-    """
-    U-Net: Convolutional Networks for Biomedical Image Segmentation.
-    https://arxiv.org/abs/1505.04597
-
-    Args:
-        num_classes (int): the unique number of target classes.
-        pretrained_model (str): the path of pretrained model.
-        ignore_index (int): the value of ground-truth mask would be ignored while computing loss or doing evaluation. Default 255.
-    """
-
-    def __init__(self, num_classes, model_pretrained=None, ignore_index=255):
-        super(UNet, self).__init__()
-        self.encode = UnetEncoder()
-        self.decode = UnetDecode()
-        self.get_logit = GetLogit(64, num_classes)
-        self.ignore_index = ignore_index
-        self.EPS = 1e-5
-
-        self.init_weight(model_pretrained)
-
-    def forward(self, x, label=None):
-        encode_data, short_cuts = self.encode(x)
-        decode_data = self.decode(encode_data, short_cuts)
-        logit = self.get_logit(decode_data)
-        if self.training:
-            return self._get_loss(logit, label)
-        else:
-            score_map = fluid.layers.softmax(logit, axis=1)
-            score_map = fluid.layers.transpose(score_map, [0, 2, 3, 1])
-            pred = fluid.layers.argmax(score_map, axis=3)
-            pred = fluid.layers.unsqueeze(pred, axes=[3])
-            return pred, score_map
-
-    def init_weight(self, pretrained_model=None):
-        """
-        Initialize the parameters of model parts.
-        Args:
-            pretrained_model ([str], optional): the path of pretrained model. Defaults to None.
-        """
-        if pretrained_model is not None:
-            if os.path.exists(pretrained_model):
-                utils.load_pretrained_model(self, pretrained_model)
-            else:
-                raise Exception('Pretrained model is not found: {}'.format(
-                    pretrained_model))
-
-    def _get_loss(self, logit, label):
-        logit = fluid.layers.transpose(logit, [0, 2, 3, 1])
-        label = fluid.layers.transpose(label, [0, 2, 3, 1])
-        mask = label != self.ignore_index
-        mask = fluid.layers.cast(mask, 'float32')
-        loss, probs = fluid.layers.softmax_with_cross_entropy(
-            logit,
-            label,
-            ignore_index=self.ignore_index,
-            return_softmax=True,
-            axis=-1)
-
-        loss = loss * mask
-        avg_loss = fluid.layers.mean(loss) / (
-            fluid.layers.mean(mask) + self.EPS)
-
-        label.stop_gradient = True
-        mask.stop_gradient = True
-        return avg_loss
-
-
-class UnetEncoder(fluid.dygraph.Layer):
-    def __init__(self):
-        super(UnetEncoder, self).__init__()
-        self.double_conv = DoubleConv(3, 64)
-        self.down1 = Down(64, 128)
-        self.down2 = Down(128, 256)
-        self.down3 = Down(256, 512)
-        self.down4 = Down(512, 512)
-
-    def forward(self, x):
-        short_cuts = []
-        x = self.double_conv(x)
-        short_cuts.append(x)
-        x = self.down1(x)
-        short_cuts.append(x)
-        x = self.down2(x)
-        short_cuts.append(x)
-        x = self.down3(x)
-        short_cuts.append(x)
-        x = self.down4(x)
-        return x, short_cuts
-
-
-class UnetDecode(fluid.dygraph.Layer):
-    def __init__(self):
-        super(UnetDecode, self).__init__()
-        self.up1 = Up(512, 256)
-        self.up2 = Up(256, 128)
-        self.up3 = Up(128, 64)
-        self.up4 = Up(64, 64)
-
-    def forward(self, x, short_cuts):
-        x = self.up1(x, short_cuts[3])
-        x = self.up2(x, short_cuts[2])
-        x = self.up3(x, short_cuts[1])
-        x = self.up4(x, short_cuts[0])
-        return x
-
-
-class DoubleConv(fluid.dygraph.Layer):
-    def __init__(self, num_channels, num_filters):
-        super(DoubleConv, self).__init__()
-        self.conv0 = Conv2D(
-            num_channels=num_channels,
-            num_filters=num_filters,
-            filter_size=3,
-            stride=1,
-            padding=1)
-        self.bn0 = BatchNorm(num_filters)
-        self.conv1 = Conv2D(
-            num_channels=num_filters,
-            num_filters=num_filters,
-            filter_size=3,
-            stride=1,
-            padding=1)
-        self.bn1 = BatchNorm(num_filters)
-
-    def forward(self, x):
-        x = self.conv0(x)
-        x = self.bn0(x)
-        x = fluid.layers.relu(x)
-        x = self.conv1(x)
-        x = self.bn1(x)
-        x = fluid.layers.relu(x)
-        return x
-
-
-class Down(fluid.dygraph.Layer):
-    def __init__(self, num_channels, num_filters):
-        super(Down, self).__init__()
-        self.max_pool = Pool2D(
-            pool_size=2, pool_type='max', pool_stride=2, pool_padding=0)
-        self.double_conv = DoubleConv(num_channels, num_filters)
-
-    def forward(self, x):
-        x = self.max_pool(x)
-        x = self.double_conv(x)
-        return x
-
-
-class Up(fluid.dygraph.Layer):
-    def __init__(self, num_channels, num_filters):
-        super(Up, self).__init__()
-        self.double_conv = DoubleConv(2 * num_channels, num_filters)
-
-    def forward(self, x, short_cut):
-        short_cut_shape = fluid.layers.shape(short_cut)
-        x = fluid.layers.resize_bilinear(x, short_cut_shape[2:])
-        x = fluid.layers.concat([x, short_cut], axis=1)
-        x = self.double_conv(x)
-        return x
-
-
-class GetLogit(fluid.dygraph.Layer):
-    def __init__(self, num_channels, num_classes):
-        super(GetLogit, self).__init__()
-        self.conv = Conv2D(
-            num_channels=num_channels,
-            num_filters=num_classes,
-            filter_size=3,
-            stride=1,
-            padding=1)
-
-    def forward(self, x):
-        x = self.conv(x)
-        return x
-
-
-@manager.MODELS.add_component
-def unet(*args, **kwargs):
-    return UNet(*args, **kwargs)
diff --git a/dygraph/paddleseg/datasets/rice.py b/dygraph/paddleseg/datasets/rice.py
deleted file mode 100644
index f8041526fa2e265e0eac70709e9c295e860df9ad..0000000000000000000000000000000000000000
--- a/dygraph/paddleseg/datasets/rice.py
+++ /dev/null
@@ -1,56 +0,0 @@
-#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-
-from .dataset import Dataset
-
-
-class Rice(Dataset):
-    def __init__(self, transforms=None, mode='train', download=True):
-        self.data_dir = "/mnt/liuyi22/PaddlePaddle/POC/rice_dataset"
-        self.transforms = transforms
-        self.file_list = list()
-        self.mode = mode
-        self.num_classes = 2
-
-        if mode.lower() not in ['train', 'eval', 'test']:
-            raise Exception(
-                "mode should be 'train', 'eval' or 'test', but got {}.".format(
-                    mode))
-
-        if self.transforms is None:
-            raise Exception("transform is necessary, but it is None.")
-
-        if mode == 'train':
-            file_list = os.path.join(self.data_dir, 'train_list.txt')
-        elif mode == 'eval':
-            file_list = os.path.join(self.data_dir, 'val_list.txt')
-        else:
-            file_list = os.path.join(self.data_dir, 'test_list.txt')
-
-        with open(file_list, 'r') as f:
-            for line in f:
-                items = line.strip().split()
-                if len(items) != 2:
-                    if mode == 'train' or mode == 'eval':
-                        raise Exception(
-                            "File list format incorrect! It should be"
-                            " image_name label_name\\n")
-                    image_path = os.path.join(self.data_dir, items[0])
-                    grt_path = None
-                else:
-                    image_path = os.path.join(self.data_dir, items[0])
-                    grt_path = os.path.join(self.data_dir, items[1])
-                self.file_list.append([image_path, grt_path])
diff --git a/dygraph/paddleseg/models/ann.py b/dygraph/paddleseg/models/ann.py
index 48c381d26308ac6c6632abcd202b84409e22e7f7..3cde299280d7a39edb14787502ad440acac12ea5 100644
--- a/dygraph/paddleseg/models/ann.py
+++ b/dygraph/paddleseg/models/ann.py
@@ -17,8 +17,9 @@ import os
 import paddle
 import paddle.nn.functional as F
 from paddle import nn
+
 from paddleseg.cvlibs import manager
-from paddleseg.models.common import layer_utils, model_utils
+from paddleseg.models.common import layer_libs
 from paddleseg.utils import utils
 
 
@@ -88,7 +89,7 @@ class ANN(nn.Layer):
             psp_size=psp_size)
 
         self.context = nn.Sequential(
-            layer_utils.ConvBnRelu(
+            layer_libs.ConvBnRelu(
                 in_channels=high_in_channels,
                 out_channels=inter_channels,
                 kernel_size=3,
@@ -106,7 +107,7 @@ class ANN(nn.Layer):
             in_channels=inter_channels,
             out_channels=num_classes,
             kernel_size=1)
-        self.auxlayer = model_utils.AuxLayer(
+        self.auxlayer = layer_libs.AuxLayer(
             in_channels=low_in_channels,
             inter_channels=low_in_channels // 2,
             out_channels=num_classes,
@@ -189,7 +190,7 @@ class AFNB(nn.Layer):
                                     key_channels, value_channels, out_channels,
                                     size) for size in sizes
         ])
-        self.conv_bn = layer_utils.ConvBn(
+        self.conv_bn = layer_libs.ConvBn(
             in_channels=out_channels + high_in_channels,
             out_channels=out_channels,
             kernel_size=1)
@@ -243,7 +244,7 @@ class APNB(nn.Layer):
             SelfAttentionBlock_APNB(in_channels, out_channels, key_channels,
                                     value_channels, size) for size in sizes
         ])
-        self.conv_bn = layer_utils.ConvBnRelu(
+        self.conv_bn = layer_libs.ConvBnRelu(
             in_channels=in_channels * 2,
             out_channels=out_channels,
             kernel_size=1)
@@ -310,11 +311,11 @@ class SelfAttentionBlock_AFNB(nn.Layer):
         if out_channels == None:
             self.out_channels = high_in_channels
         self.pool = nn.Pool2D(pool_size=(scale, scale), pool_type="max")
-        self.f_key = layer_utils.ConvBnRelu(
+        self.f_key = layer_libs.ConvBnRelu(
             in_channels=low_in_channels,
             out_channels=key_channels,
             kernel_size=1)
-        self.f_query = layer_utils.ConvBnRelu(
+        self.f_query = layer_libs.ConvBnRelu(
             in_channels=high_in_channels,
             out_channels=key_channels,
             kernel_size=1)
@@ -393,7 +394,7 @@ class SelfAttentionBlock_APNB(nn.Layer):
         self.value_channels = value_channels
 
         self.pool = nn.Pool2D(pool_size=(scale, scale), pool_type="max")
-        self.f_key = layer_utils.ConvBnRelu(
+        self.f_key = layer_libs.ConvBnRelu(
             in_channels=self.in_channels,
             out_channels=self.key_channels,
             kernel_size=1)
diff --git a/dygraph/paddleseg/models/backbones/mobilenetv3.py b/dygraph/paddleseg/models/backbones/mobilenetv3.py
index 6204d7733a45326a70b7cbc423820b987b046708..ac1778ad207945b96d7aacbd88691e5910d4d3b4 100644
--- a/dygraph/paddleseg/models/backbones/mobilenetv3.py
+++ b/dygraph/paddleseg/models/backbones/mobilenetv3.py
@@ -27,7 +27,7 @@ from paddle.fluid.layer_helper import LayerHelper
 from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear, Dropout
 from paddle.nn import SyncBatchNorm as BatchNorm
 
-from paddleseg.models.common import layer_utils
+from paddleseg.models.common import layer_libs
 from paddleseg.cvlibs import manager
 from paddleseg.utils import utils
 
diff --git a/dygraph/paddleseg/models/backbones/resnet_vd.py b/dygraph/paddleseg/models/backbones/resnet_vd.py
index d7dfc66fd5dc44a6a27c04eea73dc692f857c61c..787f6a3b48bd7ee2bf2e0d31ac62f15b704b3e15 100644
--- a/dygraph/paddleseg/models/backbones/resnet_vd.py
+++ b/dygraph/paddleseg/models/backbones/resnet_vd.py
@@ -28,7 +28,7 @@ from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear, Dropout
 from paddle.nn import SyncBatchNorm as BatchNorm
 
 from paddleseg.utils import utils
-from paddleseg.models.common import layer_utils
+from paddleseg.models.common import layer_libs, activation
 from paddleseg.cvlibs import manager
 
 __all__ = [
@@ -77,7 +77,7 @@ class ConvBNLayer(fluid.dygraph.Layer):
             num_filters,
             weight_attr=ParamAttr(name=bn_name + '_scale'),
             bias_attr=ParamAttr(bn_name + '_offset'))
-        self._act_op = layer_utils.Activation(act=act)
+        self._act_op = activation.Activation(act=act)
 
     def forward(self, inputs):
         if self.is_vd_mode:
@@ -213,7 +213,7 @@ class ResNet_vd(fluid.dygraph.Layer):
                  layers=50,
                  class_dim=1000,
                  output_stride=None,
-                 multi_grid=(1, 2, 4)):
+                 multi_grid=(1, 1, 1)):
         super(ResNet_vd, self).__init__()
 
         self.layers = layers
diff --git a/dygraph/paddleseg/models/backbones/xception_deeplab.py b/dygraph/paddleseg/models/backbones/xception_deeplab.py
index f512e31ab372b8bc453d8d0506bbc45839a08d27..b07d3ac1271baadeb199d1fb39feb70f45f91e23 100644
--- a/dygraph/paddleseg/models/backbones/xception_deeplab.py
+++ b/dygraph/paddleseg/models/backbones/xception_deeplab.py
@@ -21,7 +21,7 @@ from paddle.fluid.layer_helper import LayerHelper
 from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear, Dropout
 from paddle.nn import SyncBatchNorm as BatchNorm
 
-from paddleseg.models.common import layer_utils
+from paddleseg.models.common import layer_libs
 from paddleseg.cvlibs import manager
 from paddleseg.utils import utils
 
diff --git a/dygraph/paddleseg/models/common/__init__.py b/dygraph/paddleseg/models/common/__init__.py
index 9f30b50f2fc80c9effd59dbf3c134de66de04c44..33b2611df67fbfe22604512c1f0d03c9012cb3b4 100644
--- a/dygraph/paddleseg/models/common/__init__.py
+++ b/dygraph/paddleseg/models/common/__init__.py
@@ -13,5 +13,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from . import layer_utils
-from . import model_utils
\ No newline at end of file
+from . import layer_libs
+from . import activation
+from . import pyramid_pool
\ No newline at end of file
diff --git a/dygraph/paddleseg/models/common/activation.py b/dygraph/paddleseg/models/common/activation.py
new file mode 100644
index 0000000000000000000000000000000000000000..69af72e0ea96cd389e48511ff7f7d4bee8680a8a
--- /dev/null
+++ b/dygraph/paddleseg/models/common/activation.py
@@ -0,0 +1,60 @@
+# -*- encoding: utf-8 -*-
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from paddle import nn
+from paddle.nn.layer import activation
+
+
+class Activation(nn.Layer):
+    """
+    Wrapper that builds a paddle activation layer from its lowercase name.
+    For example:
+        >>> relu = Activation("relu")
+        >>> print(relu)
+        <class 'paddle.nn.layer.activation.ReLU'>
+        >>> sigmoid = Activation("sigmoid")
+        >>> print(sigmoid)
+        <class 'paddle.nn.layer.activation.Sigmoid'>
+        >>> not_exit_one = Activation("not_exit_one")
+        KeyError: "not_exit_one does not exist in the current dict_keys(['elu', 'gelu', 'hardshrink',
+        'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', 'softmax',
+        'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', 'hsigmoid'])"
+
+    Args:
+        act (str, optional): lowercase activation name; None makes forward an identity.
+    """
+
+    def __init__(self, act=None):
+        super(Activation, self).__init__()
+
+        self._act = act
+        # Map lowercase names to the class names listed in activation.__all__.
+        upper_act_names = activation.__all__
+        lower_act_names = [name.lower() for name in upper_act_names]
+        act_dict = dict(zip(lower_act_names, upper_act_names))
+        if act is not None:
+            if act in act_dict:
+                # Resolve the class with getattr instead of eval-ing a string.
+                self.act_func = getattr(activation, act_dict[act])()
+            else:
+                raise KeyError("{} does not exist in the current {}".format(
+                    act, act_dict.keys()))
+
+    def forward(self, x):
+        """Apply the wrapped activation, or return x unchanged when act is None."""
+        if self._act is not None:
+            return self.act_func(x)
+        else:
+            return x
\ No newline at end of file
diff --git a/dygraph/paddleseg/models/common/layer_utils.py b/dygraph/paddleseg/models/common/layer_libs.py
similarity index 59%
rename from dygraph/paddleseg/models/common/layer_utils.py
rename to dygraph/paddleseg/models/common/layer_libs.py
index 8d41ebb130cbbca11feebfd87e030628ea44cd27..8da38bcae5efb9960a012a58dac747136e81941a 100644
--- a/dygraph/paddleseg/models/common/layer_utils.py
+++ b/dygraph/paddleseg/models/common/layer_libs.py
@@ -70,18 +70,6 @@ class ConvReluPool(nn.Layer):
         return x
 
 
-# class ConvBnReluUpsample(nn.Layer):
-#     def __init__(self, in_channels, out_channels):
-#         super(ConvBnReluUpsample, self).__init__()
-#         self.conv_bn_relu = ConvBnRelu(in_channels, out_channels)
-
-#     def forward(self, x, upsample_scale=2):
-#         x = self.conv_bn_relu(x)
-#         new_shape = [x.shape[2] * upsample_scale, x.shape[3] * upsample_scale]
-#         x = F.resize_bilinear(x, new_shape)
-#         return x
-
-
 class DepthwiseConvBnRelu(nn.Layer):
     def __init__(self, in_channels, out_channels, kernel_size, **kwargs):
         super(DepthwiseConvBnRelu, self).__init__()
@@ -100,44 +88,43 @@ class DepthwiseConvBnRelu(nn.Layer):
         return x
 
 
-class Activation(nn.Layer):
+class AuxLayer(nn.Layer):
     """
-    The wrapper of activations
-    For example:
-        >>> relu = Activation("relu")
-        >>> print(relu)
-        <class 'paddle.nn.layer.activation.ReLU'>
-        >>> sigmoid = Activation("sigmoid")
-        >>> print(sigmoid)
-        <class 'paddle.nn.layer.activation.Sigmoid'>
-        >>> not_exit_one = Activation("not_exit_one")
-        KeyError: "not_exit_one does not exist in the current dict_keys(['elu', 'gelu', 'hardshrink', 
-        'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', 'softmax', 
-        'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', 'hsigmoid'])"
+    The auxiliary layer implementation for auxiliary loss
 
     Args:
-        act (str): the activation name in lowercase
+        in_channels (int): the number of input channels.
+
+        inter_channels (int): intermediate channels.
+
+        out_channels (int): the number of output channels, which is usually num_classes.
+
+        dropout_prob (float): the dropout rate. Defaults to 0.1.
     """
 
-    def __init__(self, act=None):
-        super(Activation, self).__init__()
+    def __init__(self,
+                 in_channels,
+                 inter_channels,
+                 out_channels,
+                 dropout_prob=0.1):
+        super(AuxLayer, self).__init__()
+
+        self.conv_bn_relu = ConvBnRelu(
+            in_channels=in_channels,
+            out_channels=inter_channels,
+            kernel_size=3,
+            padding=1)
 
-        self._act = act
-        upper_act_names = activation.__all__
-        lower_act_names = [act.lower() for act in upper_act_names]
-        act_dict = dict(zip(lower_act_names, upper_act_names))
+        self.conv = nn.Conv2d(
+            in_channels=inter_channels,
+            out_channels=out_channels,
+            kernel_size=1)
 
-        if act is not None:
-            if act in act_dict.keys():
-                act_name = act_dict[act]
-                self.act_func = eval("activation.{}()".format(act_name))
-            else:
-                raise KeyError("{} does not exist in the current {}".format(
-                    act, act_dict.keys()))
+        self.dropout_prob = dropout_prob
 
     def forward(self, x):
+        x = self.conv_bn_relu(x)
+        x = F.dropout(x, p=self.dropout_prob)
+        x = self.conv(x)
+        return x
 
-        if self._act is not None:
-            return self.act_func(x)
-        else:
-            return x
diff --git a/dygraph/paddleseg/models/common/model_utils.py b/dygraph/paddleseg/models/common/pyramid_pool.py
similarity index 58%
rename from dygraph/paddleseg/models/common/model_utils.py
rename to dygraph/paddleseg/models/common/pyramid_pool.py
index 7de39c8e77fad0021d3e910a9c02f3f6d774c32d..a69eb0f60ff11e5413f813b62183f81a84e2c232 100644
--- a/dygraph/paddleseg/models/common/model_utils.py
+++ b/dygraph/paddleseg/models/common/pyramid_pool.py
@@ -13,85 +13,96 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+
 import paddle
 from paddle import nn
 import paddle.nn.functional as F
 from paddle.nn import SyncBatchNorm as BatchNorm
 
-from paddleseg.models.common import layer_utils
+from paddleseg.models.common import layer_libs
 
 
-class FCNHead(nn.Layer):
+class ASPPModule(nn.Layer):
     """
-    The FCNHead implementation used in auxilary layer
+     Atrous Spatial Pyramid Pooling
 
     Args:
-        in_channels (int): the number of input channels
-        out_channels (int): the number of output channels
-    """
+        aspp_ratios (tuple): the dilation rates used in the ASPP module.
 
-    def __init__(self, in_channels, out_channels):
-        super(FCNHead, self).__init__()
-
-        inter_channels = in_channels // 4
-        self.conv_bn_relu = layer_utils.ConvBnRelu(
-            in_channels=in_channels,
-            out_channels=inter_channels,
-            kernel_size=3,
-            padding=1)
-
-        self.conv = nn.Conv2d(
-            in_channels=inter_channels,
-            out_channels=out_channels,
-            kernel_size=1)
+        in_channels (int): the number of input channels.
 
-    def forward(self, x):
-        x = self.conv_bn_relu(x)
-        x = F.dropout(x, p=0.1)
-        x = self.conv(x)
-        return x
+        out_channels (int): the number of output channels.
 
+        sep_conv (bool): whether to use separable conv in the ASPP module.
 
-class AuxLayer(nn.Layer):
-    """
-    The auxilary layer implementation for auxilary loss
+        image_pooling (bool): whether to augment with image-level features.
 
-    Args:
-        in_channels (int): the number of input channels.
-        inter_channels (int): intermediate channels.
-        out_channels (int): the number of output channels, which is usually num_classes.
     """
 
-    def __init__(self,
-                 in_channels,
-                 inter_channels,
-                 out_channels,
-                 dropout_prob=0.1):
-        super(AuxLayer, self).__init__()
-
-        self.conv_bn_relu = layer_utils.ConvBnRelu(
-            in_channels=in_channels,
-            out_channels=inter_channels,
-            kernel_size=3,
-            padding=1)
-
-        self.conv = nn.Conv2d(
-            in_channels=inter_channels,
-            out_channels=out_channels,
+    def __init__(self,
+                 aspp_ratios,
+                 in_channels,
+                 out_channels,
+                 sep_conv=False,
+                 image_pooling=False):
+        super(ASPPModule, self).__init__()
+        # Use a LayerList: a plain list would not register the branches as
+        # sublayers, so their parameters would be neither trained nor saved.
+        self.aspp_blocks = nn.LayerList()
+        for ratio in aspp_ratios:
+            # Only dilated branches (ratio > 1) may use the depthwise variant.
+            if sep_conv and ratio > 1:
+                conv_func = layer_libs.DepthwiseConvBnRelu
+            else:
+                conv_func = layer_libs.ConvBnRelu
+            # ratio == 1 is a 1x1 conv; otherwise 3x3 with padding == dilation.
+            block = conv_func(
+                in_channels=in_channels,
+                out_channels=out_channels,
+                kernel_size=1 if ratio == 1 else 3,
+                dilation=ratio,
+                padding=0 if ratio == 1 else ratio
+            )
+            self.aspp_blocks.append(block)
+
+        out_size = len(self.aspp_blocks)
+        # Optional image-level branch: global average pool, then a 1x1 conv.
+        if image_pooling:
+            self.global_avg_pool = nn.Sequential(
+                nn.AdaptiveAvgPool2d(output_size=(1, 1)),
+                layer_libs.ConvBnRelu(
+                    in_channels, out_channels, kernel_size=1, bias_attr=False))
+            out_size += 1
+        self.image_pooling = image_pooling
+
+        self.conv_bn_relu = layer_libs.ConvBnRelu(
+            in_channels=out_channels * out_size,
+            out_channels=out_channels,
             kernel_size=1)
 
-        self.dropout_prob = dropout_prob
+        self.dropout = nn.Dropout(p=0.1)  # drop rate
 
     def forward(self, x):
+
+        outputs = []
+        for block in self.aspp_blocks:
+            outputs.append(block(x))
+        
+        if self.image_pooling:
+            img_avg = self.global_avg_pool(x)
+            img_avg = F.resize_bilinear(img_avg, out_shape=x.shape[2:])
+            outputs.append(img_avg)
+
+        x = paddle.concat(outputs, axis=1)
         x = self.conv_bn_relu(x)
-        x = F.dropout(x, p=self.dropout_prob)
-        x = self.conv(x)
-        return x
+        x = self.dropout(x)
 
+        return x
+        
 
 class PPModule(nn.Layer):
     """
-    Pyramid pooling module
+    Pyramid pooling module originally in PSPNet
 
     Args:
         in_channels (int): the number of intput channels to pyramid pooling module.
@@ -109,6 +120,7 @@ class PPModule(nn.Layer):
                  bin_sizes=(1, 2, 3, 6),
                  dim_reduction=True):
         super(PPModule, self).__init__()
+
         self.bin_sizes = bin_sizes
 
         inter_channels = in_channels
@@ -121,7 +133,7 @@ class PPModule(nn.Layer):
             for size in bin_sizes
         ])
 
-        self.conv_bn_relu2 = layer_utils.ConvBnRelu(
+        self.conv_bn_relu2 = layer_libs.ConvBnRelu(
             in_channels=in_channels + inter_channels * len(bin_sizes),
             out_channels=out_channels,
             kernel_size=3,
@@ -147,24 +159,21 @@ class PPModule(nn.Layer):
             conv (tensor): a tensor after Pyramid Pooling Module
         """
 
-        # this paddle version does not support AdaptiveAvgPool2d, so skip it here.
-        # prior = nn.AdaptiveAvgPool2d(output_size=(size, size))
-        conv = layer_utils.ConvBnRelu(
+        prior = nn.AdaptiveAvgPool2d(output_size=(size, size))
+        conv = layer_libs.ConvBnRelu(
             in_channels=in_channels, out_channels=out_channels, kernel_size=1)
 
-        return conv
+        return nn.Sequential(prior, conv)
 
     def forward(self, input):
         cat_layers = []
         for i, stage in enumerate(self.stages):
             size = self.bin_sizes[i]
-            x = F.adaptive_pool2d(
-                input, pool_size=(size, size), pool_type="max")
-            x = stage(x)
+            x = stage(input)
             x = F.resize_bilinear(x, out_shape=input.shape[2:])
             cat_layers.append(x)
         cat_layers = [input] + cat_layers[::-1]
         cat = paddle.concat(cat_layers, axis=1)
         out = self.conv_bn_relu2(cat)
 
-        return out
+        return out
\ No newline at end of file
diff --git a/dygraph/paddleseg/models/deeplab.py b/dygraph/paddleseg/models/deeplab.py
index 7c7e0cb187808baa8c7543d8eda7773a53c5b0fc..ff530b2fa5ad2f08700a1d9ba7f2b7c81a09015a 100644
--- a/dygraph/paddleseg/models/deeplab.py
+++ b/dygraph/paddleseg/models/deeplab.py
@@ -18,7 +18,7 @@ import paddle
 import paddle.nn.functional as F
 from paddle import nn
 from paddleseg.cvlibs import manager
-from paddleseg.models.common import layer_utils
+from paddleseg.models.common import pyramid_pool, layer_libs
 from paddleseg.utils import utils
 
 __all__ = ['DeepLabV3P', 'DeepLabV3']
@@ -43,8 +43,9 @@ class DeepLabV3P(nn.Layer):
 
         model_pretrained (str): the path of pretrained model.
 
-        output_stride (int): the ratio of input size and final feature size. 
-        Support 16 or 8. Default to 16.
+        aspp_ratios (tuple): the dilation rates used in the ASPP module.
+        If output_stride=16, aspp_ratios should be set as (1, 6, 12, 18).
+        If output_stride=8, aspp_ratios should be set as (1, 12, 24, 36).
 
         backbone_indices (tuple): two values in the tuple indicte the indices of output of backbone.
                         the first index will be taken as a low-level feature in Deconder component;
@@ -61,18 +62,24 @@ class DeepLabV3P(nn.Layer):
     def __init__(self,
                  num_classes,
                  backbone,
+                 backbone_pretrained=None,
                  model_pretrained=None,
                  backbone_indices=(0, 3),
                  backbone_channels=(256, 2048),
-                 output_stride=16):
+                 aspp_ratios=(1, 6, 12, 18),
+                 aspp_out_channels=256):
 
         super(DeepLabV3P, self).__init__()
 
         self.backbone = backbone
-        self.aspp = ASPP(output_stride, backbone_channels[1])
+        self.backbone_pretrained = backbone_pretrained
+        self.model_pretrained = model_pretrained
+        
+        self.aspp = pyramid_pool.ASPPModule(
+            aspp_ratios, backbone_channels[1], aspp_out_channels, sep_conv=True, image_pooling=True)
         self.decoder = Decoder(num_classes, backbone_channels[0])
         self.backbone_indices = backbone_indices
-        self.init_weight(model_pretrained)
+        self.init_weight()
 
     def forward(self, input, label=None):
 
@@ -87,19 +94,17 @@ class DeepLabV3P(nn.Layer):
 
         return logit_list
 
-    def init_weight(self, pretrained_model=None):
+    def init_weight(self):
         """
         Initialize the parameters of model parts.
         Args:
             pretrained_model ([str], optional): the path of pretrained model. Defaults to None.
         """
-        if pretrained_model is not None:
-            if os.path.exists(pretrained_model):
-                utils.load_pretrained_model(self, pretrained_model)
-            else:
-                raise Exception('Pretrained model is not found: {}'.format(
-                    pretrained_model))
-
+        if self.model_pretrained is not None:
+            utils.load_pretrained_model(self, self.model_pretrained)
+        elif self.backbone_pretrained is not None:
+            utils.load_pretrained_model(self.backbone, self.backbone_pretrained)
+           
 
 @manager.MODELS.add_component
 class DeepLabV3(nn.Layer):
@@ -119,15 +124,21 @@ class DeepLabV3(nn.Layer):
     def __init__(self,
                  num_classes,
                  backbone,
+                 backbone_pretrained=None,
                  model_pretrained=None,
                  backbone_indices=(3,),
                  backbone_channels=(2048,),
-                 output_stride=16):
+                 aspp_ratios=(1, 6, 12, 18),
+                 aspp_out_channels=256):
 
         super(DeepLabV3, self).__init__()
 
         self.backbone = backbone
-        self.aspp = ASPP(output_stride, backbone_channels[0])
+
+        self.aspp = pyramid_pool.ASPPModule(
+            aspp_ratios, backbone_channels[0], aspp_out_channels, 
+            sep_conv=False, image_pooling=True)
+
         self.cls = nn.Conv2d(
             in_channels=backbone_channels[0],
             out_channels=num_classes,
@@ -161,98 +172,6 @@ class DeepLabV3(nn.Layer):
                     pretrained_model))
 
 
-class ImageAverage(nn.Layer):
-    """
-    Global average pooling
-
-    Args:
-        in_channels (int): the number of input channels.
-
-    """
-
-    def __init__(self, in_channels):
-        super(ImageAverage, self).__init__()
-        self.conv_bn_relu = layer_utils.ConvBnRelu(
-            in_channels, out_channels=256, kernel_size=1)
-
-    def forward(self, input):
-        x = paddle.reduce_mean(input, dim=[2, 3], keep_dim=True)
-        x = self.conv_bn_relu(x)
-        x = F.resize_bilinear(x, out_shape=input.shape[2:])
-        return x
-
-
-class ASPP(nn.Layer):
-    """
-     Decoder module of DeepLabV3P model
-
-    Args:
-        output_stride (int): the ratio of input size and final feature size. Support 16 or 8.
-
-        in_channels (int): the number of input channels in decoder module.
-
-    """
-
-    def __init__(self, output_stride, in_channels):
-        super(ASPP, self).__init__()
-
-        if output_stride == 16:
-            aspp_ratios = (6, 12, 18)
-        elif output_stride == 8:
-            aspp_ratios = (12, 24, 36)
-        else:
-            raise NotImplementedError(
-                "Only support output_stride is 8 or 16, but received{}".format(
-                    output_stride))
-
-        self.image_average = ImageAverage(in_channels=in_channels)
-
-        # The first aspp using 1*1 conv
-        self.aspp1 = layer_utils.DepthwiseConvBnRelu(
-            in_channels=in_channels, out_channels=256, kernel_size=1)
-
-        # The second aspp using 3*3 (separable) conv at dilated rate aspp_ratios[0]
-        self.aspp2 = layer_utils.DepthwiseConvBnRelu(
-            in_channels=in_channels,
-            out_channels=256,
-            kernel_size=3,
-            dilation=aspp_ratios[0],
-            padding=aspp_ratios[0])
-
-        # The Third aspp using 3*3 (separable) conv at dilated rate aspp_ratios[1]
-        self.aspp3 = layer_utils.DepthwiseConvBnRelu(
-            in_channels=in_channels,
-            out_channels=256,
-            kernel_size=3,
-            dilation=aspp_ratios[1],
-            padding=aspp_ratios[1])
-
-        # The Third aspp using 3*3 (separable) conv at dilated rate aspp_ratios[2]
-        self.aspp4 = layer_utils.DepthwiseConvBnRelu(
-            in_channels=in_channels,
-            out_channels=256,
-            kernel_size=3,
-            dilation=aspp_ratios[2],
-            padding=aspp_ratios[2])
-
-        # After concat op, using 1*1 conv
-        self.conv_bn_relu = layer_utils.ConvBnRelu(
-            in_channels=1280, out_channels=256, kernel_size=1)
-
-    def forward(self, x):
-
-        x1 = self.image_average(x)
-        x2 = self.aspp1(x)
-        x3 = self.aspp2(x)
-        x4 = self.aspp3(x)
-        x5 = self.aspp4(x)
-        x = paddle.concat([x1, x2, x3, x4, x5], axis=1)
-
-        x = self.conv_bn_relu(x)
-        x = F.dropout(x, p=0.1)  # dropout_prob
-        return x
-
-
 class Decoder(nn.Layer):
     """
     Decoder module of DeepLabV3P model
@@ -267,12 +186,12 @@ class Decoder(nn.Layer):
     def __init__(self, num_classes, in_channels):
         super(Decoder, self).__init__()
 
-        self.conv_bn_relu1 = layer_utils.ConvBnRelu(
+        self.conv_bn_relu1 = layer_libs.ConvBnRelu(
             in_channels=in_channels, out_channels=48, kernel_size=1)
 
-        self.conv_bn_relu2 = layer_utils.DepthwiseConvBnRelu(
+        self.conv_bn_relu2 = layer_libs.DepthwiseConvBnRelu(
             in_channels=304, out_channels=256, kernel_size=3, padding=1)
-        self.conv_bn_relu3 = layer_utils.DepthwiseConvBnRelu(
+        self.conv_bn_relu3 = layer_libs.DepthwiseConvBnRelu(
             in_channels=256, out_channels=256, kernel_size=3, padding=1)
         self.conv = nn.Conv2d(
             in_channels=256, out_channels=num_classes, kernel_size=1)
diff --git a/dygraph/paddleseg/models/fast_scnn.py b/dygraph/paddleseg/models/fast_scnn.py
index 434f083e99d5337a51b3581f906b0a1fc518676e..3abbcffc85c52563f32406e27a645a29860a2ac3 100644
--- a/dygraph/paddleseg/models/fast_scnn.py
+++ b/dygraph/paddleseg/models/fast_scnn.py
@@ -15,7 +15,7 @@
 import paddle.nn.functional as F
 from paddle import nn
 from paddleseg.cvlibs import manager
-from paddleseg.models.common import layer_utils, model_utils
+from paddleseg.models.common import layer_libs
 
 
 @manager.MODELS.add_component
@@ -110,15 +110,15 @@ class LearningToDownsample(nn.Layer):
     def __init__(self, dw_channels1=32, dw_channels2=48, out_channels=64):
         super(LearningToDownsample, self).__init__()
 
-        self.conv_bn_relu = layer_utils.ConvBnRelu(
+        self.conv_bn_relu = layer_libs.ConvBnRelu(
             in_channels=3, out_channels=dw_channels1, kernel_size=3, stride=2)
-        self.dsconv_bn_relu1 = layer_utils.DepthwiseConvBnRelu(
+        self.dsconv_bn_relu1 = layer_libs.DepthwiseConvBnRelu(
             in_channels=dw_channels1,
             out_channels=dw_channels2,
             kernel_size=3,
             stride=2,
             padding=1)
-        self.dsconv_bn_relu2 = layer_utils.DepthwiseConvBnRelu(
+        self.dsconv_bn_relu2 = layer_libs.DepthwiseConvBnRelu(
             in_channels=dw_channels2,
             out_channels=out_channels,
             kernel_size=3,
@@ -220,13 +220,13 @@ class LinearBottleneck(nn.Layer):
         expand_channels = in_channels * expansion
         self.block = nn.Sequential(
             # pw
-            layer_utils.ConvBnRelu(
+            layer_libs.ConvBnRelu(
                 in_channels=in_channels,
                 out_channels=expand_channels,
                 kernel_size=1,
                 bias_attr=False),
             # dw
-            layer_utils.ConvBnRelu(
+            layer_libs.ConvBnRelu(
                 in_channels=expand_channels,
                 out_channels=expand_channels,
                 kernel_size=3,
@@ -267,7 +267,7 @@ class FeatureFusionModule(nn.Layer):
         super(FeatureFusionModule, self).__init__()
 
         # There only depth-wise conv is used WITHOUT point-wise conv
-        self.dwconv = layer_utils.ConvBnRelu(
+        self.dwconv = layer_libs.ConvBnRelu(
             in_channels=low_in_channels,
             out_channels=out_channels,
             kernel_size=3,
@@ -317,13 +317,13 @@ class Classifier(nn.Layer):
     def __init__(self, input_channels, num_classes):
         super(Classifier, self).__init__()
 
-        self.dsconv1 = layer_utils.DepthwiseConvBnRelu(
+        self.dsconv1 = layer_libs.DepthwiseConvBnRelu(
             in_channels=input_channels,
             out_channels=input_channels,
             kernel_size=3,
             padding=1)
 
-        self.dsconv2 = layer_utils.DepthwiseConvBnRelu(
+        self.dsconv2 = layer_libs.DepthwiseConvBnRelu(
             in_channels=input_channels,
             out_channels=input_channels,
             kernel_size=3,
diff --git a/dygraph/paddleseg/models/gcnet.py b/dygraph/paddleseg/models/gcnet.py
index 97a70d13f6c1f53a6123425f42db1315385d61d1..09a900655b1808b19d0e2dcd751d4ee22769d220 100644
--- a/dygraph/paddleseg/models/gcnet.py
+++ b/dygraph/paddleseg/models/gcnet.py
@@ -18,7 +18,7 @@ import paddle
 import paddle.nn.functional as F
 from paddle import nn
 from paddleseg.cvlibs import manager
-from paddleseg.models.common import layer_utils, model_utils
+from paddleseg.models.common import layer_libs
 from paddleseg.utils import utils
 
 
@@ -72,7 +72,7 @@ class GCNet(nn.Layer):
         self.backbone = backbone
 
         in_channels = backbone_channels[1]
-        self.conv_bn_relu1 = layer_utils.ConvBnRelu(
+        self.conv_bn_relu1 = layer_libs.ConvBnRelu(
             in_channels=in_channels,
             out_channels=gc_channels,
             kernel_size=3,
@@ -80,13 +80,13 @@ class GCNet(nn.Layer):
 
         self.gc_block = GlobalContextBlock(in_channels=gc_channels, ratio=ratio)
 
-        self.conv_bn_relu2 = layer_utils.ConvBnRelu(
+        self.conv_bn_relu2 = layer_libs.ConvBnRelu(
             in_channels=gc_channels,
             out_channels=gc_channels,
             kernel_size=3,
             padding=1)
 
-        self.conv_bn_relu3 = layer_utils.ConvBnRelu(
+        self.conv_bn_relu3 = layer_libs.ConvBnRelu(
             in_channels=in_channels + gc_channels,
             out_channels=gc_channels,
             kernel_size=3,
@@ -96,7 +96,7 @@ class GCNet(nn.Layer):
             in_channels=gc_channels, out_channels=num_classes, kernel_size=1)
 
         if enable_auxiliary_loss:
-            self.auxlayer = model_utils.AuxLayer(
+            self.auxlayer = layer_libs.AuxLayer(
                 in_channels=backbone_channels[0],
                 inter_channels=backbone_channels[0] // 4,
                 out_channels=num_classes)
@@ -161,9 +161,9 @@ class GlobalContextBlock(nn.Layer):
 
         self.conv_mask = nn.Conv2d(
             in_channels=in_channels, out_channels=1, kernel_size=1)
-        # current paddle version does not support Softmax class
-        # self.softmax = layer_utils.Activation("softmax", dim=2)
 
+        self.softmax = nn.Softmax(axis=2)
+        
         inter_channels = int(in_channels * ratio)
         self.channel_add_conv = nn.Sequential(
             nn.Conv2d(
@@ -188,7 +188,7 @@ class GlobalContextBlock(nn.Layer):
         # [N, 1, H * W]
         context_mask = paddle.reshape(
             context_mask, shape=[batch, 1, height * width])
-        context_mask = F.softmax(context_mask)
+        context_mask = self.softmax(context_mask)
         # [N, 1, H * W, 1]
         context_mask = paddle.unsqueeze(context_mask, axis=-1)
         # [N, 1, C, 1]
diff --git a/dygraph/paddleseg/models/ocrnet.py b/dygraph/paddleseg/models/ocrnet.py
index 78dfd136d7aaf15aed50f598c66ddbf72ac1e242..00cf079c0c185b5d5904610ef0ca2d3929836e25 100644
--- a/dygraph/paddleseg/models/ocrnet.py
+++ b/dygraph/paddleseg/models/ocrnet.py
@@ -18,7 +18,7 @@ import paddle.fluid as fluid
 from paddle.fluid.dygraph import Sequential, Conv2D
 
 from paddleseg.cvlibs import manager
-from paddleseg.models.common.layer_utils import ConvBnRelu
+from paddleseg.models.common.layer_libs import ConvBnRelu
 from paddleseg import utils
 
 
diff --git a/dygraph/paddleseg/models/pspnet.py b/dygraph/paddleseg/models/pspnet.py
index 764749ce09f4618420d142d1955cf52d9aa5c258..69b831ebb3e29f979128e96a0bb1c7b5a45a37a3 100644
--- a/dygraph/paddleseg/models/pspnet.py
+++ b/dygraph/paddleseg/models/pspnet.py
@@ -17,7 +17,7 @@ import os
 import paddle.nn.functional as F
 from paddle import nn
 from paddleseg.cvlibs import manager
-from paddleseg.models.common import model_utils
+from paddleseg.models.common import layer_libs, pyramid_pool
 from paddleseg.utils import utils
 
 
@@ -70,7 +70,7 @@ class PSPNet(nn.Layer):
         self.backbone = backbone
         self.backbone_indices = backbone_indices
 
-        self.psp_module = model_utils.PPModule(
+        self.psp_module = pyramid_pool.PPModule(
             in_channels=backbone_channels[1],
             out_channels=pp_out_channels,
             bin_sizes=bin_sizes)
@@ -81,8 +81,11 @@ class PSPNet(nn.Layer):
             kernel_size=1)
 
         if enable_auxiliary_loss:
-            self.fcn_head = model_utils.FCNHead(
-                in_channels=backbone_channels[0], out_channels=num_classes)
+            
+            self.auxlayer = layer_libs.AuxLayer(
+                in_channels=backbone_channels[0], 
+                inter_channels=backbone_channels[0] // 4,
+                out_channels=num_classes)
 
         self.enable_auxiliary_loss = enable_auxiliary_loss
 
@@ -102,7 +105,7 @@ class PSPNet(nn.Layer):
 
         if self.enable_auxiliary_loss:
             auxiliary_feat = feat_list[self.backbone_indices[0]]
-            auxiliary_logit = self.fcn_head(auxiliary_feat)
+            auxiliary_logit = self.auxlayer(auxiliary_feat)
             auxiliary_logit = F.resize_bilinear(auxiliary_logit,
                                                 input.shape[2:])
             logit_list.append(auxiliary_logit)
diff --git a/dygraph/transforms/__init__.py b/dygraph/transforms/__init__.py
deleted file mode 100644
index 8f1d5ae80aeb1eb77ac672b1cbcfedcbfbd643c4..0000000000000000000000000000000000000000
--- a/dygraph/transforms/__init__.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from .transforms import *
-from . import functional
diff --git a/dygraph/transforms/functional.py b/dygraph/transforms/functional.py
deleted file mode 100644
index 6d5a9b10db15edb05692c8aa4249912652e0a745..0000000000000000000000000000000000000000
--- a/dygraph/transforms/functional.py
+++ /dev/null
@@ -1,99 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import cv2
-import numpy as np
-from PIL import Image, ImageEnhance
-
-
-def normalize(im, mean, std):
-    im = im.astype(np.float32, copy=False) / 255.0
-    im -= mean
-    im /= std
-    return im
-
-
-def permute(im):
-    im = np.transpose(im, (2, 0, 1))
-    return im
-
-
-def resize(im, target_size=608, interp=cv2.INTER_LINEAR):
-    if isinstance(target_size, list) or isinstance(target_size, tuple):
-        w = target_size[0]
-        h = target_size[1]
-    else:
-        w = target_size
-        h = target_size
-    im = cv2.resize(im, (w, h), interpolation=interp)
-    return im
-
-
-def resize_long(im, long_size=224, interpolation=cv2.INTER_LINEAR):
-    value = max(im.shape[0], im.shape[1])
-    scale = float(long_size) / float(value)
-    resized_width = int(round(im.shape[1] * scale))
-    resized_height = int(round(im.shape[0] * scale))
-
-    im = cv2.resize(
-        im, (resized_width, resized_height), interpolation=interpolation)
-    return im
-
-
-def horizontal_flip(im):
-    if len(im.shape) == 3:
-        im = im[:, ::-1, :]
-    elif len(im.shape) == 2:
-        im = im[:, ::-1]
-    return im
-
-
-def vertical_flip(im):
-    if len(im.shape) == 3:
-        im = im[::-1, :, :]
-    elif len(im.shape) == 2:
-        im = im[::-1, :]
-    return im
-
-
-def brightness(im, brightness_lower, brightness_upper):
-    brightness_delta = np.random.uniform(brightness_lower, brightness_upper)
-    im = ImageEnhance.Brightness(im).enhance(brightness_delta)
-    return im
-
-
-def contrast(im, contrast_lower, contrast_upper):
-    contrast_delta = np.random.uniform(contrast_lower, contrast_upper)
-    im = ImageEnhance.Contrast(im).enhance(contrast_delta)
-    return im
-
-
-def saturation(im, saturation_lower, saturation_upper):
-    saturation_delta = np.random.uniform(saturation_lower, saturation_upper)
-    im = ImageEnhance.Color(im).enhance(saturation_delta)
-    return im
-
-
-def hue(im, hue_lower, hue_upper):
-    hue_delta = np.random.uniform(hue_lower, hue_upper)
-    im = np.array(im.convert('HSV'))
-    im[:, :, 0] = im[:, :, 0] + hue_delta
-    im = Image.fromarray(im, mode='HSV').convert('RGB')
-    return im
-
-
-def rotate(im, rotate_lower, rotate_upper):
-    rotate_delta = np.random.uniform(rotate_lower, rotate_upper)
-    im = im.rotate(int(rotate_delta))
-    return im
diff --git a/dygraph/transforms/transforms.py b/dygraph/transforms/transforms.py
deleted file mode 100644
index 91404ade7d263c6df551ee8b15f74f9d1df96ae0..0000000000000000000000000000000000000000
--- a/dygraph/transforms/transforms.py
+++ /dev/null
@@ -1,576 +0,0 @@
-# coding: utf8
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import random
-from collections import OrderedDict
-
-import numpy as np
-from PIL import Image
-import cv2
-
-from .functional import *
-from dygraph.cvlibs import manager
-
-
-@manager.TRANSFORMS.add_component
-class Compose:
-    def __init__(self, transforms, to_rgb=True):
-        if not isinstance(transforms, list):
-            raise TypeError('The transforms must be a list!')
-        if len(transforms) < 1:
-            raise ValueError('The length of transforms ' + \
-                            'must be equal or larger than 1!')
-        self.transforms = transforms
-        self.to_rgb = to_rgb
-
-    def __call__(self, im, im_info=None, label=None):
-        if im_info is None:
-            im_info = list()
-        if isinstance(im, str):
-            im = cv2.imread(im).astype('float32')
-        if isinstance(label, str):
-            label = np.asarray(Image.open(label))
-        if im is None:
-            raise ValueError('Can\'t read The image file {}!'.format(im))
-        if self.to_rgb:
-            im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
-
-        for op in self.transforms:
-            outputs = op(im, im_info, label)
-            im = outputs[0]
-            if len(outputs) >= 2:
-                im_info = outputs[1]
-            if len(outputs) == 3:
-                label = outputs[2]
-        im = permute(im)
-        # if len(outputs) == 3:
-        #     label = label[np.newaxis, :, :]
-        return (im, im_info, label)
-
-
-@manager.TRANSFORMS.add_component
-class RandomHorizontalFlip:
-    def __init__(self, prob=0.5):
-        self.prob = prob
-
-    def __call__(self, im, im_info=None, label=None):
-        if random.random() < self.prob:
-            im = horizontal_flip(im)
-            if label is not None:
-                label = horizontal_flip(label)
-        if label is None:
-            return (im, im_info)
-        else:
-            return (im, im_info, label)
-
-
-@manager.TRANSFORMS.add_component
-class RandomVerticalFlip:
-    def __init__(self, prob=0.1):
-        self.prob = prob
-
-    def __call__(self, im, im_info=None, label=None):
-        if random.random() < self.prob:
-            im = vertical_flip(im)
-            if label is not None:
-                label = vertical_flip(label)
-        if label is None:
-            return (im, im_info)
-        else:
-            return (im, im_info, label)
-
-
-@manager.TRANSFORMS.add_component
-class Resize:
-    # The interpolation mode
-    interp_dict = {
-        'NEAREST': cv2.INTER_NEAREST,
-        'LINEAR': cv2.INTER_LINEAR,
-        'CUBIC': cv2.INTER_CUBIC,
-        'AREA': cv2.INTER_AREA,
-        'LANCZOS4': cv2.INTER_LANCZOS4
-    }
-
-    def __init__(self, target_size=512, interp='LINEAR'):
-        self.interp = interp
-        if not (interp == "RANDOM" or interp in self.interp_dict):
-            raise ValueError("interp should be one of {}".format(
-                self.interp_dict.keys()))
-        if isinstance(target_size, list) or isinstance(target_size, tuple):
-            if len(target_size) != 2:
-                raise TypeError(
-                    'when target is list or tuple, it should include 2 elements, but it is {}'
-                    .format(target_size))
-        elif not isinstance(target_size, int):
-            raise TypeError(
-                "Type of target_size is invalid. Must be Integer or List or tuple, now is {}"
-                .format(type(target_size)))
-
-        self.target_size = target_size
-
-    def __call__(self, im, im_info=None, label=None):
-        if im_info is None:
-            im_info = list()
-        im_info.append(('resize', im.shape[:2]))
-        if not isinstance(im, np.ndarray):
-            raise TypeError("Resize: image type is not numpy.")
-        if len(im.shape) != 3:
-            raise ValueError('Resize: image is not 3-dimensional.')
-        if self.interp == "RANDOM":
-            interp = random.choice(list(self.interp_dict.keys()))
-        else:
-            interp = self.interp
-        im = resize(im, self.target_size, self.interp_dict[interp])
-        if label is not None:
-            label = resize(label, self.target_size, cv2.INTER_NEAREST)
-
-        if label is None:
-            return (im, im_info)
-        else:
-            return (im, im_info, label)
-
-
-@manager.TRANSFORMS.add_component
-class ResizeByLong:
-    def __init__(self, long_size):
-        self.long_size = long_size
-
-    def __call__(self, im, im_info=None, label=None):
-        if im_info is None:
-            im_info = list()
-
-        im_info.append(('resize', im.shape[:2]))
-        im = resize_long(im, self.long_size)
-        if label is not None:
-            label = resize_long(label, self.long_size, cv2.INTER_NEAREST)
-
-        if label is None:
-            return (im, im_info)
-        else:
-            return (im, im_info, label)
-
-
-@manager.TRANSFORMS.add_component
-class ResizeRangeScaling:
-    def __init__(self, min_value=400, max_value=600):
-        if min_value > max_value:
-            raise ValueError('min_value must be less than max_value, '
-                             'but they are {} and {}.'.format(
-                                 min_value, max_value))
-        self.min_value = min_value
-        self.max_value = max_value
-
-    def __call__(self, im, im_info=None, label=None):
-        if self.min_value == self.max_value:
-            random_size = self.max_value
-        else:
-            random_size = int(
-                np.random.uniform(self.min_value, self.max_value) + 0.5)
-        im = resize_long(im, random_size, cv2.INTER_LINEAR)
-        if label is not None:
-            label = resize_long(label, random_size, cv2.INTER_NEAREST)
-
-        if label is None:
-            return (im, im_info)
-        else:
-            return (im, im_info, label)
-
-
-@manager.TRANSFORMS.add_component
-class ResizeStepScaling:
-    def __init__(self,
-                 min_scale_factor=0.75,
-                 max_scale_factor=1.25,
-                 scale_step_size=0.25):
-        if min_scale_factor > max_scale_factor:
-            raise ValueError(
-                'min_scale_factor must be less than max_scale_factor, '
-                'but they are {} and {}.'.format(min_scale_factor,
-                                                 max_scale_factor))
-        self.min_scale_factor = min_scale_factor
-        self.max_scale_factor = max_scale_factor
-        self.scale_step_size = scale_step_size
-
-    def __call__(self, im, im_info=None, label=None):
-        if self.min_scale_factor == self.max_scale_factor:
-            scale_factor = self.min_scale_factor
-
-        elif self.scale_step_size == 0:
-            scale_factor = np.random.uniform(self.min_scale_factor,
-                                             self.max_scale_factor)
-
-        else:
-            num_steps = int((self.max_scale_factor - self.min_scale_factor) /
-                            self.scale_step_size + 1)
-            scale_factors = np.linspace(self.min_scale_factor,
-                                        self.max_scale_factor,
-                                        num_steps).tolist()
-            np.random.shuffle(scale_factors)
-            scale_factor = scale_factors[0]
-        w = int(round(scale_factor * im.shape[1]))
-        h = int(round(scale_factor * im.shape[0]))
-
-        im = resize(im, (w, h), cv2.INTER_LINEAR)
-        if label is not None:
-            label = resize(label, (w, h), cv2.INTER_NEAREST)
-
-        if label is None:
-            return (im, im_info)
-        else:
-            return (im, im_info, label)
-
-
-@manager.TRANSFORMS.add_component
-class Normalize:
-    def __init__(self, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]):
-        self.mean = mean
-        self.std = std
-        if not (isinstance(self.mean, list) and isinstance(self.std, list)):
-            raise ValueError("{}: input type is invalid.".format(self))
-        from functools import reduce
-        if reduce(lambda x, y: x * y, self.std) == 0:
-            raise ValueError('{}: std is invalid!'.format(self))
-
-    def __call__(self, im, im_info=None, label=None):
-        mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
-        std = np.array(self.std)[np.newaxis, np.newaxis, :]
-        im = normalize(im, mean, std)
-
-        if label is None:
-            return (im, im_info)
-        else:
-            return (im, im_info, label)
-
-
-@manager.TRANSFORMS.add_component
-class Padding:
-    def __init__(self,
-                 target_size,
-                 im_padding_value=[127.5, 127.5, 127.5],
-                 label_padding_value=255):
-        if isinstance(target_size, list) or isinstance(target_size, tuple):
-            if len(target_size) != 2:
-                raise ValueError(
-                    'when target is list or tuple, it should include 2 elements, but it is {}'
-                    .format(target_size))
-        elif not isinstance(target_size, int):
-            raise TypeError(
-                "Type of target_size is invalid. Must be Integer or List or tuple, now is {}"
-                .format(type(target_size)))
-        self.target_size = target_size
-        self.im_padding_value = im_padding_value
-        self.label_padding_value = label_padding_value
-
-    def __call__(self, im, im_info=None, label=None):
-        if im_info is None:
-            im_info = list()
-        im_info.append(('padding', im.shape[:2]))
-
-        im_height, im_width = im.shape[0], im.shape[1]
-        if isinstance(self.target_size, int):
-            target_height = self.target_size
-            target_width = self.target_size
-        else:
-            target_height = self.target_size[1]
-            target_width = self.target_size[0]
-        pad_height = target_height - im_height
-        pad_width = target_width - im_width
-        if pad_height < 0 or pad_width < 0:
-            raise ValueError(
-                'the size of image should be less than target_size, but the size of image ({}, {}), is larger than target_size ({}, {})'
-                .format(im_width, im_height, target_width, target_height))
-        else:
-            im = cv2.copyMakeBorder(
-                im,
-                0,
-                pad_height,
-                0,
-                pad_width,
-                cv2.BORDER_CONSTANT,
-                value=self.im_padding_value)
-            if label is not None:
-                label = cv2.copyMakeBorder(
-                    label,
-                    0,
-                    pad_height,
-                    0,
-                    pad_width,
-                    cv2.BORDER_CONSTANT,
-                    value=self.label_padding_value)
-        if label is None:
-            return (im, im_info)
-        else:
-            return (im, im_info, label)
-
-
-@manager.TRANSFORMS.add_component
-class RandomPaddingCrop:
-    def __init__(self,
-                 crop_size=512,
-                 im_padding_value=[127.5, 127.5, 127.5],
-                 label_padding_value=255):
-        if isinstance(crop_size, list) or isinstance(crop_size, tuple):
-            if len(crop_size) != 2:
-                raise ValueError(
-                    'when crop_size is list or tuple, it should include 2 elements, but it is {}'
-                    .format(crop_size))
-        elif not isinstance(crop_size, int):
-            raise TypeError(
-                "Type of crop_size is invalid. Must be Integer or List or tuple, now is {}"
-                .format(type(crop_size)))
-        self.crop_size = crop_size
-        self.im_padding_value = im_padding_value
-        self.label_padding_value = label_padding_value
-
-    def __call__(self, im, im_info=None, label=None):
-        if isinstance(self.crop_size, int):
-            crop_width = self.crop_size
-            crop_height = self.crop_size
-        else:
-            crop_width = self.crop_size[0]
-            crop_height = self.crop_size[1]
-
-        img_height = im.shape[0]
-        img_width = im.shape[1]
-
-        if img_height == crop_height and img_width == crop_width:
-            if label is None:
-                return (im, im_info)
-            else:
-                return (im, im_info, label)
-        else:
-            pad_height = max(crop_height - img_height, 0)
-            pad_width = max(crop_width - img_width, 0)
-            if (pad_height > 0 or pad_width > 0):
-                im = cv2.copyMakeBorder(
-                    im,
-                    0,
-                    pad_height,
-                    0,
-                    pad_width,
-                    cv2.BORDER_CONSTANT,
-                    value=self.im_padding_value)
-                if label is not None:
-                    label = cv2.copyMakeBorder(
-                        label,
-                        0,
-                        pad_height,
-                        0,
-                        pad_width,
-                        cv2.BORDER_CONSTANT,
-                        value=self.label_padding_value)
-                img_height = im.shape[0]
-                img_width = im.shape[1]
-
-            if crop_height > 0 and crop_width > 0:
-                h_off = np.random.randint(img_height - crop_height + 1)
-                w_off = np.random.randint(img_width - crop_width + 1)
-
-                im = im[h_off:(crop_height + h_off), w_off:(
-                    w_off + crop_width), :]
-                if label is not None:
-                    label = label[h_off:(crop_height + h_off), w_off:(
-                        w_off + crop_width)]
-        if label is None:
-            return (im, im_info)
-        else:
-            return (im, im_info, label)
-
-
-@manager.TRANSFORMS.add_component
-class RandomBlur:
-    def __init__(self, prob=0.1):
-        self.prob = prob
-
-    def __call__(self, im, im_info=None, label=None):
-        if self.prob <= 0:
-            n = 0
-        elif self.prob >= 1:
-            n = 1
-        else:
-            n = int(1.0 / self.prob)
-        if n > 0:
-            if np.random.randint(0, n) == 0:
-                radius = np.random.randint(3, 10)
-                if radius % 2 != 1:
-                    radius = radius + 1
-                if radius > 9:
-                    radius = 9
-                im = cv2.GaussianBlur(im, (radius, radius), 0, 0)
-
-        if label is None:
-            return (im, im_info)
-        else:
-            return (im, im_info, label)
-
-
-@manager.TRANSFORMS.add_component
-class RandomRotation:
-    def __init__(self,
-                 max_rotation=15,
-                 im_padding_value=[127.5, 127.5, 127.5],
-                 label_padding_value=255):
-        self.max_rotation = max_rotation
-        self.im_padding_value = im_padding_value
-        self.label_padding_value = label_padding_value
-
-    def __call__(self, im, im_info=None, label=None):
-        if self.max_rotation > 0:
-            (h, w) = im.shape[:2]
-            do_rotation = np.random.uniform(-self.max_rotation,
-                                            self.max_rotation)
-            pc = (w // 2, h // 2)
-            r = cv2.getRotationMatrix2D(pc, do_rotation, 1.0)
-            cos = np.abs(r[0, 0])
-            sin = np.abs(r[0, 1])
-
-            nw = int((h * sin) + (w * cos))
-            nh = int((h * cos) + (w * sin))
-
-            (cx, cy) = pc
-            r[0, 2] += (nw / 2) - cx
-            r[1, 2] += (nh / 2) - cy
-            dsize = (nw, nh)
-            im = cv2.warpAffine(
-                im,
-                r,
-                dsize=dsize,
-                flags=cv2.INTER_LINEAR,
-                borderMode=cv2.BORDER_CONSTANT,
-                borderValue=self.im_padding_value)
-            label = cv2.warpAffine(
-                label,
-                r,
-                dsize=dsize,
-                flags=cv2.INTER_NEAREST,
-                borderMode=cv2.BORDER_CONSTANT,
-                borderValue=self.label_padding_value)
-
-        if label is None:
-            return (im, im_info)
-        else:
-            return (im, im_info, label)
-
-
-@manager.TRANSFORMS.add_component
-class RandomScaleAspect:
-    def __init__(self, min_scale=0.5, aspect_ratio=0.33):
-        self.min_scale = min_scale
-        self.aspect_ratio = aspect_ratio
-
-    def __call__(self, im, im_info=None, label=None):
-        if self.min_scale != 0 and self.aspect_ratio != 0:
-            img_height = im.shape[0]
-            img_width = im.shape[1]
-            for i in range(0, 10):
-                area = img_height * img_width
-                target_area = area * np.random.uniform(self.min_scale, 1.0)
-                aspectRatio = np.random.uniform(self.aspect_ratio,
-                                                1.0 / self.aspect_ratio)
-
-                dw = int(np.sqrt(target_area * 1.0 * aspectRatio))
-                dh = int(np.sqrt(target_area * 1.0 / aspectRatio))
-                if (np.random.randint(10) < 5):
-                    tmp = dw
-                    dw = dh
-                    dh = tmp
-
-                if (dh < img_height and dw < img_width):
-                    h1 = np.random.randint(0, img_height - dh)
-                    w1 = np.random.randint(0, img_width - dw)
-
-                    im = im[h1:(h1 + dh), w1:(w1 + dw), :]
-                    label = label[h1:(h1 + dh), w1:(w1 + dw)]
-                    im = cv2.resize(
-                        im, (img_width, img_height),
-                        interpolation=cv2.INTER_LINEAR)
-                    label = cv2.resize(
-                        label, (img_width, img_height),
-                        interpolation=cv2.INTER_NEAREST)
-                    break
-        if label is None:
-            return (im, im_info)
-        else:
-            return (im, im_info, label)
-
-
-@manager.TRANSFORMS.add_component
-class RandomDistort:
-    def __init__(self,
-                 brightness_range=0.5,
-                 brightness_prob=0.5,
-                 contrast_range=0.5,
-                 contrast_prob=0.5,
-                 saturation_range=0.5,
-                 saturation_prob=0.5,
-                 hue_range=18,
-                 hue_prob=0.5):
-        self.brightness_range = brightness_range
-        self.brightness_prob = brightness_prob
-        self.contrast_range = contrast_range
-        self.contrast_prob = contrast_prob
-        self.saturation_range = saturation_range
-        self.saturation_prob = saturation_prob
-        self.hue_range = hue_range
-        self.hue_prob = hue_prob
-
-    def __call__(self, im, im_info=None, label=None):
-        brightness_lower = 1 - self.brightness_range
-        brightness_upper = 1 + self.brightness_range
-        contrast_lower = 1 - self.contrast_range
-        contrast_upper = 1 + self.contrast_range
-        saturation_lower = 1 - self.saturation_range
-        saturation_upper = 1 + self.saturation_range
-        hue_lower = -self.hue_range
-        hue_upper = self.hue_range
-        ops = [brightness, contrast, saturation, hue]
-        random.shuffle(ops)
-        params_dict = {
-            'brightness': {
-                'brightness_lower': brightness_lower,
-                'brightness_upper': brightness_upper
-            },
-            'contrast': {
-                'contrast_lower': contrast_lower,
-                'contrast_upper': contrast_upper
-            },
-            'saturation': {
-                'saturation_lower': saturation_lower,
-                'saturation_upper': saturation_upper
-            },
-            'hue': {
-                'hue_lower': hue_lower,
-                'hue_upper': hue_upper
-            }
-        }
-        prob_dict = {
-            'brightness': self.brightness_prob,
-            'contrast': self.contrast_prob,
-            'saturation': self.saturation_prob,
-            'hue': self.hue_prob
-        }
-        im = im.astype('uint8')
-        im = Image.fromarray(im)
-        for id in range(4):
-            params = params_dict[ops[id].__name__]
-            prob = prob_dict[ops[id].__name__]
-            params['im'] = im
-            if np.random.uniform(0, 1) < prob:
-                im = ops[id](**params)
-        im = np.asarray(im).astype('float32')
-        if label is None:
-            return (im, im_info)
-        else:
-            return (im, im_info, label)
diff --git a/dygraph/utils/__init__.py b/dygraph/utils/__init__.py
deleted file mode 100644
index a22f9e5ec0ff32a5e42b6c2d7d6bed14a56994a1..0000000000000000000000000000000000000000
--- a/dygraph/utils/__init__.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from . import logger
-from . import download
-from .metrics import ConfusionMatrix
-from .utils import *
-from .timer import Timer, calculate_eta
-from .get_environ_info import get_environ_info
-from .config import Config
diff --git a/dygraph/utils/config.py b/dygraph/utils/config.py
deleted file mode 100644
index e0577a6e0eaaa5353c680bfded30094cacd969ab..0000000000000000000000000000000000000000
--- a/dygraph/utils/config.py
+++ /dev/null
@@ -1,241 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import codecs
-import os
-from typing import Any, Callable
-
-import yaml
-import paddle.fluid as fluid
-
-import dygraph.cvlibs.manager as manager
-
-
-class Config(object):
-    '''
-    Training config.
-
-    Args:
-        path(str) : the path of config file, supports yaml format only
-    '''
-
-    def __init__(self, path: str):
-        if not os.path.exists(path):
-            raise FileNotFoundError('File {} does not exist'.format(path))
-
-        if path.endswith('yml') or path.endswith('yaml'):
-            dic = self._parse_from_yaml(path)
-            print(dic)
-            self._build(dic)
-        else:
-            raise RuntimeError('Config file should in yaml format!')
-
-    def _update_dic(self, dic, base_dic):
-        """
-        update config from dic based base_dic
-        """
-        base_dic = base_dic.copy()
-        for key, val in dic.items():
-            if isinstance(val, dict) and key in base_dic:
-                base_dic[key] = self._update_dic(val, base_dic[key])
-            else:
-                base_dic[key] = val
-        dic = base_dic
-        return dic
-
-    def _parse_from_yaml(self, path: str):
-        '''Parse a yaml file and build config'''
-        with codecs.open(path, 'r', 'utf-8') as file:
-            dic = yaml.load(file, Loader=yaml.FullLoader)
-        if '_base_' in dic:
-            cfg_dir = os.path.dirname(path)
-            base_path = dic.pop('_base_')
-            base_path = os.path.join(cfg_dir, base_path)
-            base_dic = self._parse_from_yaml(base_path)
-            dic = self._update_dic(dic, base_dic)
-        return dic
-
-    def _build(self, dic: dict):
-        '''Build config from dictionary'''
-        dic = dic.copy()
-
-        self._batch_size = dic.get('batch_size', 1)
-        self._iters = dic.get('iters')
-
-        if 'model' not in dic:
-            raise RuntimeError()
-        self._model_cfg = dic['model']
-        self._model = None
-
-        self._train_dataset = dic.get('train_dataset')
-        self._val_dataset = dic.get('val_dataset')
-
-        self._learning_rate_cfg = dic.get('learning_rate', {})
-        self._learning_rate = self._learning_rate_cfg.get('value')
-        self._decay = self._learning_rate_cfg.get('decay', {
-            'type': 'poly',
-            'power': 0.9
-        })
-
-        self._loss_cfg = dic.get('loss', {})
-        self._losses = None
-
-        self._optimizer_cfg = dic.get('optimizer', {})
-
-    def update(self,
-               learning_rate: float = None,
-               batch_size: int = None,
-               iters: int = None):
-        '''Update config'''
-        if learning_rate:
-            self._learning_rate = learning_rate
-
-        if batch_size:
-            self._batch_size = batch_size
-
-        if iters:
-            self._iters = iters
-
-    @property
-    def batch_size(self) -> int:
-        return self._batch_size
-
-    @property
-    def iters(self) -> int:
-        if not self._iters:
-            raise RuntimeError('No iters specified in the configuration file.')
-        return self._iters
-
-    @property
-    def learning_rate(self) -> float:
-        if not self._learning_rate:
-            raise RuntimeError(
-                'No learning rate specified in the configuration file.')
-
-        if self.decay_type == 'poly':
-            lr = self._learning_rate
-            args = self.decay_args
-            args.setdefault('decay_steps', self.iters)
-            return fluid.layers.polynomial_decay(lr, **args)
-        else:
-            raise RuntimeError('Only poly decay support.')
-
-    @property
-    def optimizer(self) -> fluid.optimizer.Optimizer:
-        if self.optimizer_type == 'sgd':
-            lr = self.learning_rate
-            args = self.optimizer_args
-            args.setdefault('momentum', 0.9)
-            return fluid.optimizer.Momentum(
-                lr, parameter_list=self.model.parameters(), **args)
-        else:
-            raise RuntimeError('Only sgd optimizer support.')
-
-    @property
-    def optimizer_type(self) -> str:
-        otype = self._optimizer_cfg.get('type')
-        if not otype:
-            raise RuntimeError(
-                'No optimizer type specified in the configuration file.')
-        return otype
-
-    @property
-    def optimizer_args(self) -> dict:
-        args = self._optimizer_cfg.copy()
-        args.pop('type')
-        return args
-
-    @property
-    def decay_type(self) -> str:
-        return self._decay['type']
-
-    @property
-    def decay_args(self) -> dict:
-        args = self._decay.copy()
-        args.pop('type')
-        return args
-
-    @property
-    def loss(self) -> list:
-        if not self._losses:
-            args = self._loss_cfg.copy()
-            self._losses = dict()
-            for key, val in args.items():
-                if key == 'types':
-                    self._losses['types'] = []
-                    for item in args['types']:
-                        self._losses['types'].append(self._load_object(item))
-                else:
-                    self._losses[key] = val
-            if len(self._losses['coef']) != len(self._losses['types']):
-                raise RuntimeError(
-                    'The length of coef should equal to types in loss config: {} != {}.'
-                    .format(
-                        len(self._losses['coef']), len(self._losses['types'])))
-        return self._losses
-
-    @property
-    def model(self) -> Callable:
-        if not self._model:
-            self._model = self._load_object(self._model_cfg)
-        return self._model
-
-    @property
-    def train_dataset(self) -> Any:
-        if not self._train_dataset:
-            return None
-        return self._load_object(self._train_dataset)
-
-    @property
-    def val_dataset(self) -> Any:
-        if not self._val_dataset:
-            return None
-        return self._load_object(self._val_dataset)
-
-    def _load_component(self, com_name: str) -> Any:
-        com_list = [
-            manager.MODELS, manager.BACKBONES, manager.DATASETS,
-            manager.TRANSFORMS, manager.LOSSES
-        ]
-
-        for com in com_list:
-            if com_name in com.components_dict:
-                return com[com_name]
-        else:
-            raise RuntimeError(
-                'The specified component was not found {}.'.format(com_name))
-
-    def _load_object(self, cfg: dict) -> Any:
-        cfg = cfg.copy()
-        if 'type' not in cfg:
-            raise RuntimeError('No object information in {}.'.format(cfg))
-
-        component = self._load_component(cfg.pop('type'))
-
-        params = {}
-        for key, val in cfg.items():
-            if self._is_meta_type(val):
-                params[key] = self._load_object(val)
-            elif isinstance(val, list):
-                params[key] = [
-                    self._load_object(item)
-                    if self._is_meta_type(item) else item for item in val
-                ]
-            else:
-                params[key] = val
-
-        return component(**params)
-
-    def _is_meta_type(self, item: Any) -> bool:
-        return isinstance(item, dict) and 'type' in item
diff --git a/dygraph/utils/download.py b/dygraph/utils/download.py
deleted file mode 100644
index 7bf6dd096a4b33587b47bed127673d8fe09aefbb..0000000000000000000000000000000000000000
--- a/dygraph/utils/download.py
+++ /dev/null
@@ -1,135 +0,0 @@
-import os
-import sys
-import time
-import requests
-import tarfile
-import zipfile
-import shutil
-import functools
-
-lasttime = time.time()
-FLUSH_INTERVAL = 0.1
-
-
-def progress(str, end=False):
-    global lasttime
-    if end:
-        str += "\n"
-        lasttime = 0
-    if time.time() - lasttime >= FLUSH_INTERVAL:
-        sys.stdout.write("\r%s" % str)
-        lasttime = time.time()
-        sys.stdout.flush()
-
-
-def _download_file(url, savepath, print_progress):
-    r = requests.get(url, stream=True)
-    total_length = r.headers.get('content-length')
-
-    if total_length is None:
-        with open(savepath, 'wb') as f:
-            shutil.copyfileobj(r.raw, f)
-    else:
-        with open(savepath, 'wb') as f:
-            dl = 0
-            total_length = int(total_length)
-            starttime = time.time()
-            if print_progress:
-                print("Downloading %s" % os.path.basename(savepath))
-            for data in r.iter_content(chunk_size=4096):
-                dl += len(data)
-                f.write(data)
-                if print_progress:
-                    done = int(50 * dl / total_length)
-                    progress("[%-50s] %.2f%%" %
-                             ('=' * done, float(100 * dl) / total_length))
-        if print_progress:
-            progress("[%-50s] %.2f%%" % ('=' * 50, 100), end=True)
-
-
-def _uncompress_file_zip(filepath, extrapath):
-    files = zipfile.ZipFile(filepath, 'r')
-    filelist = files.namelist()
-    rootpath = filelist[0]
-    total_num = len(filelist)
-    for index, file in enumerate(filelist):
-        files.extract(file, extrapath)
-        yield total_num, index, rootpath
-    files.close()
-    yield total_num, index, rootpath
-
-
-def _uncompress_file_tar(filepath, extrapath, mode="r:gz"):
-    files = tarfile.open(filepath, mode)
-    filelist = files.getnames()
-    total_num = len(filelist)
-    rootpath = filelist[0]
-    for index, file in enumerate(filelist):
-        files.extract(file, extrapath)
-        yield total_num, index, rootpath
-    files.close()
-    yield total_num, index, rootpath
-
-
-def _uncompress_file(filepath, extrapath, delete_file, print_progress):
-    if print_progress:
-        print("Uncompress %s" % os.path.basename(filepath))
-
-    if filepath.endswith("zip"):
-        handler = _uncompress_file_zip
-    elif filepath.endswith("tgz"):
-        handler = _uncompress_file_tar
-    else:
-        handler = functools.partial(_uncompress_file_tar, mode="r")
-
-    for total_num, index, rootpath in handler(filepath, extrapath):
-        if print_progress:
-            done = int(50 * float(index) / total_num)
-            progress(
-                "[%-50s] %.2f%%" % ('=' * done, float(100 * index) / total_num))
-    if print_progress:
-        progress("[%-50s] %.2f%%" % ('=' * 50, 100), end=True)
-
-    if delete_file:
-        os.remove(filepath)
-
-    return rootpath
-
-
-def download_file_and_uncompress(url,
-                                 savepath=None,
-                                 extrapath=None,
-                                 extraname=None,
-                                 print_progress=True,
-                                 cover=False,
-                                 delete_file=True):
-    if savepath is None:
-        savepath = "."
-
-    if extrapath is None:
-        extrapath = "."
-
-    savename = url.split("/")[-1]
-    savepath = os.path.join(savepath, savename)
-    savename = ".".join(savename.split(".")[:-1])
-    savename = os.path.join(extrapath, savename)
-    extraname = savename if extraname is None else os.path.join(
-        extrapath, extraname)
-
-    if cover:
-        if os.path.exists(savepath):
-            shutil.rmtree(savepath)
-        if os.path.exists(savename):
-            shutil.rmtree(savename)
-        if os.path.exists(extraname):
-            shutil.rmtree(extraname)
-
-    if not os.path.exists(extraname):
-        if not os.path.exists(savename):
-            if not os.path.exists(savepath):
-                _download_file(url, savepath, print_progress)
-            savename = _uncompress_file(savepath, extrapath, delete_file,
-                                        print_progress)
-            savename = os.path.join(extrapath, savename)
-        shutil.move(savename, extraname)
-    return extraname
diff --git a/dygraph/utils/get_environ_info.py b/dygraph/utils/get_environ_info.py
deleted file mode 100644
index 7d789f4d60e875fd11514fa13b901885be7b0024..0000000000000000000000000000000000000000
--- a/dygraph/utils/get_environ_info.py
+++ /dev/null
@@ -1,118 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import sys
-from collections import OrderedDict
-import subprocess
-import glob
-
-import paddle
-import paddle.fluid as fluid
-import cv2
-
-IS_WINDOWS = sys.platform == 'win32'
-
-
-def _find_cuda_home():
-    '''Finds the CUDA install path. It refers to the implementation of
-    pytorch <https://github.com/pytorch/pytorch/blob/master/torch/utils/cpp_extension.py>.
-    '''
-    # Guess #1
-    cuda_home = os.environ.get('CUDA_HOME') or os.environ.get('CUDA_PATH')
-    if cuda_home is None:
-        # Guess #2
-        try:
-            which = 'where' if IS_WINDOWS else 'which'
-            nvcc = subprocess.check_output([which,
-                                            'nvcc']).decode().rstrip('\r\n')
-            cuda_home = os.path.dirname(os.path.dirname(nvcc))
-        except Exception:
-            # Guess #3
-            if IS_WINDOWS:
-                cuda_homes = glob.glob(
-                    'C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v*.*')
-                if len(cuda_homes) == 0:
-                    cuda_home = ''
-                else:
-                    cuda_home = cuda_homes[0]
-            else:
-                cuda_home = '/usr/local/cuda'
-            if not os.path.exists(cuda_home):
-                cuda_home = None
-    return cuda_home
-
-
-def _get_nvcc_info(cuda_home):
-    if cuda_home is not None and os.path.isdir(cuda_home):
-        try:
-            nvcc = os.path.join(cuda_home, 'bin/nvcc')
-            nvcc = subprocess.check_output(
-                "{} -V".format(nvcc), shell=True).decode()
-            nvcc = nvcc.strip().split('\n')[-1]
-        except subprocess.SubprocessError:
-            nvcc = "Not Available"
-    return nvcc
-
-
-def _get_gpu_info():
-    try:
-        gpu_info = subprocess.check_output(['nvidia-smi',
-                                            '-L']).decode().strip()
-        gpu_info = gpu_info.split('\n')
-        for i in range(len(gpu_info)):
-            gpu_info[i] = ' '.join(gpu_info[i].split(' ')[:4])
-    except:
-        gpu_info = ' Can not get GPU information. Please make sure CUDA have been installed successfully.'
-    return gpu_info
-
-
-def get_environ_info():
-    """collect environment information"""
-    env_info = {}
-    env_info['System Platform'] = sys.platform
-    if env_info['System Platform'] == 'linux':
-        try:
-            lsb_v = subprocess.check_output(['lsb_release',
-                                             '-v']).decode().strip()
-            lsb_v = lsb_v.replace('\t', ' ')
-            lsb_d = subprocess.check_output(['lsb_release',
-                                             '-d']).decode().strip()
-            lsb_d = lsb_d.replace('\t', ' ')
-            env_info['LSB'] = [lsb_v, lsb_d]
-        except:
-            pass
-
-    env_info['Python'] = sys.version.replace('\n', '')
-
-    compiled_with_cuda = paddle.fluid.is_compiled_with_cuda()
-    env_info['Paddle compiled with cuda'] = compiled_with_cuda
-
-    if compiled_with_cuda:
-        cuda_home = _find_cuda_home()
-        env_info['NVCC'] = _get_nvcc_info(cuda_home)
-        gpu_nums = fluid.core.get_cuda_device_count()
-        env_info['GPUs used'] = gpu_nums
-        env_info['CUDA_VISIBLE_DEVICES'] = os.environ.get(
-            'CUDA_VISIBLE_DEVICES')
-        env_info['GPU'] = _get_gpu_info()
-
-    gcc = subprocess.check_output(['gcc', '--version']).decode()
-    gcc = gcc.strip().split('\n')[0]
-    env_info['GCC'] = gcc
-
-    env_info['PaddlePaddle'] = paddle.__version__
-    env_info['OpenCV'] = cv2.__version__
-
-    return env_info
diff --git a/dygraph/utils/logger.py b/dygraph/utils/logger.py
deleted file mode 100644
index 015948f65090e40895f6d4a72a75a11f2b155447..0000000000000000000000000000000000000000
--- a/dygraph/utils/logger.py
+++ /dev/null
@@ -1,50 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import time
-import os
-import sys
-
-from paddle.fluid.dygraph.parallel import ParallelEnv
-
-levels = {0: 'ERROR', 1: 'WARNING', 2: 'INFO', 3: 'DEBUG'}
-log_level = 2
-
-
-def log(level=2, message=""):
-    if ParallelEnv().local_rank == 0:
-        current_time = time.time()
-        time_array = time.localtime(current_time)
-        current_time = time.strftime("%Y-%m-%d %H:%M:%S", time_array)
-        if log_level >= level:
-            print(
-                "{} [{}]\t{}".format(current_time, levels[level],
-                                     message).encode("utf-8").decode("latin1"))
-            sys.stdout.flush()
-
-
-def debug(message=""):
-    log(level=3, message=message)
-
-
-def info(message=""):
-    log(level=2, message=message)
-
-
-def warning(message=""):
-    log(level=1, message=message)
-
-
-def error(message=""):
-    log(level=0, message=message)
diff --git a/dygraph/utils/metrics.py b/dygraph/utils/metrics.py
deleted file mode 100644
index b107cbd57a936fb909086567fc8b703fb86963b7..0000000000000000000000000000000000000000
--- a/dygraph/utils/metrics.py
+++ /dev/null
@@ -1,144 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import sys
-import numpy as np
-from scipy.sparse import csr_matrix
-
-
-class ConfusionMatrix(object):
-    """
-        Confusion Matrix for segmentation evaluation
-    """
-
-    def __init__(self, num_classes=2, streaming=False):
-        self.confusion_matrix = np.zeros([num_classes, num_classes],
-                                         dtype='int64')
-        self.num_classes = num_classes
-        self.streaming = streaming
-
-    def calculate(self, pred, label, ignore=None):
-        # If not in streaming mode, clear matrix everytime when call `calculate`
-        if not self.streaming:
-            self.zero_matrix()
-
-        label = np.transpose(label, (0, 2, 3, 1))
-        ignore = np.transpose(ignore, (0, 2, 3, 1))
-        mask = np.array(ignore) == 1
-
-        label = np.asarray(label)[mask]
-        pred = np.asarray(pred)[mask]
-        one = np.ones_like(pred)
-        # Accumuate ([row=label, col=pred], 1) into sparse matrix
-        spm = csr_matrix((one, (label, pred)),
-                         shape=(self.num_classes, self.num_classes))
-        spm = spm.todense()
-        self.confusion_matrix += spm
-
-    def zero_matrix(self):
-        """ Clear confusion matrix """
-        self.confusion_matrix = np.zeros([self.num_classes, self.num_classes],
-                                         dtype='int64')
-
-    def mean_iou(self):
-        iou_list = []
-        avg_iou = 0
-        # TODO: use numpy sum axis api to simpliy
-        vji = np.zeros(self.num_classes, dtype=int)
-        vij = np.zeros(self.num_classes, dtype=int)
-        for j in range(self.num_classes):
-            v_j = 0
-            for i in range(self.num_classes):
-                v_j += self.confusion_matrix[j][i]
-            vji[j] = v_j
-
-        for i in range(self.num_classes):
-            v_i = 0
-            for j in range(self.num_classes):
-                v_i += self.confusion_matrix[j][i]
-            vij[i] = v_i
-
-        for c in range(self.num_classes):
-            total = vji[c] + vij[c] - self.confusion_matrix[c][c]
-            if total == 0:
-                iou = 0
-            else:
-                iou = float(self.confusion_matrix[c][c]) / total
-            avg_iou += iou
-            iou_list.append(iou)
-        avg_iou = float(avg_iou) / float(self.num_classes)
-        return np.array(iou_list), avg_iou
-
-    def accuracy(self):
-        total = self.confusion_matrix.sum()
-        total_right = 0
-        for c in range(self.num_classes):
-            total_right += self.confusion_matrix[c][c]
-        if total == 0:
-            avg_acc = 0
-        else:
-            avg_acc = float(total_right) / total
-
-        vij = np.zeros(self.num_classes, dtype=int)
-        for i in range(self.num_classes):
-            v_i = 0
-            for j in range(self.num_classes):
-                v_i += self.confusion_matrix[j][i]
-            vij[i] = v_i
-
-        acc_list = []
-        for c in range(self.num_classes):
-            if vij[c] == 0:
-                acc = 0
-            else:
-                acc = self.confusion_matrix[c][c] / float(vij[c])
-            acc_list.append(acc)
-        return np.array(acc_list), avg_acc
-
-    def kappa(self):
-        vji = np.zeros(self.num_classes)
-        vij = np.zeros(self.num_classes)
-        for j in range(self.num_classes):
-            v_j = 0
-            for i in range(self.num_classes):
-                v_j += self.confusion_matrix[j][i]
-            vji[j] = v_j
-
-        for i in range(self.num_classes):
-            v_i = 0
-            for j in range(self.num_classes):
-                v_i += self.confusion_matrix[j][i]
-            vij[i] = v_i
-
-        total = self.confusion_matrix.sum()
-
-        # avoid spillovers
-        # TODO: is it reasonable to hard code 10000.0?
-        total = float(total) / 10000.0
-        vji = vji / 10000.0
-        vij = vij / 10000.0
-
-        tp = 0
-        tc = 0
-        for c in range(self.num_classes):
-            tp += vji[c] * vij[c]
-            tc += self.confusion_matrix[c][c]
-
-        tc = tc / 10000.0
-        pe = tp / (total * total)
-        po = tc / total
-
-        kappa = (po - pe) / (1 - pe)
-        return kappa
diff --git a/dygraph/utils/timer.py b/dygraph/utils/timer.py
deleted file mode 100644
index 4ebbddc9a154de4a36d6b6d9b437e14382031c49..0000000000000000000000000000000000000000
--- a/dygraph/utils/timer.py
+++ /dev/null
@@ -1,60 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import time
-
-
-class Timer(object):
-    """ Simple timer class for measuring time consuming """
-
-    def __init__(self):
-        self._start_time = 0.0
-        self._end_time = 0.0
-        self._elapsed_time = 0.0
-        self._is_running = False
-
-    def start(self):
-        self._is_running = True
-        self._start_time = time.time()
-
-    def restart(self):
-        self.start()
-
-    def stop(self):
-        self._is_running = False
-        self._end_time = time.time()
-
-    def elapsed_time(self):
-        self._end_time = time.time()
-        self._elapsed_time = self._end_time - self._start_time
-        if not self.is_running:
-            return 0.0
-
-        return self._elapsed_time
-
-    @property
-    def is_running(self):
-        return self._is_running
-
-
-def calculate_eta(remaining_step, speed):
-    if remaining_step < 0:
-        remaining_step = 0
-    remaining_time = int(remaining_step * speed)
-    result = "{:0>2}:{:0>2}:{:0>2}"
-    arr = []
-    for i in range(2, -1, -1):
-        arr.append(int(remaining_time / 60**i))
-        remaining_time %= 60**i
-    return result.format(*arr)
diff --git a/dygraph/utils/utils.py b/dygraph/utils/utils.py
deleted file mode 100644
index 0b7d87169a76a196926e7f9e2017ebd42a5605ad..0000000000000000000000000000000000000000
--- a/dygraph/utils/utils.py
+++ /dev/null
@@ -1,141 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import numpy as np
-import math
-import cv2
-import paddle.fluid as fluid
-
-from . import logger
-
-
-def seconds_to_hms(seconds):
-    h = math.floor(seconds / 3600)
-    m = math.floor((seconds - h * 3600) / 60)
-    s = int(seconds - h * 3600 - m * 60)
-    hms_str = "{}:{}:{}".format(h, m, s)
-    return hms_str
-
-
-def load_pretrained_model(model, pretrained_model):
-    if pretrained_model is not None:
-        logger.info('Load pretrained model from {}'.format(pretrained_model))
-        if os.path.exists(pretrained_model):
-            ckpt_path = os.path.join(pretrained_model, 'model')
-            try:
-                para_state_dict, _ = fluid.load_dygraph(ckpt_path)
-            except:
-                para_state_dict = fluid.load_program_state(pretrained_model)
-
-            model_state_dict = model.state_dict()
-            keys = model_state_dict.keys()
-            num_params_loaded = 0
-            for k in keys:
-                if k not in para_state_dict:
-                    logger.warning("{} is not in pretrained model".format(k))
-                elif list(para_state_dict[k].shape) != list(
-                        model_state_dict[k].shape):
-                    logger.warning(
-                        "[SKIP] Shape of pretrained params {} doesn't match.(Pretrained: {}, Actual: {})"
-                        .format(k, para_state_dict[k].shape,
-                                model_state_dict[k].shape))
-                else:
-                    model_state_dict[k] = para_state_dict[k]
-                    num_params_loaded += 1
-            model.set_dict(model_state_dict)
-            logger.info("There are {}/{} varaibles are loaded.".format(
-                num_params_loaded, len(model_state_dict)))
-
-        else:
-            raise ValueError(
-                'The pretrained model directory is not Found: {}'.format(
-                    pretrained_model))
-    else:
-        logger.warning('No pretrained model to load, train from scratch')
-
-
-def resume(model, optimizer, resume_model):
-    if resume_model is not None:
-        logger.info('Resume model from {}'.format(resume_model))
-        if os.path.exists(resume_model):
-            resume_model = os.path.normpath(resume_model)
-            ckpt_path = os.path.join(resume_model, 'model')
-            para_state_dict, opti_state_dict = fluid.load_dygraph(ckpt_path)
-            model.set_dict(para_state_dict)
-            optimizer.set_dict(opti_state_dict)
-            epoch = resume_model.split('_')[-1]
-            if epoch.isdigit():
-                epoch = int(epoch)
-            return epoch
-        else:
-            raise ValueError(
-                'The resume model directory is not Found: {}'.format(
-                    resume_model))
-    else:
-        logger.info('No model need to resume')
-
-
-def visualize(image, result, save_dir=None, weight=0.6):
-    """
-    Convert segment result to color image, and save added image.
-    Args:
-        image: the path of origin image
-        result: the predict result of image
-        save_dir: the directory for saving visual image
-        weight: the image weight of visual image, and the result weight is (1 - weight)
-    """
-    color_map = get_color_map_list(256)
-    color_map = np.array(color_map).astype("uint8")
-    # Use OpenCV LUT for color mapping
-    c1 = cv2.LUT(result, color_map[:, 0])
-    c2 = cv2.LUT(result, color_map[:, 1])
-    c3 = cv2.LUT(result, color_map[:, 2])
-    pseudo_img = np.dstack((c1, c2, c3))
-
-    im = cv2.imread(image)
-    vis_result = cv2.addWeighted(im, weight, pseudo_img, 1 - weight, 0)
-
-    if save_dir is not None:
-        if not os.path.exists(save_dir):
-            os.makedirs(save_dir)
-        image_name = os.path.split(image)[-1]
-        out_path = os.path.join(save_dir, image_name)
-        cv2.imwrite(out_path, vis_result)
-    else:
-        return vis_result
-
-
-def get_color_map_list(num_classes):
-    """ Returns the color map for visualizing the segmentation mask,
-        which can support arbitrary number of classes.
-    Args:
-        num_classes: Number of classes
-    Returns:
-        The color map
-    """
-    num_classes += 1
-    color_map = num_classes * [0, 0, 0]
-    for i in range(0, num_classes):
-        j = 0
-        lab = i
-        while lab:
-            color_map[i * 3] |= (((lab >> 0) & 1) << (7 - j))
-            color_map[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j))
-            color_map[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j))
-            j += 1
-            lab >>= 3
-    color_map = [color_map[i:i + 3] for i in range(0, len(color_map), 3)]
-    color_map = color_map[1:]
-    return color_map