diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh
index c5be5bc142f23b5f6fdfb8062f3be8e5e764e364..bd92727545647836c02931130a1ad528889ae2bb 100755
--- a/paddle/scripts/paddle_build.sh
+++ b/paddle/scripts/paddle_build.sh
@@ -787,6 +787,15 @@ set +x
                         multiple_card_tests="$multiple_card_tests|^$testcase$"
                     fi
                 else
+                    if [[ "${#single_card_tests}" -gt 3000 ]];then
+                        if [[ "$single_card_tests_1" == "" ]]; then 
+                            single_card_tests_1="^$testcase$"
+                        else
+                            single_card_tests_1="$single_card_tests_1|^$testcase$"
+                        fi
+                        continue
+                    fi
+
                     if [[ "$single_card_tests" == "" ]]; then
                         single_card_tests="^$testcase$"
                     else
@@ -800,6 +809,7 @@ set +x
         done <<< "$test_cases";
 
         card_test "$single_card_tests" 1    # run cases with single GPU
+        card_test "$single_card_tests_1" 1    # run cases with single GPU
         card_test "$multiple_card_tests" 2  # run cases with two GPUs
         card_test "$exclusive_tests"        # run cases exclusively, in this cases would be run with 4/8 GPUs
         if [[ "$EXIT_CODE" != "0" ]]; then
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 77a752800498de77b35336577914a5415a654aa8..59dfc5c9d0311342fc72d8400a3abddd3f6d778b 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -96,6 +96,7 @@ if (WITH_TESTING)
   add_subdirectory(paddle/fluid/tests)
   add_subdirectory(paddle/fluid/contrib/tests)
   add_subdirectory(paddle/fluid/contrib/slim/tests)
+  add_subdirectory(paddle/incubate/hapi/tests)
 endif()
 install(DIRECTORY ${PADDLE_PYTHON_PACKAGE_DIR}
     DESTINATION opt/paddle/share/wheels
diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py
index ad4a6d64d10adb4deb58f8e0c67c56b4f96afadf..fd6544f9dfbeede98285ef400ddd5c061ce9eb3a 100644
--- a/python/paddle/__init__.py
+++ b/python/paddle/__init__.py
@@ -202,6 +202,9 @@ from .tensor.stat import var  #DEFINE_ALIAS
 # from .tensor.tensor import Tensor        #DEFINE_ALIAS
 # from .tensor.tensor import LoDTensor        #DEFINE_ALIAS
 # from .tensor.tensor import LoDTensorArray        #DEFINE_ALIAS
+
+from . import incubate
+from .incubate import hapi
 from .fluid.dygraph.base import enable_dygraph  #DEFINE_ALIAS
 from .fluid.dygraph.base import disable_dygraph  #DEFINE_ALIAS
 from .fluid.framework import in_dygraph_mode  #DEFINE_ALIAS
diff --git a/python/paddle/incubate/__init__.py b/python/paddle/incubate/__init__.py
index 76e0e91197f15f94d14ae3c20094f6813f725c38..e6888ebc8f441e137d2483795817d6f08719c075 100644
--- a/python/paddle/incubate/__init__.py
+++ b/python/paddle/incubate/__init__.py
@@ -11,3 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+from . import hapi
+
+__all__ = []
+__all__ += hapi.__all__
diff --git a/python/paddle/incubate/hapi/__init__.py b/python/paddle/incubate/hapi/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..0b2321976edff0e2c13c614406fcaaa9976cc797
--- /dev/null
+++ b/python/paddle/incubate/hapi/__init__.py
@@ -0,0 +1,38 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from . import logger
+from . import progressbar
+from . import callbacks
+from . import download
+from . import model
+from . import metrics
+from . import loss
+from . import datasets
+from . import distributed
+from . import vision
+
+logger.setup_logger()
+
+__all__ = [
+    'callbacks',
+    'datasets',
+    'distributed',
+    'download',
+    'metrics',
+    'loss',
+    'vision',
+]
+
+__all__ += model.__all__
diff --git a/python/paddle/incubate/hapi/callbacks.py b/python/paddle/incubate/hapi/callbacks.py
new file mode 100644
index 0000000000000000000000000000000000000000..7b3c41584151c252c65b6cf95f9738b82c78731e
--- /dev/null
+++ b/python/paddle/incubate/hapi/callbacks.py
@@ -0,0 +1,484 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from paddle.fluid.dygraph.parallel import ParallelEnv
+
+from .progressbar import ProgressBar
+
+__all__ = ['Callback', 'ProgBarLogger', 'ModelCheckpoint']
+
+
+def config_callbacks(callbacks=None,
+                     model=None,
+                     batch_size=None,
+                     epochs=None,
+                     steps=None,
+                     log_freq=2,
+                     verbose=2,
+                     save_freq=1,
+                     save_dir=None,
+                     metrics=None,
+                     mode='train'):
+    """Wrap `callbacks` in a CallbackList bound to `model` and the run params.
+
+    Adds a ProgBarLogger (if `verbose`) and a ModelCheckpoint when missing.
+    """
+    cbks = callbacks or []
+    # Copy into a list: a tuple would make the `+` concatenations below fail.
+    cbks = list(cbks) if isinstance(cbks, (list, tuple)) else [cbks]
+    if not any(isinstance(k, ProgBarLogger) for k in cbks) and verbose:
+        cbks = [ProgBarLogger(log_freq, verbose=verbose)] + cbks
+    if not any(isinstance(k, ModelCheckpoint) for k in cbks):
+        cbks = cbks + [ModelCheckpoint(save_freq, save_dir)]
+    cbk_list = CallbackList(cbks)
+    cbk_list.set_model(model)
+    # `a or b if c else d` parses as `a or (b if c else d)`; spell out intent.
+    metrics = [] if mode == 'test' else (metrics or [])
+    cbk_list.set_params({
+        'batch_size': batch_size, 'epochs': epochs, 'steps': steps,
+        'verbose': verbose, 'metrics': metrics,
+    })
+    return cbk_list
+
+
+class CallbackList(object):
+    def __init__(self, callbacks=None):
+        # Shallow-copy the input; the default None yields an empty list.
+        self.callbacks = list(callbacks or [])
+        self.params = {}
+        self.model = None
+
+    def append(self, callback):
+        self.callbacks.append(callback)
+
+    def __iter__(self):
+        return iter(self.callbacks)
+
+    def set_params(self, params):
+        for c in self.callbacks:
+            c.set_params(params)
+
+    def set_model(self, model):
+        for c in self.callbacks:
+            c.set_model(model)
+
+    def _call(self, name, *args):
+        for c in self.callbacks:
+            func = getattr(c, name)
+            func(*args)
+
+    def _check_mode(self, mode):
+        assert mode in ['train', 'eval', 'test'], \
+            'mode should be train, eval or test'
+
+    def on_begin(self, mode, logs=None):
+        self._check_mode(mode)
+        name = 'on_{}_begin'.format(mode)
+        self._call(name, logs)
+
+    def on_end(self, mode, logs=None):
+        self._check_mode(mode)
+        name = 'on_{}_end'.format(mode)
+        self._call(name, logs)
+
+    def on_epoch_begin(self, epoch=None, logs=None):
+        self._call('on_epoch_begin', epoch, logs)
+
+    def on_epoch_end(self, epoch=None, logs=None):
+        self._call('on_epoch_end', epoch, logs)
+
+    def on_batch_begin(self, mode, step=None, logs=None):
+        self._check_mode(mode)
+        name = 'on_{}_batch_begin'.format(mode)
+        self._call(name, step, logs)
+
+    def on_batch_end(self, mode, step=None, logs=None):
+        self._check_mode(mode)
+        name = 'on_{}_batch_end'.format(mode)
+        self._call(name, step, logs)
+
+
+class Callback(object):
+    """
+    Base class used to build new callbacks.
+
+    Examples:
+
+        .. code-block:: python
+            
+            from paddle.incubate.hapi.callbacks import Callback
+
+            # build a simple model checkpoint callback
+            class ModelCheckpoint(Callback):
+                def __init__(self, save_freq=1, save_dir=None):
+                    self.save_freq = save_freq
+                    self.save_dir = save_dir
+
+                def on_epoch_end(self, epoch, logs=None):
+                    if self.model is not None and epoch % self.save_freq == 0:
+                        path = '{}/{}'.format(self.save_dir, epoch)
+                        print('save checkpoint at {}'.format(path))
+                        self.model.save(path)
+
+    """
+
+    def __init__(self):
+        self.model = None
+        self.params = {}
+
+    def set_params(self, params):
+        """
+        Set parameters, which is dict. The keys contain:
+
+          - 'batch_size': an integer. Number of samples per batch.
+          - 'epochs': an integer. Number of epochs.
+          - 'steps': an integer. Number of steps of one epoch.
+          - 'verbose': an integer. Verbose mode is 0, 1 or 2.
+             0 = silent, 1 = progress bar, 2 = one line per epoch.
+          - 'metrics': a list of str. Names of metrics, including 'loss'
+              and the names of hapi.Metric.
+        """
+        self.params = params
+
+    def set_model(self, model):
+        """model is instance of hapi.Model.
+        """
+        self.model = model
+
+    def on_train_begin(self, logs=None):
+        """Called at the start of training.
+
+        Args:
+            logs (dict): The logs is a dict or None.
+        """
+
+    def on_train_end(self, logs=None):
+        """Called at the end of training.
+
+        Args:
+            logs (dict): The logs is a dict or None. The keys of logs
+                passed by hapi.Model contains 'loss', metric names and
+                `batch_size`.
+        """
+
+    def on_eval_begin(self, logs=None):
+        """Called at the start of evaluation.
+
+        Args:
+            logs (dict): The logs is a dict or None. The keys of logs
+                passed by hapi.Model contains 'steps' and 'metrics',
+                The `steps` is number of total steps of validation dataset.
+                The `metrics` is a list of str including 'loss' and the names
+                of hapi.Metric.
+        """
+
+    def on_eval_end(self, logs=None):
+        """Called at the end of evaluation.
+
+        Args:
+            logs (dict): The logs is a dict or None. The `logs` passed by
+                hapi.Model is a dict contains 'loss', metrics and 'batch_size'
+                of last batch of validation dataset.
+        """
+
+    def on_test_begin(self, logs=None):
+        """Called at the beginning of predict.
+
+        Args:
+            logs (dict): The logs is a dict or None.
+        """
+
+    def on_test_end(self, logs=None):
+        """Called at the end of predict.
+
+        Args:
+            logs (dict): The logs is a dict or None.
+        """
+
+    def on_epoch_begin(self, epoch, logs=None):
+        """Called at the beginning of each epoch.
+
+        Args:
+            epoch (int): The index of epoch.
+            logs (dict): The logs is a dict or None. The `logs` passed by
+                hapi.Model is None.
+        """
+
+    def on_epoch_end(self, epoch, logs=None):
+        """Called at the end of each epoch.
+
+        Args:
+            epoch (int): The index of epoch.
+            logs (dict): The logs is a dict or None. The `logs` passed by
+                hapi.Model is a dict, contains 'loss', metrics and 'batch_size'
+                of last batch.
+        """
+
+    def on_train_batch_begin(self, step, logs=None):
+        """Called at the beginning of each batch in training.
+
+        Args:
+            step (int): The index of step (or iteration).
+            logs (dict): The logs is a dict or None. The `logs` passed by
+                hapi.Model is empty.
+        """
+
+    def on_train_batch_end(self, step, logs=None):
+        """Called at the end of each batch in training.
+
+        Args:
+            step (int): The index of step (or iteration).
+            logs (dict): The logs is a dict or None. The `logs` passed by
+                hapi.Model is a dict, contains 'loss', metrics and 'batch_size'
+                of current batch.
+        """
+
+    def on_eval_batch_begin(self, step, logs=None):
+        """Called at the beginning of each batch in evaluation.
+
+        Args:
+            step (int): The index of step (or iteration).
+            logs (dict): The logs is a dict or None. The `logs` passed by
+                hapi.Model is empty.
+        """
+
+    def on_eval_batch_end(self, step, logs=None):
+        """Called at the end of each batch in evaluation.
+
+        Args:
+            step (int): The index of step (or iteration).
+            logs (dict): The logs is a dict or None. The `logs` passed by
+                hapi.Model is a dict, contains 'loss', metrics and 'batch_size'
+                of current batch.
+        """
+
+    def on_test_batch_begin(self, step, logs=None):
+        """Called at the beginning of each batch in predict.
+
+        Args:
+            step (int): The index of step (or iteration).
+            logs (dict): The logs is a dict or None.
+        """
+
+    def on_test_batch_end(self, step, logs=None):
+        """Called at the end of each batch in predict.
+
+        Args:
+            step (int): The index of step (or iteration).
+            logs (dict): The logs is a dict or None.
+        """
+
+
+class ProgBarLogger(Callback):
+    """Logger callback function
+    Args:
+        log_freq (int): The frequency, in number of steps, the logs such as `loss`, 
+                `metrics` are printed. Default: 1.
+        verbose (int): The verbosity mode, should be 0, 1, or 2.
+                0 = silent, 1 = progress bar, 2 = one line per epoch. Default: 2.
+
+    Examples:
+        .. code-block:: python
+
+            import numpy as np
+            from paddle import fluid
+            from paddle.incubate.hapi.metrics import Accuracy
+            from paddle.incubate.hapi.loss import CrossEntropy
+            from paddle.incubate.hapi.datasets import MNIST
+            from paddle.incubate.hapi.vision.models import LeNet
+            from paddle.incubate.hapi.callbacks import ProgBarLogger
+            from paddle.incubate.hapi.model import Input, set_device
+
+            inputs = [Input([-1, 1, 28, 28], 'float32', name='image')]
+            labels = [Input([None, 1], 'int64', name='label')]
+
+            train_dataset = MNIST(mode='train')
+
+            model = LeNet()
+
+            optim = fluid.optimizer.Adam(0.001)
+            model.prepare(optimizer=optim, 
+                        loss_function=CrossEntropy(), 
+                        metrics=Accuracy(), 
+                        inputs=inputs, 
+                        labels=labels)
+
+            callback = ProgBarLogger(log_freq=10)
+            model.fit(train_dataset, batch_size=64, callbacks=callback)
+    """
+
+    def __init__(self, log_freq=1, verbose=2):
+        # Base init defines `model`/`params` so hooks work before set_*().
+        super(ProgBarLogger, self).__init__()
+        self.epochs, self.steps, self.progbar = None, None, None
+        self.verbose = verbose
+        self.log_freq = log_freq
+
+    def _is_print(self):
+        return self.verbose and ParallelEnv().local_rank == 0
+
+    def on_train_begin(self, logs=None):
+        self.epochs = self.params['epochs']
+        assert self.epochs
+        self.train_metrics = self.params['metrics']
+        assert self.train_metrics
+
+    def on_epoch_begin(self, epoch=None, logs=None):
+        self.steps = self.params['steps']
+        self.epoch = epoch
+        self.train_step = 0
+        if self.epochs and self._is_print():
+            print('Epoch %d/%d' % (epoch + 1, self.epochs))
+        self.train_progbar = ProgressBar(num=self.steps, verbose=self.verbose)
+
+    def _updates(self, logs, mode):
+        values = []
+        metrics = getattr(self, '%s_metrics' % (mode))
+        progbar = getattr(self, '%s_progbar' % (mode))
+        steps = getattr(self, '%s_step' % (mode))
+
+        for k in metrics:
+            if k in logs:
+                values.append((k, logs[k]))
+
+        progbar.update(steps, values)
+
+    def on_train_batch_end(self, step, logs=None):
+        logs = logs or {}
+        self.train_step += 1
+
+        if self._is_print() and self.train_step % self.log_freq == 0:
+            if self.steps is None or self.train_step < self.steps:
+                self._updates(logs, 'train')
+
+    def on_epoch_end(self, epoch, logs=None):
+        logs = logs or {}
+        if self._is_print() and (self.steps is not None):
+            self._updates(logs, 'train')
+
+    def on_eval_begin(self, logs=None):
+        logs = logs or {}  # tolerate the declared default of None
+        self.eval_steps = logs.get('steps', None)
+        self.eval_metrics = logs.get('metrics', [])
+        self.eval_step = 0
+        self.evaled_samples = 0
+        self.eval_progbar = ProgressBar(
+            num=self.eval_steps, verbose=self.verbose)
+        if self._is_print():
+            print('Eval begin...')
+
+    def on_eval_batch_end(self, step, logs=None):
+        logs = logs or {}
+        self.eval_step += 1
+        samples = logs.get('batch_size', 1)
+        self.evaled_samples += samples
+
+        if self._is_print() and self.eval_step % self.log_freq == 0:
+            if self.eval_steps is None or self.eval_step < self.eval_steps:
+                self._updates(logs, 'eval')
+
+    def on_test_begin(self, logs=None):
+        logs = logs or {}  # tolerate the declared default of None
+        self.test_steps = logs.get('steps', None)
+        self.test_metrics = logs.get('metrics', [])
+        self.test_step, self.tested_samples = 0, 0
+        self.test_progbar = ProgressBar(
+            num=self.test_steps, verbose=self.verbose)
+        if self._is_print():
+            print('Predict begin...')
+
+    def on_test_batch_end(self, step, logs=None):
+        logs = logs or {}
+        self.test_step += 1
+        samples = logs.get('batch_size', 1)
+        self.tested_samples += samples
+
+        if self.test_step % self.log_freq == 0 and self._is_print():
+            if self.test_steps is None or self.test_step < self.test_steps:
+                self._updates(logs, 'test')
+
+    def on_eval_end(self, logs=None):
+        logs = logs or {}
+        if self._is_print() and (self.eval_steps is not None):
+            self._updates(logs, 'eval')
+            print('Eval samples: %d' % (self.evaled_samples))
+
+    def on_test_end(self, logs=None):
+        logs = logs or {}
+        if self._is_print():
+            if self.test_step % self.log_freq != 0 or self.verbose == 1:
+                self._updates(logs, 'test')
+            print('Predict samples: %d' % (self.tested_samples))
+
+
+class ModelCheckpoint(Callback):
+    """Model checkpoint callback function
+    Args:
+        save_freq(int): The frequency, in number of epochs, the model checkpoint 
+                        are saved. Default: 1.
+        save_dir(str|None): The directory to save checkpoint during training.
+                If None, will not save checkpoint. Default: None.
+
+    Examples:
+        .. code-block:: python
+
+            import numpy as np
+            from paddle import fluid
+            from paddle.incubate.hapi.metrics import Accuracy
+            from paddle.incubate.hapi.loss import CrossEntropy
+            from paddle.incubate.hapi.datasets import MNIST
+            
+            from paddle.incubate.hapi.vision.models import LeNet
+            from paddle.incubate.hapi.callbacks import ModelCheckpoint
+            from paddle.incubate.hapi.model import Input, set_device
+
+            inputs = [Input([-1, 1, 28, 28], 'float32', name='image')]
+            labels = [Input([None, 1], 'int64', name='label')]
+
+            train_dataset = MNIST(mode='train')
+
+            model = LeNet()
+
+            optim = fluid.optimizer.Adam(0.001)
+            model.prepare(optimizer=optim, 
+                        loss_function=CrossEntropy(), 
+                        metrics=Accuracy(), 
+                        inputs=inputs, 
+                        labels=labels)
+
+            callback = ModelCheckpoint(save_dir='./temp')
+            model.fit(train_dataset, batch_size=64, callbacks=callback)
+    """
+
+    def __init__(self, save_freq=1, save_dir=None):
+        super(ModelCheckpoint, self).__init__()  # defines self.model/params
+        self.save_freq, self.save_dir = save_freq, save_dir
+
+    def on_epoch_begin(self, epoch=None, logs=None):
+        self.epoch = epoch
+
+    def _is_save(self):
+        return self.model and self.save_dir and ParallelEnv().local_rank == 0
+
+    def on_epoch_end(self, epoch, logs=None):  # save every `save_freq` epochs
+        if self._is_save() and epoch % self.save_freq == 0:
+            path = '{}/{}'.format(self.save_dir, epoch)
+            print('save checkpoint at {}'.format(path))
+            self.model.save(path)
+
+    def on_train_end(self, logs=None):
+        if self._is_save():
+            path = '{}/final'.format(self.save_dir)
+            print('save checkpoint at {}'.format(path))
+            self.model.save(path)
diff --git a/python/paddle/incubate/hapi/datasets/__init__.py b/python/paddle/incubate/hapi/datasets/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..fc5df6401992def4bc37329794e534a832924da3
--- /dev/null
+++ b/python/paddle/incubate/hapi/datasets/__init__.py
@@ -0,0 +1,25 @@
+#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from . import folder
+from . import mnist
+from . import flowers
+
+from .folder import *
+from .mnist import *
+from .flowers import *
+
+__all__ = folder.__all__ \
+        + mnist.__all__ \
+        + flowers.__all__
diff --git a/python/paddle/incubate/hapi/datasets/flowers.py b/python/paddle/incubate/hapi/datasets/flowers.py
new file mode 100644
index 0000000000000000000000000000000000000000..6f56cc82c1cba800002d82cc8a2bd5ddae619f9e
--- /dev/null
+++ b/python/paddle/incubate/hapi/datasets/flowers.py
@@ -0,0 +1,129 @@
+#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import os
+import io
+import tarfile
+import numpy as np
+import scipy.io as scio
+from PIL import Image
+
+from paddle.io import Dataset
+from .utils import _check_exists_and_download
+
+__all__ = ["Flowers"]
+
+DATA_URL = 'http://paddlemodels.bj.bcebos.com/flowers/102flowers.tgz'
+LABEL_URL = 'http://paddlemodels.bj.bcebos.com/flowers/imagelabels.mat'
+SETID_URL = 'http://paddlemodels.bj.bcebos.com/flowers/setid.mat'
+DATA_MD5 = '52808999861908f626f3c1f4e79d11fa'
+LABEL_MD5 = 'e0620be6f572b9609742df49c70aed4d'
+SETID_MD5 = 'a5357ecc9cb78c4bef273ce3793fc85c'
+
+# In official 'readme', tstid is the flag of test data
+# and trnid is the flag of train data. But test data is more than train data.
+# So we exchange the train data and test data.
+MODE_FLAG_MAP = {'train': 'tstid', 'test': 'trnid', 'valid': "valid"}
+
+
+class Flowers(Dataset):
+    """
+    Implementation of the flowers dataset
+
+    Args:
+        data_file(str): path to data file, can be set None if
+            :attr:`download` is True. Default None
+        label_file(str): path to label file, can be set None if
+            :attr:`download` is True. Default None
+        setid_file(str): path to subset index file, can be set
+            None if :attr:`download` is True. Default None
+        mode(str): 'train', 'valid' or 'test' mode. Default 'train'.
+        transform(callable): applied to each decoded image array. Default None
+        download(bool): whether to auto download a flowers file whose path
+            argument above is unset. Default True
+
+    Examples:
+        
+        .. code-block:: python
+
+            from paddle.incubate.hapi.datasets import Flowers
+
+            flowers = Flowers(mode='test')
+
+            for i in range(len(flowers)):
+                sample = flowers[i]
+                print(sample[0].shape, sample[1])
+
+    """
+
+    def __init__(self,
+                 data_file=None,
+                 label_file=None,
+                 setid_file=None,
+                 mode='train',
+                 transform=None,
+                 download=True):
+        assert mode.lower() in ['train', 'valid', 'test'], \
+                "mode should be 'train', 'valid' or 'test', but got {}".format(mode)
+        self.flag = MODE_FLAG_MAP[mode.lower()]
+
+        self.data_file = data_file
+        if self.data_file is None:
+            assert download, "data_file not set and auto download disabled"
+            self.data_file = _check_exists_and_download(
+                data_file, DATA_URL, DATA_MD5, 'flowers', download)
+
+        self.label_file = label_file
+        if self.label_file is None:
+            assert download, "label_file not set and auto download disabled"
+            self.label_file = _check_exists_and_download(
+                label_file, LABEL_URL, LABEL_MD5, 'flowers', download)
+
+        self.setid_file = setid_file
+        if self.setid_file is None:
+            assert download, "setid_file not set and auto download disabled"
+            self.setid_file = _check_exists_and_download(
+                setid_file, SETID_URL, SETID_MD5, 'flowers', download)
+
+        self.transform = transform
+
+        # index tar members and load labels/split ids; images are read lazily
+        self._load_anno()
+
+    def _load_anno(self):
+        self.name2mem = {}
+        self.data_tar = tarfile.open(self.data_file)
+        for ele in self.data_tar.getmembers():
+            self.name2mem[ele.name] = ele
+
+        self.labels = scio.loadmat(self.label_file)['labels'][0]
+        self.indexes = scio.loadmat(self.setid_file)[self.flag][0]
+
+    def __getitem__(self, idx):
+        index = self.indexes[idx]
+        label = np.array([self.labels[index - 1]])
+        img_name = "jpg/image_%05d.jpg" % index
+        img_ele = self.name2mem[img_name]
+        image = self.data_tar.extractfile(img_ele).read()
+        image = np.array(Image.open(io.BytesIO(image)))
+
+        if self.transform is not None:
+            image = self.transform(image)
+
+        return image, label.astype('int64')
+
+    def __len__(self):
+        return len(self.indexes)
diff --git a/python/paddle/incubate/hapi/datasets/folder.py b/python/paddle/incubate/hapi/datasets/folder.py
new file mode 100644
index 0000000000000000000000000000000000000000..358e7681eb8e64364600732f0399e6b97f0d64e0
--- /dev/null
+++ b/python/paddle/incubate/hapi/datasets/folder.py
@@ -0,0 +1,299 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+import cv2
+
+from paddle.io import Dataset
+
+__all__ = ["DatasetFolder", "ImageFolder"]
+
+
+def has_valid_extension(filename, extensions):
+    """Checks if a file name has a valid extension (case-insensitive).
+
+    Args:
+        filename (str): path to a file
+        extensions (tuple of str): extensions to consider (lowercase)
+
+    Returns:
+        bool: True if the lower-cased filename ends with one of given extensions
+    """
+    return filename.lower().endswith(extensions)
+
+
+def make_dataset(dir, class_to_idx, extensions, is_valid_file=None):
+    """Collect (path, class_index) pairs for the valid files of every class.
+
+    A caller-supplied `is_valid_file` takes precedence over `extensions`.
+    """
+    dir = os.path.expanduser(dir)
+    if is_valid_file is None:
+        def is_valid_file(x):
+            return has_valid_extension(x, extensions)
+
+    images = []
+    for target in sorted(class_to_idx.keys()):
+        d = os.path.join(dir, target)
+        if not os.path.isdir(d):
+            continue
+        for root, _, fnames in sorted(os.walk(d, followlinks=True)):
+            for fname in sorted(fnames):
+                path = os.path.join(root, fname)
+                if is_valid_file(path):
+                    images.append((path, class_to_idx[target]))
+    return images
+
+
+class DatasetFolder(Dataset):
+    """A generic data loader where the samples are arranged in this way:
+
+        root/class_a/1.ext
+        root/class_a/2.ext
+        root/class_a/3.ext
+
+        root/class_b/123.ext
+        root/class_b/456.ext
+        root/class_b/789.ext
+
+    Args:
+        root (string): Root directory path.
+        loader (callable|optional): A function to load a sample given its path.
+        extensions (tuple[str]|optional): A list of allowed extensions.
+            both extensions and is_valid_file should not be passed.
+        transform (callable|optional): A function/transform that takes in
+            a sample and returns a transformed version.
+        is_valid_file (callable|optional): A function that takes the path of a
+            file and checks whether it is valid (e.g. to skip corrupt files).
+            extensions and is_valid_file should not both be passed.
+
+     Attributes:
+        classes (list): List of the class names.
+        class_to_idx (dict): Dict with items (class_name, class_index).
+        samples (list): List of (sample path, class_index) tuples
+        targets (list): The class_index value for each image in the dataset
+
+    Example:
+
+        .. code-block:: python
+
+            import os
+            import cv2
+            import tempfile
+            import shutil
+            import numpy as np
+            from paddle.incubate.hapi.datasets import DatasetFolder
+
+            def make_fake_dir():
+                data_dir = tempfile.mkdtemp()
+
+                for i in range(2):
+                    sub_dir = os.path.join(data_dir, 'class_' + str(i))
+                    if not os.path.exists(sub_dir):
+                        os.makedirs(sub_dir)
+                    for j in range(2):
+                        fake_img = (np.random.random((32, 32, 3)) * 255).astype('uint8')
+                        cv2.imwrite(os.path.join(sub_dir, str(j) + '.jpg'), fake_img)
+                return data_dir
+
+            temp_dir = make_fake_dir()
+            data_folder = DatasetFolder(temp_dir)
+
+            for items in data_folder:
+                break
+                
+            shutil.rmtree(temp_dir)
+    """
+
+    def __init__(self,
+                 root,
+                 loader=None,
+                 extensions=None,
+                 transform=None,
+                 is_valid_file=None):
+        """Scan `root` for class sub-directories and collect their samples."""
+        self.root = root
+        self.transform = transform
+
+        exts = IMG_EXTENSIONS if extensions is None else extensions
+        classes, class_to_idx = self._find_classes(self.root)
+        samples = make_dataset(self.root, class_to_idx, exts, is_valid_file)
+        if not samples:
+            raise RuntimeError(
+                "Found 0 files in subfolders of: " + self.root + "\n"
+                "Supported extensions are: " + ",".join(exts))
+
+        self.loader = loader if loader is not None else cv2_loader
+        self.extensions = exts
+        self.classes = classes
+        self.class_to_idx = class_to_idx
+        self.samples = samples
+        # class index of every sample, in sample order
+        self.targets = [target for _, target in samples]
+
+    def _find_classes(self, dir):
+        """
+        Discover the class folders directly under a dataset root.
+
+        Args:
+            dir (string): Root directory path.
+
+        Returns:
+            tuple: (classes, class_to_idx), where classes is the sorted list of
+                    sub-directory names of dir and class_to_idx maps each name
+                    to its position in that list.
+        """
+        if sys.version_info < (3, 5):
+            classes = [
+                name for name in os.listdir(dir)
+                if os.path.isdir(os.path.join(dir, name))
+            ]
+        else:
+            # os.scandir is available in Python 3.5 and above
+            classes = [e.name for e in os.scandir(dir) if e.is_dir()]
+        classes.sort()
+        class_to_idx = {name: idx for idx, name in enumerate(classes)}
+        return classes, class_to_idx
+
+    def __getitem__(self, index):
+        """Load one sample and apply the optional transform.
+
+        Args:
+            index (int): position in self.samples
+
+        Returns:
+            tuple: (sample, target), target being the sample's class index.
+        """
+        path, target = self.samples[index]
+        loaded = self.loader(path)
+        if self.transform is None:
+            return loaded, target
+        return self.transform(loaded), target
+
+    def __len__(self):
+        return len(self.samples)  # one entry per (path, class_index) pair
+
+
+IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif',
+                  '.tiff', '.webp')
+
+
+def cv2_loader(path):  # loader used when the dataset classes get loader=None
+    return cv2.imread(path)
+
+
+class ImageFolder(Dataset):
+    """A generic data loader where the samples are arranged in this way:
+
+        root/1.ext
+        root/2.ext
+        root/sub_dir/3.ext
+
+    Args:
+        root (string): Root directory path.
+        loader (callable, optional): A function to load a sample given its path.
+        extensions (tuple[string], optional): A list of allowed extensions.
+            both extensions and is_valid_file should not be passed.
+        transform (callable, optional): A function/transform that takes in
+            a sample and returns a transformed version.
+        is_valid_file (callable, optional): A function that takes the path of a
+            file and checks whether it is valid (e.g. to skip corrupt files).
+            extensions and is_valid_file should not both be passed.
+
+     Attributes:
+        samples (list): List of sample path
+
+    Example:
+
+        .. code-block:: python
+
+            import os
+            import cv2
+            import tempfile
+            import shutil
+            import numpy as np
+            from paddle.incubate.hapi.datasets import ImageFolder
+
+            def make_fake_dir():
+                data_dir = tempfile.mkdtemp()
+
+                for i in range(2):
+                    sub_dir = os.path.join(data_dir, 'class_' + str(i))
+                    if not os.path.exists(sub_dir):
+                        os.makedirs(sub_dir)
+                    for j in range(2):
+                        fake_img = (np.random.random((32, 32, 3)) * 255).astype('uint8')
+                        cv2.imwrite(os.path.join(sub_dir, str(j) + '.jpg'), fake_img)
+                return data_dir
+
+            temp_dir = make_fake_dir()
+            data_folder = ImageFolder(temp_dir)
+
+            for items in data_folder:
+                break
+                
+            shutil.rmtree(temp_dir)
+     """
+
+    def __init__(self,
+                 root,
+                 loader=None,
+                 extensions=None,
+                 transform=None,
+                 is_valid_file=None):
+        """Collect every file under `root` accepted by the file filter."""
+        self.root = root
+        if extensions is None:
+            extensions = IMG_EXTENSIONS
+
+        # an explicit predicate wins; otherwise filter by extension
+        if is_valid_file is None:
+            def is_valid_file(x):
+                return has_valid_extension(x, extensions)
+
+        samples = []
+        path = os.path.expanduser(root)
+        for dirpath, _, fnames in sorted(os.walk(path, followlinks=True)):
+            for fname in sorted(fnames):
+                f = os.path.join(dirpath, fname)
+                if is_valid_file(f):
+                    samples.append(f)
+
+        if len(samples) == 0:
+            raise (RuntimeError(
+                "Found 0 files in subfolders of: " + self.root + "\n"
+                "Supported extensions are: " + ",".join(extensions)))
+
+        self.loader = cv2_loader if loader is None else loader
+        self.extensions = extensions
+        self.samples = samples
+        self.transform = transform
+
+    def __getitem__(self, index):
+        """
+        Args:
+            index (int): Index
+
+        Returns:
+            list: a one-element list holding the (optionally transformed) sample.
+        """
+        path = self.samples[index]
+        sample = self.loader(path)
+        if self.transform is not None:
+            sample = self.transform(sample)
+        return [sample]
+
+    def __len__(self):
+        return len(self.samples)  # one entry per collected file path
diff --git a/python/paddle/incubate/hapi/datasets/mnist.py b/python/paddle/incubate/hapi/datasets/mnist.py
new file mode 100644
index 0000000000000000000000000000000000000000..bd48ca1c9668b40ac0379bfeda11a5c056f9fd44
--- /dev/null
+++ b/python/paddle/incubate/hapi/datasets/mnist.py
@@ -0,0 +1,162 @@
+#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import os
+import gzip
+import struct
+import numpy as np
+
+import paddle.dataset.common
+from paddle.io import Dataset
+from .utils import _check_exists_and_download
+
+__all__ = ["MNIST"]
+
+URL_PREFIX = 'https://dataset.bj.bcebos.com/mnist/'
+TEST_IMAGE_URL = URL_PREFIX + 't10k-images-idx3-ubyte.gz'
+TEST_IMAGE_MD5 = '9fb629c4189551a2d022fa330f9573f3'
+TEST_LABEL_URL = URL_PREFIX + 't10k-labels-idx1-ubyte.gz'
+TEST_LABEL_MD5 = 'ec29112dd5afa0611ce80d1b7f02629c'
+TRAIN_IMAGE_URL = URL_PREFIX + 'train-images-idx3-ubyte.gz'
+TRAIN_IMAGE_MD5 = 'f68b3c2dcbeaaa9fbdd348bbdeb94873'
+TRAIN_LABEL_URL = URL_PREFIX + 'train-labels-idx1-ubyte.gz'
+TRAIN_LABEL_MD5 = 'd53e105ee54ea40749a09fcbcd1e9432'
+
+
+class MNIST(Dataset):
+    """
+    Implement of MNIST dataset
+
+    Args:
+        image_path(str): path to image file, can be set None if
+            :attr:`download` is True. Default None
+        label_path(str): path to label file, can be set None if
+            :attr:`download` is True. Default None
+        chw_format(bool): If set True, the output shape is [1, 28, 28],
+            otherwise, output shape is [784]. Default True.
+        mode(str): 'train' or 'test' mode. Default 'train'.
+        download(bool): whether auto download mnist dataset if
+            :attr:`image_path`/:attr:`label_path` unset. Default
+            True
+
+    Returns:
+        Dataset: MNIST Dataset.
+
+    Examples:
+        
+        .. code-block:: python
+
+            from paddle.incubate.hapi.datasets import MNIST
+
+            mnist = MNIST(mode='test')
+
+            for i in range(len(mnist)):
+                sample = mnist[i]
+                print(sample[0].shape, sample[1])
+
+    """
+
+    def __init__(self,
+                 image_path=None,
+                 label_path=None,
+                 chw_format=True,
+                 mode='train',
+                 transform=None,
+                 download=True):
+        assert mode.lower() in ['train', 'test'], \
+                "mode should be 'train' or 'test', but got {}".format(mode)
+        self.mode = mode.lower()
+        is_train = self.mode == 'train'  # normalized: 'TRAIN' must not pick test files
+        self.chw_format = chw_format
+        self.image_path = image_path
+        if self.image_path is None:
+            assert download, "image_path not set and auto download disabled"
+            image_url = TRAIN_IMAGE_URL if is_train else TEST_IMAGE_URL
+            image_md5 = TRAIN_IMAGE_MD5 if is_train else TEST_IMAGE_MD5
+            self.image_path = _check_exists_and_download(
+                image_path, image_url, image_md5, 'mnist', download)
+
+        self.label_path = label_path
+        if self.label_path is None:
+            assert download, "label_path not set and auto download disabled"
+            label_url = TRAIN_LABEL_URL if is_train else TEST_LABEL_URL
+            label_md5 = TRAIN_LABEL_MD5 if is_train else TEST_LABEL_MD5
+            self.label_path = _check_exists_and_download(
+                label_path, label_url, label_md5, 'mnist', download)
+
+        self.transform = transform
+        # read dataset into memory
+        self._parse_dataset()
+
+    def _parse_dataset(self, buffer_size=100):
+        """Decode the gzip image/label files into self.images / self.labels.
+
+        Samples are unpacked in chunks of at most `buffer_size`; the last chunk
+        may be shorter, so the sample count need not be a multiple of it.
+        """
+        self.images = []
+        self.labels = []
+        with gzip.GzipFile(self.image_path, 'rb') as image_file:
+            img_buf = image_file.read()
+        with gzip.GzipFile(self.label_path, 'rb') as label_file:
+            lab_buf = label_file.read()
+
+        # read from Big-endian
+        # image file header : 16B, of which only rows and cols are used
+        magic_byte_img = '>IIII'
+        _, _, rows, cols = struct.unpack_from(magic_byte_img, img_buf, 0)
+        offset_img = struct.calcsize(magic_byte_img)
+
+        # label file header : 8B, its second field is the label count
+        magic_byte_lab = '>II'
+        _, label_num = struct.unpack_from(magic_byte_lab, lab_buf, 0)
+        offset_lab = struct.calcsize(magic_byte_lab)
+
+        step_label = 0
+        while step_label < label_num:
+            # never unpack past the end of the buffers on the final chunk
+            chunk = min(buffer_size, label_num - step_label)
+            fmt_label = '>' + str(chunk) + 'B'
+            labels = struct.unpack_from(fmt_label, lab_buf, offset_lab)
+            offset_lab += struct.calcsize(fmt_label)
+            step_label += chunk
+
+            fmt_images = '>' + str(chunk * rows * cols) + 'B'
+            images_temp = struct.unpack_from(fmt_images, img_buf, offset_img)
+            offset_img += struct.calcsize(fmt_images)
+            images = np.reshape(images_temp,
+                                (chunk, rows * cols)).astype('float32')
+
+            # rescale byte values from [0, 255] to [-1, 1]
+            images = images / 255.0
+            images = images * 2.0
+            images = images - 1.0
+
+            for i in range(chunk):
+                self.images.append(images[i, :])
+                self.labels.append(
+                    np.array([labels[i]]).astype('int64'))
+
+    def __getitem__(self, idx):
+        sample, target = self.images[idx], self.labels[idx]
+        if self.chw_format:
+            # stored rows are flat; restore a single-channel 28x28 layout
+            sample = np.reshape(sample, [1, 28, 28])
+        return (sample if self.transform is None else
+                self.transform(sample)), target
+
+    def __len__(self):
+        return len(self.labels)  # one label is stored per parsed sample
diff --git a/python/paddle/incubate/hapi/datasets/utils.py b/python/paddle/incubate/hapi/datasets/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..171f794ba9df4270727a23cc6cd039a9faa81970
--- /dev/null
+++ b/python/paddle/incubate/hapi/datasets/utils.py
@@ -0,0 +1,29 @@
+#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import os
+import paddle.dataset.common
+
+
+def _check_exists_and_download(path, url, md5, module_name, download=True):
+    """Return `path` if it exists; otherwise download `url` when allowed."""
+    if path and os.path.exists(path):
+        return path
+    if not download:
+        raise ValueError('{} not exists and auto download disabled'.format(
+            path))
+    # nothing usable on disk: delegate to the shared dataset downloader
+    return paddle.dataset.common.download(url, module_name, md5)
diff --git a/python/paddle/incubate/hapi/distributed.py b/python/paddle/incubate/hapi/distributed.py
new file mode 100644
index 0000000000000000000000000000000000000000..585f466ea6a1ef5a3d888b7c46fe2908ffd2c769
--- /dev/null
+++ b/python/paddle/incubate/hapi/distributed.py
@@ -0,0 +1,254 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import six
+import time
+import math
+import socket
+import contextlib
+import numpy as np
+
+from paddle import fluid
+from paddle.fluid.layers import collective
+from paddle.fluid.dygraph.parallel import ParallelEnv, ParallelStrategy
+from paddle.io import BatchSampler
+
+_parallel_context_initialized = False
+
+__all__ = ['DistributedBatchSampler']
+
+
+class DistributedBatchSampler(BatchSampler):
+    """Sampler that restricts data loading to a subset of the dataset.
+
+    In such case, each process can pass a DistributedBatchSampler instance 
+    as a DataLoader sampler, and load a subset of the original dataset that 
+    is exclusive to it.
+
+    .. note::
+        Dataset is assumed to be of constant size.
+        
+    Args:
+        dataset(paddle.io.Dataset): this could be a `paddle.io.Dataset` implement
+                     or other python object which implemented
+                     `__len__` for BatchSampler to get sample
+                     number of data source.
+        batch_size(int): number of sample indices in a mini-batch.
+        shuffle(bool): whether to shuffle indices order before generating
+            batch indices. Default False.
+        drop_last(bool): whether to drop the last incomplete batch when the
+            dataset size is not divisible by the batch size. Default False
+
+    Examples:
+        .. code-block:: python
+
+            import numpy as np
+
+            from paddle.incubate.hapi.datasets import MNIST
+            from paddle.incubate.hapi.distributed import DistributedBatchSampler
+
+            class MnistDataset(MNIST):
+                def __init__(self, mode, return_label=True):
+                    super(MnistDataset, self).__init__(mode=mode)
+                    self.return_label = return_label
+
+                def __getitem__(self, idx):
+                    img = np.reshape(self.images[idx], [1, 28, 28])
+                    if self.return_label:
+                        return img, np.array(self.labels[idx]).astype('int64')
+                    return img,
+
+                def __len__(self):
+                    return len(self.images)
+
+            train_dataset = MnistDataset(mode='train')
+            dist_train_dataloader = DistributedBatchSampler(train_dataset, batch_size=64)
+
+            for data in dist_train_dataloader:
+                # do something
+                break
+    """
+
+    def __init__(self, dataset, batch_size, shuffle=False, drop_last=False):
+        self.dataset = dataset
+        # argument validation: positive int batch size, strict bool flags
+        assert isinstance(batch_size, int) and batch_size > 0, \
+                "batch_size should be a positive integer"
+        self.batch_size = batch_size
+        assert isinstance(shuffle, bool), \
+                "shuffle should be a boolean value"
+        self.shuffle = shuffle
+        assert isinstance(drop_last, bool), \
+                "drop_last should be a boolean number"
+        # num_samples is the per-rank share rounded up; total_size is that share times nranks
+        self.drop_last = drop_last
+        self.nranks = ParallelEnv().nranks
+        self.local_rank = ParallelEnv().local_rank
+        self.epoch = 0  # seeds the shuffle in __iter__
+        self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.nranks))
+        self.total_size = self.num_samples * self.nranks
+
+    def __iter__(self):
+        num_samples = len(self.dataset)
+        indices = np.arange(num_samples).tolist()
+        indices += indices[:(self.total_size - len(indices))]  # pad by repeating leading indices
+        assert len(indices) == self.total_size
+        if self.shuffle:
+            np.random.RandomState(self.epoch).shuffle(indices)  # seeded by the epoch counter
+            self.epoch += 1
+
+        # subsample: pick the indices that belong to this rank
+        def _get_indices_by_batch_size(indices):
+            subsampled_indices = []
+            last_batch_size = self.total_size % (self.batch_size * self.nranks)  # tail shorter than one global batch
+            assert last_batch_size % self.nranks == 0
+            last_local_batch_size = last_batch_size // self.nranks
+
+            for i in range(self.local_rank * self.batch_size,
+                           len(indices) - last_batch_size,
+                           self.batch_size * self.nranks):
+                subsampled_indices.extend(indices[i:i + self.batch_size])
+            # the tail is split evenly; this rank takes its own slice of it
+            indices = indices[len(indices) - last_batch_size:]
+            subsampled_indices.extend(indices[
+                self.local_rank * last_local_batch_size:(
+                    self.local_rank + 1) * last_local_batch_size])
+            return subsampled_indices
+
+        if self.nranks > 1:
+            indices = _get_indices_by_batch_size(indices)
+
+        assert len(indices) == self.num_samples
+        _sample_iter = iter(indices)
+        # group this rank's indices into lists of batch_size
+        batch_indices = []
+        for idx in _sample_iter:
+            batch_indices.append(idx)
+            if len(batch_indices) == self.batch_size:
+                yield batch_indices
+                batch_indices = []
+        if not self.drop_last and len(batch_indices) > 0:  # trailing partial batch
+            yield batch_indices
+
+    def __len__(self):
+        num_samples = self.num_samples
+        num_samples += int(not self.drop_last) * (self.batch_size - 1)  # rounds up unless drop_last
+        return num_samples // self.batch_size
+
+    def set_epoch(self, epoch):
+        self.epoch = epoch  # used as the shuffle seed by the next __iter__
+
+
+def _all_gather(x, nranks, ring_id=0, use_calc_stream=True):  # forwards to collective._c_allgather
+    return collective._c_allgather(
+        x, nranks, ring_id=ring_id, use_calc_stream=use_calc_stream)
+
+
+def wait_server_ready(endpoints):
+    """Block until every "ip:port" endpoint accepts a TCP connection."""
+    assert not isinstance(endpoints, six.string_types)
+    while True:
+        all_ok = True
+        for ep in endpoints:
+            ip_port = ep.split(":")
+            with contextlib.closing(
+                    socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
+                sock.settimeout(2)
+                # connect_ex returns an error code instead of raising
+                result = sock.connect_ex((ip_port[0], int(ip_port[1])))
+                if result != 0:
+                    all_ok = False
+        if all_ok:
+            break
+        # at least one endpoint is not reachable yet: pause, then retry them all
+        time.sleep(3)
+
+
+def init_communicator(program, rank, nranks, wait_port, current_endpoint,
+                      endpoints):
+    if nranks < 2:  # nothing is appended for a single rank
+        return
+    other_endpoints = endpoints[:]  # copy, so the caller's list is not mutated
+    other_endpoints.remove(current_endpoint)
+    if rank == 0 and wait_port:  # only rank 0 waits for the other endpoints
+        wait_server_ready(other_endpoints)
+    block = program.global_block()
+    nccl_id_var = block.create_var(
+        name=fluid.unique_name.generate('nccl_id'),
+        persistable=True,
+        type=fluid.core.VarDesc.VarType.RAW)
+    # first op: c_gen_nccl_id writes its output into nccl_id_var
+    block.append_op(
+        type='c_gen_nccl_id',
+        inputs={},
+        outputs={'Out': nccl_id_var},
+        attrs={
+            'rank': rank,
+            'endpoint': current_endpoint,
+            'other_endpoints': other_endpoints
+        })
+    # second op: c_comm_init takes nccl_id_var as input, with ring_id 0
+    block.append_op(
+        type='c_comm_init',
+        inputs={'X': nccl_id_var},
+        outputs={},
+        attrs={
+            'nranks': nranks,
+            'rank': rank,
+            'ring_id': 0,
+        })
+
+
+def prepare_distributed_context(place=None):
+    """Initialize the collective communicator once for multi-rank runs."""
+    if place is None:
+        place = fluid.CUDAPlace(ParallelEnv().dev_id) if ParallelEnv().nranks > 1 \
+            else fluid.CUDAPlace(0)
+
+    strategy = ParallelStrategy()
+    strategy.nranks = ParallelEnv().nranks
+    strategy.local_rank = ParallelEnv().local_rank
+    strategy.trainer_endpoints = ParallelEnv().trainer_endpoints
+    strategy.current_endpoint = ParallelEnv().current_endpoint
+
+    if strategy.nranks < 2:
+        return
+
+    # fail loudly instead of silently skipping initialization on other places
+    assert isinstance(place, fluid.CUDAPlace), \
+        "Only support CUDAPlace for now."
+    global _parallel_context_initialized
+    if not _parallel_context_initialized:
+
+        def _init_context():
+            communicator_prog = fluid.Program()
+            init_communicator(communicator_prog, strategy.local_rank,
+                              strategy.nranks, True, strategy.current_endpoint,
+                              strategy.trainer_endpoints)
+            exe = fluid.Executor(place)
+            exe.run(communicator_prog)
+
+        if fluid.in_dygraph_mode():
+            fluid.disable_dygraph()
+            _init_context()
+            fluid.enable_dygraph(place)
+        else:
+            _init_context()
+
+    _parallel_context_initialized = True
+    return strategy
diff --git a/python/paddle/incubate/hapi/download.py b/python/paddle/incubate/hapi/download.py
new file mode 100644
index 0000000000000000000000000000000000000000..0c80a68392b941e13880afa980a2a8e1da8acfee
--- /dev/null
+++ b/python/paddle/incubate/hapi/download.py
@@ -0,0 +1,235 @@
+#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import sys
+import os.path as osp
+import shutil
+import requests
+import hashlib
+import time
+from collections import OrderedDict
+from paddle.fluid.dygraph.parallel import ParallelEnv
+
+try:
+    from tqdm import tqdm
+except Exception:
+    # minimal stand-in used when tqdm cannot be imported; reports on stderr
+    class tqdm(object):
+        def __init__(self, total=None):
+            self.total = total
+            self.n = 0  # running count passed to update() so far
+
+        def update(self, n):
+            self.n += n
+            if self.total is None:
+                sys.stderr.write("\r{0:.1f} bytes".format(self.n))
+            else:
+                sys.stderr.write("\r{0:.1f}%".format(100 * self.n / float(
+                    self.total)))
+            sys.stderr.flush()
+
+        def __enter__(self):
+            return self
+
+        def __exit__(self, exc_type, exc_val, exc_tb):
+            sys.stderr.write('\n')  # end the line that update() kept rewriting
+
+
+import logging
+logger = logging.getLogger(__name__)
+
+__all__ = ['get_weights_path_from_url']
+
+WEIGHTS_HOME = osp.expanduser("~/.cache/paddle/hapi/weights")
+
+DOWNLOAD_RETRY_LIMIT = 3
+
+nlp_models = OrderedDict((
+    ('RoBERTa-zh-base',
+     'https://bert-models.bj.bcebos.com/chinese_roberta_wwm_ext_L-12_H-768_A-12.tar.gz'
+     ),
+    ('RoBERTa-zh-large',
+     'https://bert-models.bj.bcebos.com/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16.tar.gz'
+     ),
+    ('ERNIE-v2-en-base',
+     'https://ernie.bj.bcebos.com/ERNIE_Base_en_stable-2.0.0.tar.gz'),
+    ('ERNIE-v2-en-large',
+     'https://ernie.bj.bcebos.com/ERNIE_Large_en_stable-2.0.0.tar.gz'),
+    ('XLNet-cased-base',
+     'https://xlnet.bj.bcebos.com/xlnet_cased_L-12_H-768_A-12.tgz'),
+    ('XLNet-cased-large',
+     'https://xlnet.bj.bcebos.com/xlnet_cased_L-24_H-1024_A-16.tgz'),
+    ('ERNIE-v1-zh-base',
+     'https://baidu-nlp.bj.bcebos.com/ERNIE_stable-1.0.1.tar.gz'),
+    ('ERNIE-v1-zh-base-max-len-512',
+     'https://ernie.bj.bcebos.com/ERNIE_1.0_max-len-512.tar.gz'),
+    ('BERT-en-uncased-large-whole-word-masking',
+     'https://bert-models.bj.bcebos.com/wwm_uncased_L-24_H-1024_A-16.tar.gz'),
+    ('BERT-en-cased-large-whole-word-masking',
+     'https://bert-models.bj.bcebos.com/wwm_cased_L-24_H-1024_A-16.tar.gz'),
+    ('BERT-en-uncased-base',
+     'https://bert-models.bj.bcebos.com/uncased_L-12_H-768_A-12.tar.gz'),
+    ('BERT-en-uncased-large',
+     'https://bert-models.bj.bcebos.com/uncased_L-24_H-1024_A-16.tar.gz'),
+    ('BERT-en-cased-base',
+     'https://bert-models.bj.bcebos.com/cased_L-12_H-768_A-12.tar.gz'),
+    ('BERT-en-cased-large',
+     'https://bert-models.bj.bcebos.com/cased_L-24_H-1024_A-16.tar.gz'),
+    ('BERT-multilingual-uncased-base',
+     'https://bert-models.bj.bcebos.com/multilingual_L-12_H-768_A-12.tar.gz'),
+    ('BERT-multilingual-cased-base',
+     'https://bert-models.bj.bcebos.com/multi_cased_L-12_H-768_A-12.tar.gz'),
+    ('BERT-zh-base',
+     'https://bert-models.bj.bcebos.com/chinese_L-12_H-768_A-12.tar.gz'), ))
+
+
+def is_url(path):
+    """
+    Tell whether a string starts with an http:// or https:// scheme.
+    Args:
+        path (string): candidate string, URL or not.
+    """
+    return path.startswith(('http://', 'https://'))
+
+
+def get_weights_path_from_url(url, md5sum=None):
+    """Get weights path from WEIGHTS_HOME, if not exists,
+    download it from url.
+
+    Args:
+        url (str): download url
+        md5sum (str): md5 sum of download package, forwarded unchanged
+            to get_path_from_url. Default None
+
+    Returns:
+        str: a local path to save downloaded weights.
+
+    Examples:
+        .. code-block:: python
+
+            from paddle.incubate.hapi.download import get_weights_path_from_url
+
+            resnet18_pretrained_weight_url = 'https://paddle-hapi.bj.bcebos.com/models/resnet18.pdparams'
+            local_weight_path = get_weights_path_from_url(resnet18_pretrained_weight_url)
+
+    """
+    return get_path_from_url(url, WEIGHTS_HOME, md5sum)
+
+
+def _map_path(url, root_dir):
+    # The local file keeps the last path component of url and is
+    # placed directly under root_dir.
+    basename = osp.split(url)[-1]
+    return osp.join(root_dir, basename)
+
+
+def get_path_from_url(url, root_dir, md5sum=None, check_exist=True):
+    """ Download from given url to root_dir.
+    If the file named by url already exists under root_dir (and
+    passes the md5 check), return its path directly; otherwise
+    rank 0 downloads it and the other ranks wait for the file.
+
+    Args:
+        url (str): download url
+        root_dir (str): root dir for downloading, it should be
+                        WEIGHTS_HOME or DATASET_HOME
+        md5sum (str): md5 sum of download package
+        check_exist (bool): reuse an existing file if True. Default: True.
+    Returns:
+        str: a local path to save downloaded models & weights & datasets.
+    """
+    assert is_url(url), "downloading from {} not a url".format(url)
+    # expected local path of the downloaded file under root_dir
+    fullpath = _map_path(url, root_dir)
+
+    if osp.exists(fullpath) and check_exist and _md5check(fullpath, md5sum):
+        logger.info("Found {}".format(fullpath))
+    else:
+        if ParallelEnv().local_rank == 0:
+            fullpath = _download(url, root_dir, md5sum)
+        else:  # NOTE(review): returns at once if a stale file already exists
+            while not os.path.exists(fullpath):
+                time.sleep(1)
+    return fullpath
+
+
+def _download(url, path, md5sum=None):
+    """
+    Download from url into directory path and return the saved file name.
+    url (str): download url
+    path (str): directory to download into, created when missing
+    md5sum (str): expected md5 of the file, None skips the check
+    """
+    if not osp.exists(path):
+        os.makedirs(path)
+
+    fname = osp.split(url)[-1]
+    fullname = osp.join(path, fname)
+    retry_cnt = 0
+    # retry until the file exists and passes the md5 check
+    while not (osp.exists(fullname) and _md5check(fullname, md5sum)):
+        if retry_cnt < DOWNLOAD_RETRY_LIMIT:
+            retry_cnt += 1
+        else:
+            raise RuntimeError("Download from {} failed. "
+                               "Retry limit reached".format(url))
+
+        logger.info("Downloading {} from {}".format(fname, url))
+
+        req = requests.get(url, stream=True)
+        if req.status_code != 200:  # a non-200 response aborts, no retry
+            raise RuntimeError("Downloading from {} failed with code "
+                               "{}!".format(url, req.status_code))
+
+        # To protect against an interrupted download, write to
+        # tmp_fullname first and move it to fullname only after
+        # the download has finished
+        tmp_fullname = fullname + "_tmp"
+        total_size = req.headers.get('content-length')
+        with open(tmp_fullname, 'wb') as f:
+            if total_size:
+                with tqdm(total=(int(total_size) + 1023) // 1024) as pbar:
+                    for chunk in req.iter_content(chunk_size=1024):
+                        f.write(chunk)
+                        pbar.update(1)
+            else:
+                for chunk in req.iter_content(chunk_size=1024):
+                    if chunk:
+                        f.write(chunk)
+        shutil.move(tmp_fullname, fullname)
+
+    return fullname
+
+
+def _md5check(fullname, md5sum=None):
+    """Return True when md5sum is None or equals the md5 of file fullname."""
+    if md5sum is None:
+        return True
+
+    logger.info("File {} md5 checking...".format(fullname))
+    hasher = hashlib.md5()
+    with open(fullname, 'rb') as fin:
+        for block in iter(lambda: fin.read(4096), b""):
+            hasher.update(block)
+    calc_md5sum = hasher.hexdigest()
+    if calc_md5sum == md5sum:
+        return True
+    logger.info("File {} md5 check failed, {}(calc) != "
+                "{}(base)".format(fullname, calc_md5sum, md5sum))
+    return False
diff --git a/python/paddle/incubate/hapi/logger.py b/python/paddle/incubate/hapi/logger.py
new file mode 100644
index 0000000000000000000000000000000000000000..d4f18ce0ff738c966f1e237beffc9da366e3ae64
--- /dev/null
+++ b/python/paddle/incubate/hapi/logger.py
@@ -0,0 +1,71 @@
+#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import sys
+import logging
+
+from paddle.fluid.dygraph.parallel import ParallelEnv
+
+
+def setup_logger(output=None, name="hapi", log_level=logging.INFO):
+    """
+    Initialize logger of hapi and set its verbosity level to `log_level`.
+
+    Args:
+        output (str): a file name or a directory to save log. If None, will not save log file.
+            If ends with ".txt" or ".log", assumed to be a file name.
+            Otherwise, logs will be saved to `output/log.txt`.
+        name (str): the root module name of this logger. Default: 'hapi'.
+        log_level (enum): log level. eg.'INFO', 'DEBUG', 'ERROR'. Default: logging.INFO.
+    Returns:
+        logging.Logger: a logger
+    """
+    logger = logging.getLogger(name)
+    logger.propagate = False
+    logger.setLevel(log_level)
+
+    format_str = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+    # stdout logging: only local rank==0
+    local_rank = ParallelEnv().local_rank
+    if local_rank == 0 and len(logger.handlers) == 0:
+        ch = logging.StreamHandler(stream=sys.stdout)
+        ch.setLevel(log_level)
+        ch.setFormatter(logging.Formatter(format_str))
+        logger.addHandler(ch)
+
+    # file logging if output is not None: all workers
+    if output is not None:
+        if output.endswith(".txt") or output.endswith(".log"):
+            filename = output
+        else:
+            filename = os.path.join(output, "log.txt")
+
+        if local_rank > 0:
+            filename = filename + ".rank{}".format(local_rank)
+        # a bare file name has no directory part, nothing to create then
+        log_dir = os.path.dirname(filename)
+        if log_dir and not os.path.exists(log_dir):
+            os.makedirs(log_dir)
+        # FileHandler opens `filename`; StreamHandler expects a stream object
+        fh = logging.FileHandler(filename)
+        fh.setLevel(log_level)
+        fh.setFormatter(logging.Formatter(format_str))
+        logger.addHandler(fh)
+
+    return logger
diff --git a/python/paddle/incubate/hapi/loss.py b/python/paddle/incubate/hapi/loss.py
new file mode 100644
index 0000000000000000000000000000000000000000..8f2e28477953d7ff7b168b207a7d80b48e9d8611
--- /dev/null
+++ b/python/paddle/incubate/hapi/loss.py
@@ -0,0 +1,145 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+from paddle import fluid
+from paddle.fluid.framework import in_dygraph_mode, Variable
+from paddle.fluid.dygraph.base import to_variable
+
+from .utils import to_list
+
+__all__ = ['Loss', 'CrossEntropy', 'SoftmaxWithCrossEntropy']
+
+
+class Loss(object):
+    """
+    Common base of all losses: subclasses implement `forward`, callers
+    invoke the instance and get a list of reduced loss values.
+
+    Usage:
+        custom_loss = CustomLoss()
+        loss = custom_loss(inputs, labels)
+
+    Examples:
+        .. code-block:: python
+
+            from paddle.incubate.hapi.loss import Loss
+            from paddle import fluid
+
+            class SoftmaxWithCrossEntropy(Loss):
+                def __init__(self, average=True):
+                    super(SoftmaxWithCrossEntropy, self).__init__(average)
+
+                def forward(self, outputs, labels):
+                    return [
+                        fluid.layers.softmax_with_cross_entropy(
+                            o, l, return_softmax=False) for o, l in zip(outputs, labels)
+                    ]
+    """
+
+    def __init__(self, average=True):
+        super(Loss, self).__init__()
+        # True: reduce each loss with mean; False: reduce with sum
+        self.average = average
+
+    def forward(self, outputs, labels):
+        raise NotImplementedError()
+
+    def __call__(self, outputs, labels=None):
+        labels = to_list(labels)
+        if in_dygraph_mode() and labels:
+            labels = [to_variable(label) for label in labels]
+        raw_losses = to_list(self.forward(to_list(outputs), labels))
+        # pick the reduction once, then apply it to every loss
+        reduce_fn = (fluid.layers.reduce_mean
+                     if self.average else fluid.layers.reduce_sum)
+        return [reduce_fn(raw) for raw in raw_losses]
+
+
+class CrossEntropy(Loss):
+    """
+    Cross entropy loss applied to each (output, label) pair.
+    Args:
+        input (list[Variable]): Input tensor, the data type is float32,
+            float64, int32, int64.
+        label (list[Variable]): Label tensor, the data type is float32,
+            float64, int32, int64.
+        average (bool, optional): Indicate whether to average the loss, Default: True.
+    Returns:
+        list[Variable]: The tensor variable storing the cross_entropy_loss of inputs and labels.
+
+    Examples:
+        .. code-block:: python
+
+            from paddle.incubate.hapi.model import Input
+            from paddle.incubate.hapi.vision.models import LeNet
+            from paddle.incubate.hapi.loss import CrossEntropy
+
+            inputs = [Input([-1, 1, 28, 28], 'float32', name='image')]
+            labels = [Input([None, 1], 'int64', name='label')]
+
+            model = LeNet()
+            loss = CrossEntropy()
+            model.prepare(loss_function=loss, inputs=inputs, labels=labels)
+    """
+
+    def __init__(self, average=True):
+        super(CrossEntropy, self).__init__(average)
+
+    def forward(self, outputs, labels):
+        # one fluid.layers.cross_entropy result per (output, label) pair
+        pairs = zip(outputs, labels)
+        return [fluid.layers.cross_entropy(out, lbl) for out, lbl in pairs]
+
+
+class SoftmaxWithCrossEntropy(Loss):
+    """
+    Loss that applies the combined softmax + cross entropy op.
+    Args:
+        input (list[Variable]): Input tensor, the data type is float32,
+            float64, int32, int64.
+        label (list[Variable]): Label tensor, the data type is float32,
+            float64, int32, int64.
+        average (bool, optional): Indicate whether to average the loss, Default: True.
+    Returns:
+        list[Variable]: The tensor variable storing the cross_entropy_loss of inputs and labels.
+
+    Examples:
+        .. code-block:: python
+
+            from paddle.incubate.hapi.model import Input
+            from paddle.incubate.hapi.vision.models import LeNet
+            from paddle.incubate.hapi.loss import SoftmaxWithCrossEntropy
+
+            inputs = [Input([-1, 1, 28, 28], 'float32', name='image')]
+            labels = [Input([None, 1], 'int64', name='label')]
+
+            model = LeNet(classifier_activation=None)
+            loss = SoftmaxWithCrossEntropy()
+            model.prepare(loss_function=loss, inputs=inputs, labels=labels)
+    """
+
+    def __init__(self, average=True):
+        super(SoftmaxWithCrossEntropy, self).__init__(average)
+
+    def forward(self, outputs, labels):
+        def fused(out, lbl):
+            return fluid.layers.softmax_with_cross_entropy(
+                out, lbl, return_softmax=False)
+        return [fused(out, lbl) for out, lbl in zip(outputs, labels)]
diff --git a/python/paddle/incubate/hapi/metrics.py b/python/paddle/incubate/hapi/metrics.py
new file mode 100644
index 0000000000000000000000000000000000000000..7c3d45b05816b54dbdb0fdd9af0d817e796bc034
--- /dev/null
+++ b/python/paddle/incubate/hapi/metrics.py
@@ -0,0 +1,242 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import six
+import abc
+import numpy as np
+import paddle.fluid as fluid
+
+import logging
+
+FORMAT = '%(asctime)s-%(levelname)s: %(message)s'  # log record layout
+logging.basicConfig(level=logging.INFO, format=FORMAT)  # NOTE(review): configures the root logger as an import side effect
+logger = logging.getLogger(__name__)  # module-level logger
+
+__all__ = ['Metric', 'Accuracy']
+
+
+@six.add_metaclass(abc.ABCMeta)
+class Metric(object):
+    """
+    Base class for metric, encapsulates metric logic and APIs
+    Usage:
+
+        m = SomeMetric()
+        for prediction, label in ...:
+            m.update(prediction, label)
+        m.accumulate()
+
+    Advanced usage for :code:`add_metric_op`
+    Metric calculation can be accelerated by calculating metric states
+    from model outputs and labels by Paddle OPs in :code:`add_metric_op`,
+    metric states will be fetched as numpy arrays and :code:`update` is
+    called with the states in numpy format.
+    Metric calculated as follows (operations in Model and Metric are
+    indicated with curly brackets, while data nodes not):
+                 inputs & labels              || ------------------
+                       |                      ||
+                    {model}                   ||
+                       |                      ||
+                outputs & labels              ||
+                       |                      ||    tensor data
+             {Metric.add_metric_op}           ||
+                       |                      ||
+              metric states(tensor)           ||
+                       |                      ||
+                {fetch as numpy}              || ------------------
+                       |                      ||
+              metric states(numpy)            ||    numpy data
+                       |                      ||
+                {Metric.update}               \/ ------------------
+    Examples:
+
+        For :code:`Accuracy` metric, which takes :code:`pred` and :code:`label`
+        as inputs, we can calculate the correct prediction matrix between
+        :code:`pred` and :code:`label` in :code:`add_metric_op`.
+        For example, prediction results contain 10 classes, while :code:`pred`
+        shape is [N, 10], :code:`label` shape is [N, 1], N is mini-batch size,
+        and we only need to calculate accuracy of top-1 and top-5, we could
+        calculate the correct prediction matrix of the top-5 scores of the
+        prediction of each sample as follows, while the correct prediction
+        matrix shape is [N, 5].
+        .. code-block:: python
+            def add_metric_op(pred, label):
+                # sort prediction and slice the top-5 scores
+                pred = fluid.layers.argsort(pred, descending=True)[1][:, :5]
+                # calculate whether the predictions are correct
+                correct = pred == label
+                return fluid.layers.cast(correct, dtype='float32')
+        With the :code:`add_metric_op`, we split some calculations to OPs(which
+        may run on GPU devices, will be faster), and only fetch 1 tensor with
+        shape as [N, 5] instead of 2 tensors with shapes as [N, 10] and [N, 1].
+        :code:`update` can be defined as follows:
+        .. code-block:: python
+            def update(self, correct):
+                accs = []
+                for i, k in enumerate(self.topk):
+                    num_corrects = correct[:, :k].sum()
+                    num_samples = len(correct)
+                    accs.append(float(num_corrects) / num_samples)
+                    self.total[i] += num_corrects
+                    self.count[i] += num_samples
+                return accs
+    """
+
+    def __init__(self):
+        pass
+
+    @abc.abstractmethod
+    def reset(self):
+        """
+        Reset states and result
+        """
+        raise NotImplementedError("function 'reset' not implemented in {}.".
+                                  format(self.__class__.__name__))
+
+    @abc.abstractmethod
+    def update(self, *args):
+        """
+        Update states for metric
+
+        Inputs of :code:`update` are the outputs of :code:`Metric.add_metric_op`,
+        if :code:`add_metric_op` is not defined, the inputs of :code:`update`
+        will be flattened arguments of **output** of model and **label** from data:
+        :code:`update(output1, output2, ..., label1, label2,...)`
+
+        see :code:`Metric.add_metric_op`
+        """
+        raise NotImplementedError("function 'update' not implemented in {}.".
+                                  format(self.__class__.__name__))
+
+    @abc.abstractmethod
+    def accumulate(self):
+        """
+        Accumulates statistics, computes and returns the metric value
+        """
+        raise NotImplementedError(
+            "function 'accumulate' not implemented in {}.".format(
+                self.__class__.__name__))
+
+    @abc.abstractmethod
+    def name(self):
+        """
+        Returns metric name
+        """
+        raise NotImplementedError("function 'name' not implemented in {}.".
+                                  format(self.__class__.__name__))
+
+    def add_metric_op(self, *args):
+        """
+        This API is advanced usage to accelerate metric calculating, calculations
+        from outputs of model to the states which should be updated by Metric can
+        be defined here, where Paddle OPs are also supported. Outputs of this API
+        will be the inputs of "Metric.update".
+
+        If :code:`add_metric_op` is defined, it will be called with **outputs**
+        of model and **labels** from data as arguments, all outputs and labels
+        will be concatenated and flattened and each field as a separate argument
+        as follows:
+        :code:`add_metric_op(output1, output2, ..., label1, label2,...)`
+
+        If :code:`add_metric_op` is not defined, default behaviour is to pass
+        input to output, so output format will be:
+        :code:`return output1, output2, ..., label1, label2,...`
+
+        see :code:`Metric.update`
+        """
+        return args
+
+
+class Accuracy(Metric):
+    """
+    Encapsulates accuracy metric logic
+
+    Args:
+        topk (tuple of int): the k values top-k accuracy is reported for. Default: (1, ).
+        name (str): prefix of the reported metric names. Default: 'acc'.
+
+    Examples:
+        .. code-block:: python
+
+            from paddle import fluid
+            from paddle.incubate.hapi.metrics import Accuracy
+            from paddle.incubate.hapi.loss import CrossEntropy
+            from paddle.incubate.hapi.datasets import MNIST
+            from paddle.incubate.hapi.model import Input
+            from paddle.incubate.hapi.vision.models import LeNet
+
+            fluid.enable_dygraph()
+            train_dataset = MNIST(mode='train')
+            model = LeNet()
+            optim = fluid.optimizer.Adam(
+                learning_rate=0.001, parameter_list=model.parameters())
+
+            inputs = [Input([-1, 1, 28, 28], 'float32', name='image')]
+            labels = [Input([None, 1], 'int64', name='label')]
+            model.prepare(
+                optim,
+                loss_function=CrossEntropy(average=False),
+                metrics=Accuracy(),
+                inputs=inputs,
+                labels=labels)
+
+            model.fit(train_dataset, batch_size=64)
+    """
+
+    def __init__(self, topk=(1, ), name=None, *args, **kwargs):
+        super(Accuracy, self).__init__(*args, **kwargs)
+        self.topk = topk
+        self.maxk = max(topk)
+        self._init_name(name)
+        self.reset()
+
+    def add_metric_op(self, pred, label, *args):
+        pred = fluid.layers.argsort(pred, descending=True)[1][:, :self.maxk]
+        correct = pred == label
+        return fluid.layers.cast(correct, dtype='float32')
+
+    def update(self, correct, *args):
+        accs = []
+        for i, k in enumerate(self.topk):
+            num_corrects = correct[:, :k].sum()
+            num_samples = len(correct)
+            accs.append(float(num_corrects) / num_samples)
+            self.total[i] += num_corrects
+            self.count[i] += num_samples
+        return accs
+
+    def reset(self):
+        self.total = [0.] * len(self.topk)
+        self.count = [0] * len(self.topk)
+
+    def accumulate(self):
+        # a slot that has seen no sample reports 0. instead of dividing by zero
+        return [
+            float(t) / c if c > 0 else 0.
+            for t, c in zip(self.total, self.count)
+        ]
+
+    def _init_name(self, name):
+        name = name or 'acc'
+        if self.maxk != 1:
+            self._name = ['{}_top{}'.format(name, k) for k in self.topk]
+        else:
+            self._name = [name]
+
+    def name(self):
+        return self._name
diff --git a/python/paddle/incubate/hapi/model.py b/python/paddle/incubate/hapi/model.py
new file mode 100644
index 0000000000000000000000000000000000000000..e089c26decb5efd6754d0a36ed0b5dd8d09af9ab
--- /dev/null
+++ b/python/paddle/incubate/hapi/model.py
@@ -0,0 +1,1668 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import inspect
+import os
+import pickle
+import numpy as np
+import six
+import warnings
+from collections import Iterable
+
+from paddle import fluid
+from paddle.fluid.framework import in_dygraph_mode, Variable
+from paddle.fluid.executor import global_scope
+from paddle.fluid.io import is_belong_to_optimizer
+from paddle.fluid.dygraph.base import to_variable
+from paddle.fluid.dygraph.parallel import ParallelEnv
+from paddle.fluid.layers.utils import flatten
+from paddle.fluid.incubate.fleet.collective import fleet, DistributedStrategy
+from paddle.fluid.incubate.fleet.base import role_maker
+from paddle.io import DataLoader, Dataset
+
+from .loss import Loss
+from .distributed import DistributedBatchSampler, _all_gather, prepare_distributed_context, _parallel_context_initialized
+from .metrics import Metric
+from .callbacks import config_callbacks
+from .utils import to_list, to_numpy, flatten_list, restore_flatten_list
+
+__all__ = [
+    'Model',
+    'Input',
+    'set_device',
+]
+
+
+def set_device(device):
+    """
+    Create the place object matching a device type.
+
+    Args:
+        device (str): specify device type, 'cpu' or 'gpu'.
+
+    Returns:
+        fluid.CUDAPlace or fluid.CPUPlace: Created GPU or CPU place.
+    """
+    assert isinstance(device, six.string_types) and device.lower() in ['cpu', 'gpu'], \
+    "Expected device in ['cpu', 'gpu'], but got {}".format(device)
+
+    use_gpu = device.lower() == 'gpu' and fluid.is_compiled_with_cuda()
+    if use_gpu:
+        return fluid.CUDAPlace(ParallelEnv().dev_id)
+    return fluid.CPUPlace()
+
+
+class Input(fluid.dygraph.Layer):  # spec (shape, dtype, name) of one fed variable
+    def __init__(self, shape=None, dtype=None, name=None):
+        super(Input, self).__init__()
+        self.shape = shape
+        self.dtype = dtype
+        self.name = name
+
+    def forward(self):  # declare the fluid.data variable described by this spec
+        return fluid.data(self.name, shape=self.shape, dtype=self.dtype)
+
+
+class StaticGraphAdapter(object):
+    """
+    Model traning/inference with a static graph.
+    """
+
+    def __init__(self, model):
+        super(StaticGraphAdapter, self).__init__()
+        self.model = model
+        # with `_build_once` gone, parameters are now created in `__init__`
+        # so we need to keep track of the parameters already created
+        self._startup_prog = fluid.default_startup_program()
+        self._orig_prog = fluid.default_main_program()
+
+        self._label_vars = {}  # label variables, keyed by mode
+        self._input_vars = {}  # input variables, keyed by mode
+        self._endpoints = {}  # fetch targets, keyed by mode
+        self._loss_endpoint = None
+        self._executor = None
+        self._progs = {}  # programs, keyed by mode
+        self._compiled_progs = {}  # compiled programs, keyed by mode
+
+        self._merge_count = {
+            'eval_total': 0,
+            'test_total': 0,
+            'eval_batch': 0,
+            'test_batch': 0
+        }
+
+        self._nranks = ParallelEnv().nranks
+        self._local_rank = ParallelEnv().local_rank
+
+    @property
+    def mode(self):  # the adapter keeps no mode of its own, it mirrors model.mode
+        return self.model.mode
+
+    @mode.setter
+    def mode(self, value):  # writes through to model.mode
+        self.model.mode = value
+
+    def train_batch(self, inputs, labels=None):  # run one batch in 'train' mode
+        assert self.model._optimizer, \
+            "model not ready, please call `model.prepare()` first"
+        self.mode = 'train'
+        return self._run(inputs, labels)
+
+    def eval_batch(self, inputs, labels=None):  # run one batch in 'eval' mode
+        self.mode = 'eval'
+        return self._run(inputs, labels)
+
+    def test_batch(self, inputs):  # run one batch in 'test' mode, without labels
+        self.mode = 'test'
+        return self._run(inputs, None)
+
+    def parameters(self, *args, **kwargs):  # calls parameters() of Model's parent class on the wrapped model
+        return super(Model, self.model).parameters(*args, **kwargs)
+
+    def save(self, path):  # path is a file prefix: writes path.pdparams and, when available, path.pdopt
+        def _save(state, path):  # pickle state to path; Variable values go through to_numpy
+            if not state:
+                return
+            state = {
+                k: to_numpy(v) if isinstance(v, Variable) else v
+                for k, v in state.items()
+            }
+            with open(path, 'wb') as f:
+                pickle.dump(state, f)
+
+        base = os.path.basename(path)
+        assert base != "", "path should be of 'dirname/filename' format"
+        dir_name = os.path.dirname(path)
+        if dir_name and not os.path.exists(dir_name):
+            os.makedirs(dir_name)
+        param_path = path + ".pdparams"
+        _save(self.model.state_dict(), param_path)
+        prog = self._progs.get('train', None)
+        if prog is None or self.model._optimizer is None:
+            return
+        # XXX `optimizer.state_dict()` only works in dygraph mode, so the optimizer vars are read from the train program
+        optim_path = path + ".pdopt"
+        optim = {
+            p.name: p
+            for p in filter(is_belong_to_optimizer, prog.list_vars())
+        }
+        if not optim:
+            return
+
+        _save(optim, optim_path)
+
+    def load(self, param_state_pairs, optim_state):  # param_state_pairs: iterable of (param, state) pairs
+        if self._executor is None:  # no executor yet: use the one of a fresh CPU Executor
+            executor = fluid.Executor(fluid.CPUPlace())._default_executor
+        else:
+            executor = self._executor._default_executor
+
+        # restore parameter states
+        fluid.core._create_loaded_parameter(
+            [param for param, state in param_state_pairs],
+            global_scope(), executor)
+        for param, state in param_state_pairs:
+            self._set_var(param, state)
+
+        # restore optimizer states, skipped without an optimizer or saved state
+        # FIXME what if a different optimizer is used?
+        if not self.model._optimizer or not optim_state:
+            return
+        self._load_optimizer(optim_state, executor)
+
+    def _load_optimizer(self, state, executor):  # fill the train program's optimizer vars from `state`
+        prog = self._progs.get('train', None)  # NOTE(review): None when no 'train' program exists; next line would fail
+        optim = list(filter(is_belong_to_optimizer, prog.list_vars()))
+        if not optim:
+            return
+
+        fluid.core._create_loaded_parameter(optim, global_scope(), executor)
+
+        converted_state = dict(state)
+        for var in optim:
+            if var.name in ["@LR_DECAY_COUNTER@", "global_step"]:
+                # When using learning rate scheduler, dygraph would name the
+                # global step var as "global_step" to save, while static-graph
+                # would have a state var named as "@LR_DECAY_COUNTER@".
+                # NOTE: dygraph saved global_step is 1 larger than that in
+                # static-graph, since the time of global_step to increase is
+                # different.
+                state_val = (
+                    np.array(converted_state.pop("global_step")) - 1
+                ) if "global_step" in converted_state else converted_state.pop(
+                    "@LR_DECAY_COUNTER@", None)
+                if state_val is not None:
+                    converted_state[var.name] = state_val
+            elif var.name.startswith("learning_rate_"):
+                # When using static learning rate, static-graph would make it
+                # a persistable var named 'unique_name.generate("learning_rate")',
+                # However, dygraph wouldn't save it, so it is skipped if absent.
+                if var.name not in state:
+                    continue
+            else:
+                # moment and other accumulators
+                if var.name not in converted_state:
+                    # try to convert from dygraph name
+                    opt_name = self.model._optimizer._name
+                    opt_cls_name = self.model._optimizer.__class__.__name__
+                    opt_unq_name = None
+                    for name in self.model._optimizer._accumulators.keys():
+                        accum_name = name if opt_name is None else name[len(
+                            opt_name) + 1:]
+                        for param_name, state_var in self.model._optimizer._accumulators[
+                                name].items():
+                            if opt_unq_name is None:
+                                # can not infer out the exact unique(opt_name),
+                                # thus try to extract rather than generate
+                                for state_key in sorted(
+                                        state.keys(),
+                                        key=lambda x: len(x),
+                                        reverse=True):
+                                    prefix = param_name + "_" + (
+                                        opt_cls_name
+                                        if opt_name is None else opt_name) + "_"
+                                    if state_key.startswith(prefix):
+                                        prefix_offset = state_key[len(
+                                            prefix):].find("_") + len(prefix)
+                                        opt_unq_name = state_key[len(
+                                            param_name + "_"):prefix_offset]
+                                        # TODO: assert that a unique name was found; if no key
+                                        # matches, opt_unq_name stays None and the concat below fails
+                                    # gen(param.name + "_" + gen(opt_name) + "_" + accum_name)
+                                    # always end with "_0" since the unique optimizer._name
+                            dy_state_name = (param_name + "_" + opt_unq_name +
+                                             "_" + accum_name + "_0")
+                            converted_state[
+                                state_var.name] = converted_state.pop(
+                                    dy_state_name)
+
+            assert var.name in converted_state, \
+                "variable [{}] is not in optimizer state file".format(var.name)
+            self._set_var(var, converted_state[var.name])
+
+    def _set_var(self, var, ndarray):
+        # write ndarray into var's tensor, on the place the tensor reports
+        tensor = global_scope().find_var(var.name).get_tensor()
+        cur_place = tensor._place()
+        if cur_place.is_cpu_place():
+            target = fluid.CPUPlace()
+        elif cur_place.is_cuda_pinned_place():
+            target = fluid.CUDAPinnedPlace()
+        else:
+            generic = fluid.core.Place()
+            generic.set_place(tensor._place())
+            target = fluid.CUDAPlace(generic.gpu_device_id())
+        tensor.set(ndarray, target)
+
+    def _run(self, inputs, labels=None):
+        """
+        Run the compiled program of the current mode on one batch.
+
+        Args:
+            inputs: a single input or a list of inputs, matched by position
+                with the input variables of the current mode. Entries that
+                are None are not fed.
+            labels: a single label or a list of labels, matched by position
+                with the label variables of the current mode, or None.
+
+        Returns:
+            In 'test' mode, a list of numpy.ndarray with the fetched outputs.
+            Otherwise `(losses, metrics)` if any metric produced a result,
+            else only `losses`.
+
+        Raises:
+            AssertionError: if no compiled program exists for the current
+                mode, or the number of inputs differs from the number of
+                input variables.
+        """
+        compiled_prog = self._compiled_progs.get(self.mode, None)
+        assert compiled_prog, \
+            "Model is not ready, please call `model.prepare()` first"
+
+        inputs = to_list(inputs)
+        if labels is not None:
+            labels = to_list(labels)
+        assert len(inputs) == len(self._input_vars[self.mode]), \
+            "number of inputs" \
+            + " does not match number of arguments of `forward` method"
+
+        # build the feed dict: variable name -> batch data
+        feed = {}
+        input_names = [v.name for v in self._input_vars[self.mode]]
+        for idx, n in enumerate(input_names):
+            # train and test may take different arguments
+            if inputs[idx] is not None:
+                feed[n] = inputs[idx]
+        if labels is not None:
+            for idx, v in enumerate(self._label_vars[self.mode]):
+                feed[v.name] = labels[idx]
+
+        # choose what to fetch: outputs in test mode, losses followed by the
+        # flattened metric states otherwise
+        endpoints = self._endpoints[self.mode]
+        if self.mode == 'test':
+            fetch_list = endpoints['output']
+        else:
+            metric_list, metric_splits = flatten_list(endpoints['metric'])
+            fetch_list = endpoints['loss'] + metric_list
+            num_loss = len(endpoints['loss'])
+
+        # if fetch Variable is same as input Variable, do not fetch
+        # from program, get it from input directly
+        pruned_fetch_list = []
+        # slot i holds the feed name when fetch_list[i] is a fed variable,
+        # and stays "" when it is really fetched from the program
+        pruned_fetch_idx_name_map = [""] * len(fetch_list)
+        for i, fetch_var in enumerate(fetch_list):
+            if fetch_var.name in feed.keys():
+                pruned_fetch_idx_name_map[i] = fetch_var.name
+            else:
+                pruned_fetch_list.append(fetch_var)
+
+        rets = self._executor.run(compiled_prog,
+                                  feed=feed,
+                                  fetch_list=pruned_fetch_list,
+                                  return_numpy=False)
+
+        # restore pruned fetch_list Variable from feeds
+        # (inserting in ascending index order puts every result back at its
+        # original position in fetch_list)
+        for i, name in enumerate(pruned_fetch_idx_name_map):
+            if len(name) > 0:
+                rets.insert(i, feed[name])
+
+        # LoDTensor cannot be fetched as numpy directly
+        rets = [np.array(v) for v in rets]
+        if self.mode == 'test':
+            return rets[:]
+        # the first `num_loss` results are losses, the rest are metric states
+        losses = rets[:num_loss]
+        metric_states = restore_flatten_list(rets[num_loss:], metric_splits)
+        metrics = []
+        for metric, state in zip(self.model._metrics, metric_states):
+            # cut off padding size
+            # NOTE(review): the `_merge_count` counters below are updated once
+            # per metric rather than once per batch -- confirm this is the
+            # intended behavior when the model has more than one metric.
+            if self.mode != 'train' and self.model._test_dataloader is not None \
+                    and isinstance(self.model._test_dataloader, DataLoader) \
+                    and self._nranks > 1:
+                total_size = len(self.model._test_dataloader.dataset)
+                # TODO: fixme if have better way to get batch size
+                samples = state[0].shape[0]
+                current_count = self._merge_count.get(self.mode + '_total', 0)
+                if current_count + samples >= total_size:
+                    # this batch reaches the dataset size: keep only the
+                    # samples still missing and reset the running total
+                    state = [
+                        s[:int(total_size - current_count), ...] for s in state
+                    ]
+                    self._merge_count[self.mode + '_total'] = 0
+                    self._merge_count[self.mode + '_batch'] = int(total_size -
+                                                                  current_count)
+                else:
+                    self._merge_count[self.mode + '_total'] += samples
+                    self._merge_count[self.mode + '_batch'] = samples
+
+            metrics.append(metric.update(*state))
+        return (losses, metrics) if len(metrics) > 0 else losses
+
+    def prepare(self):
+        """
+        Build, then compile and initialize, the program of each mode,
+        in the order 'train', 'eval', 'test'.
+        """
+        for mode in ('train', 'eval', 'test'):
+            self._make_program(mode)
+            self._compile_and_initialize(self._progs[mode], mode)
+
+    def _make_program(self, mode):
+        """
+        Build the program for `mode` and record its input variables, label
+        variables and endpoints (outputs, losses, metrics).
+
+        Does nothing if a program for `mode` has already been built.
+
+        Args:
+            mode (str): compared against 'train' and 'test' below; any other
+                value takes the evaluation path.
+        """
+        prog = self._progs.get(mode, None)
+        if prog is not None:
+            return
+
+        prog = self._orig_prog.clone()
+        # NOTE: When defining learning rate scheduling in static-graph, ops to
+        # increase the global step var and calculate learning rate would be
+        # prepended into _orig_prog. test program made by `_orig_prog.clone`
+        # also would include these ops. Thus must prune these ops in test
+        # program, otherwise the global step would be changed in test.
+        if mode != 'train':
+            # remove the first op once for every op of the cloned block,
+            # which leaves the block without ops
+            for op in list(prog.global_block().ops):
+                prog.global_block()._remove_op(0)
+        if mode == 'train' and self.model._optimizer \
+                and self.model._optimizer._learning_rate_map:
+            # HACK workaround learning rate map issue
+            # register the clone's learning rate variable under the cloned
+            # program, looked up by the name used in the original program
+            lr_var = self.model._optimizer._learning_rate_map[self._orig_prog]
+            new_lr_var = prog.global_block().vars[lr_var.name]
+            self.model._optimizer._learning_rate_map[prog] = new_lr_var
+
+        losses = []
+        metrics = []
+        with fluid.program_guard(prog, self._startup_prog):
+            ins = self.model._inputs
+            lbls = self.model._labels if self.model._labels else []
+            inputs = [k.forward() for k in to_list(ins)]
+            labels = [k.forward() for k in to_list(lbls)]
+            self._label_vars[mode] = labels
+            outputs = to_list(self.model.forward(*inputs))
+
+            if mode != 'test' and self.model._loss_function:
+                losses = self.model._loss_function(outputs, labels)
+
+            # with several ranks, gather outputs (and labels, unless in test
+            # mode) from all ranks for evaluation and test
+            if self._nranks > 1 and mode != 'train':
+                outputs = [_all_gather(o, self._nranks) for o in outputs]
+                if mode != 'test':
+                    labels = [_all_gather(l, self._nranks) for l in labels]
+
+            if mode != 'test':
+                for metric in self.model._metrics:
+                    metrics.append(
+                        to_list(metric.add_metric_op(*(outputs + labels))))
+
+            if mode == 'train' and self.model._optimizer:
+                self._loss_endpoint = fluid.layers.sum(losses)
+                if self._nranks > 1:
+                    # wrap the optimizer with fleet's collective
+                    # ("grad_allreduce") distributed optimizer
+                    role = role_maker.PaddleCloudRoleMaker(is_collective=True)
+                    fleet.init(role)
+                    dist_strategy = DistributedStrategy()
+                    dist_strategy.mode = "collective"
+                    dist_strategy.collective_mode = "grad_allreduce"
+                    self.model._optimizer = fleet.distributed_optimizer(
+                        self.model._optimizer, strategy=dist_strategy)
+
+                self.model._optimizer.minimize(self._loss_endpoint)
+
+        if mode != 'train':  # clone again to put it in test mode
+            prog = prog.clone(for_test=True)
+
+        self._input_vars[mode] = inputs
+
+        self._progs[mode] = prog
+        self._endpoints[mode] = {
+            "output": outputs,
+            "loss": losses,
+            "metric": metrics
+        }
+
+    def _compile_and_initialize(self, prog, mode):
+        """
+        Compile `prog` for `mode`, creating the executor and running the
+        startup program for uninitialized variables on first use.
+
+        Args:
+            prog: the program built for `mode`.
+            mode (str): key under which the compiled program is cached.
+
+        Returns:
+            The compiled program for `mode`, whether it was already cached
+            or has just been compiled.
+
+        Raises:
+            AssertionError: if the model has no place (device) set.
+        """
+        compiled_prog = self._compiled_progs.get(mode, None)
+        if compiled_prog is not None:
+            return compiled_prog
+
+        assert self.model._place is not None, \
+            "device is not set, please call `model.prepare()` first"
+
+        place = self.model._place
+
+        # XXX *ALL WEIGHTS* should be initialized upon model construction
+        # even if `forward()` may run different code path for different mode
+        # therefore startup program only needs to run once
+        if self._executor is None:
+            self._executor = fluid.Executor(place)
+            # XXX incremental initialization
+            # collect startup variables that still need initialization;
+            # variables whose name starts with 'nccl_id' are always included
+            uninitialized = []
+            for var_py in self._startup_prog.list_vars():
+                var = fluid.global_scope().find_var(var_py.name)
+                if not var_py.name.startswith('nccl_id') and var and \
+                        var.get_tensor()._is_initialized():
+                    continue
+
+                uninitialized.append(var_py)
+            if uninitialized:
+                startup_prog = self._startup_prog._prune(uninitialized)
+                self._executor.run(startup_prog)
+
+        # only wrap in CompiledProgram for single-rank runs; with several
+        # ranks the program is used as is
+        if self._nranks < 2:
+            compiled_prog = fluid.CompiledProgram(prog)
+        else:
+            compiled_prog = prog
+
+        self._compiled_progs[mode] = compiled_prog
+        # return the program on this path too, matching the cached early exit
+        return compiled_prog
+
+
+class DynamicGraphAdapter(object):
+    """
+    Backend of `Model` for dynamic graph (dygraph) mode.
+
+    Wraps a `Model` instance and implements batch-level training, evaluation
+    and prediction, as well as saving and loading of parameters and optimizer
+    states. When more than one rank is in use, the model is additionally
+    wrapped in `fluid.dygraph.parallel.DataParallel` for training.
+    """
+
+    def __init__(self, model):
+        super(DynamicGraphAdapter, self).__init__()
+        self.model = model
+        self._nranks = ParallelEnv().nranks
+        self._local_rank = ParallelEnv().local_rank
+        # counters used to cut padded samples off gathered evaluation batches:
+        # '<mode>_total' is the running number of samples seen so far,
+        # '<mode>_batch' the number of samples kept from the latest batch
+        self._merge_count = {
+            'eval_total': 0,
+            'test_total': 0,
+            'eval_batch': 0,
+            'test_batch': 0
+        }
+
+        if self._nranks > 1:
+            strategy = fluid.dygraph.parallel.ParallelStrategy()
+            strategy.nranks = ParallelEnv().nranks
+            strategy.local_rank = ParallelEnv().local_rank
+            strategy.trainer_endpoints = ParallelEnv().trainer_endpoints
+            strategy.current_endpoint = ParallelEnv().current_endpoint
+            self.ddp_model = fluid.dygraph.parallel.DataParallel(self.model,
+                                                                 strategy)
+
+    @property
+    def mode(self):
+        """Current mode; stored on the wrapped model."""
+        return self.model.mode
+
+    @mode.setter
+    def mode(self, value):
+        self.model.mode = value
+
+    # TODO multi device in dygraph mode not implemented at present time
+    def train_batch(self, inputs, labels=None):
+        """
+        Run forward, backward and one optimizer step on a batch.
+
+        Returns:
+            `(losses, metrics)` if the model has metrics, else `losses`,
+            where `losses` is a list of numpy values.
+
+        Raises:
+            AssertionError: if the model has no optimizer.
+        """
+        assert self.model._optimizer, \
+            "model not ready, please call `model.prepare()` first"
+        super(Model, self.model).train()
+        self.mode = 'train'
+        inputs = to_list(inputs)
+        if labels is not None:
+            labels = [to_variable(l) for l in to_list(labels)]
+        if self._nranks > 1:
+            # data parallel: scale the loss and all-reduce the gradients
+            outputs = self.ddp_model.forward(* [to_variable(x) for x in inputs])
+            losses = self.model._loss_function(outputs, labels)
+            final_loss = fluid.layers.sum(losses)
+            final_loss = self.ddp_model.scale_loss(final_loss)
+            final_loss.backward()
+            self.ddp_model.apply_collective_grads()
+        else:
+            outputs = self.model.forward(* [to_variable(x) for x in inputs])
+            losses = self.model._loss_function(outputs, labels)
+            final_loss = fluid.layers.sum(losses)
+            final_loss.backward()
+
+        self.model._optimizer.minimize(final_loss)
+        self.model.clear_gradients()
+        metrics = []
+        for metric in self.model._metrics:
+            metric_outs = metric.add_metric_op(*(to_list(outputs) + to_list(
+                labels)))
+            m = metric.update(* [to_numpy(m) for m in to_list(metric_outs)])
+            metrics.append(m)
+
+        return ([to_numpy(l) for l in losses], metrics) \
+            if len(metrics) > 0 else [to_numpy(l) for l in losses]
+
+    def eval_batch(self, inputs, labels=None):
+        """
+        Run forward on a batch in evaluation mode and update the metrics.
+
+        Returns:
+            `(losses, metrics)` if the model has metrics, else `losses`.
+            `losses` is empty when the model has no loss function.
+        """
+        super(Model, self.model).eval()
+        self.mode = 'eval'
+        inputs = to_list(inputs)
+        if labels is not None:
+            labels = [to_variable(l) for l in to_list(labels)]
+        outputs = self.model.forward(* [to_variable(x) for x in inputs])
+        if self.model._loss_function:
+            losses = self.model._loss_function(outputs, labels)
+        else:
+            losses = []
+        if self._nranks > 1:
+            outputs = [_all_gather(o, self._nranks) for o in to_list(outputs)]
+            # `labels` defaults to None; only gather when labels were given
+            if labels is not None:
+                labels = [_all_gather(l, self._nranks) for l in labels]
+
+        # cut off padding value.
+        # This is done once per batch, before the metric loop: doing it per
+        # metric would advance the `_merge_count` counters several times for
+        # a single batch when the model has more than one metric. It is
+        # skipped when there are no metrics, as the counters are only
+        # maintained alongside metric computation.
+        if self.model._metrics and self._nranks > 1 \
+                and self.model._test_dataloader is not None \
+                and isinstance(self.model._test_dataloader, DataLoader):
+            total_size = len(self.model._test_dataloader.dataset)
+            samples = outputs[0].shape[0]
+            current_count = self._merge_count.get(self.mode + '_total', 0)
+            if current_count + samples >= total_size:
+                # this batch reaches the dataset size: keep only the samples
+                # still missing and reset the running total
+                remaining = int(total_size - current_count)
+                outputs = [o[:remaining] for o in outputs]
+                if labels is not None:
+                    labels = [l[:remaining] for l in labels]
+                self._merge_count[self.mode + '_total'] = 0
+                self._merge_count[self.mode + '_batch'] = remaining
+            else:
+                self._merge_count[self.mode + '_total'] += samples
+                self._merge_count[self.mode + '_batch'] = samples
+
+        metrics = []
+        for metric in self.model._metrics:
+            metric_outs = metric.add_metric_op(*(to_list(outputs) + to_list(
+                labels)))
+            m = metric.update(* [to_numpy(m) for m in to_list(metric_outs)])
+            metrics.append(m)
+
+        # To be consistent with static graph
+        # return empty loss if loss_function is None
+        return ([to_numpy(l) for l in losses], metrics) \
+            if len(metrics) > 0 else [to_numpy(l) for l in losses]
+
+    def test_batch(self, inputs):
+        """
+        Run forward on a batch in test mode.
+
+        Returns:
+            A list of numpy values, one per model output.
+        """
+        super(Model, self.model).eval()
+        self.mode = 'test'
+        inputs = [to_variable(x) for x in to_list(inputs)]
+        outputs = self.model.forward(*inputs)
+        if self._nranks > 1 and isinstance(self.model._place, fluid.CUDAPlace):
+            outputs = [_all_gather(o, self._nranks) for o in to_list(outputs)]
+
+        return [to_numpy(o) for o in to_list(outputs)]
+
+    def parameters(self, *args, **kwargs):
+        """Return the parameters of the wrapped model's base Layer."""
+        return super(Model, self.model).parameters(*args, **kwargs)
+
+    def save(self, path):
+        """
+        Save the model's state dict to `path` and, if the model has an
+        optimizer with a non-empty state dict, the optimizer state as well.
+        """
+        params = self.model.state_dict()
+        fluid.save_dygraph(params, path)
+        if self.model._optimizer is None:
+            return
+        optim = self.model._optimizer.state_dict()
+        if optim:
+            fluid.save_dygraph(optim, path)
+
+    def load(self, param_state_pairs, optim_state):
+        """
+        Restore parameter values and, when available, optimizer states.
+
+        Args:
+            param_state_pairs: iterable of `(param, state)` pairs; `state`
+                is assigned to `param` with `param.set_value`.
+            optim_state (dict|None): optimizer states keyed by variable name.
+                Ignored when empty or when the model has no optimizer.
+        """
+        # restore parameter states
+        for param, state in param_state_pairs:
+            param.set_value(state)
+
+        # restore optimizer states
+        if not self.model._optimizer or not optim_state:
+            return
+
+        # If optimizer performs set_dict when state vars haven't been created,
+        # which would happen when set_dict before minimize, the state would be
+        # stored in optimizer._accumulators_holder and loaded lazily.
+        # To achieve this when loading from static-graph saved states, extend
+        # state dict to include keys named according to dygraph naming rules.
+        # TODO: if len(self.model._optimizer._accumulators) > 0
+        converted_state = dict(optim_state)
+        opt_unq_name = self.model._optimizer._name
+        if opt_unq_name is None:
+            opt_unq_name = ''
+
+        opt_cls_name = self.model._optimizer.__class__.__name__
+        opt_name = opt_unq_name[:opt_unq_name.rfind("_")]  # remove suffix idx
+        param_names = [param.name for param in self.model.parameters()]
+        # visit longer variable names first
+        for var_name, state_var in sorted(
+                optim_state.items(), key=lambda x: len(x[0]), reverse=True):
+            if var_name in ["@LR_DECAY_COUNTER@", "global_step"]:
+                # NOTE: dygraph saved global_step is 1 larger than that in
+                # static-graph, since the time of global_step to increase is
+                # different.
+                if var_name == "@LR_DECAY_COUNTER@":
+                    converted_state["global_step"] = np.array(
+                        converted_state.pop("@LR_DECAY_COUNTER@")) + 1
+            else:
+                # moment and other accumulators
+                # extend state dict to include promising dygraph names
+                for param_name in param_names:
+                    if var_name.startswith(param_name + "_" + opt_name):
+                        # when init optimizer with name
+                        accum_name = var_name[len(param_name + "_" + opt_name +
+                                                  "_"):]
+                    elif var_name.startswith(param_name +
+                                             "_") and opt_name == opt_cls_name:
+                        # when init optimizer without name
+                        accum_name = var_name[len(param_name + "_"):]
+                    else:
+                        continue
+                    # remove suffix idx
+                    accum_name = accum_name[:accum_name.rfind("_")]
+                    # state names always end with "_0" in dygraph because of the
+                    # unique optimizer._name
+                    dy_state_name = (param_name + "_" + opt_unq_name + "_" +
+                                     accum_name + "_0")
+                    converted_state[dy_state_name] = state_var
+
+        self.model._optimizer.set_dict(converted_state)
+
+
+class Model(fluid.dygraph.Layer):
+    """
+    A Model object is a network with training and inference features.
+    Both dynamic graph and static graph are supported; the mode is
+    selected with `fluid.enable_dygraph()`. The usage is shown below.
+    Note that switching between dynamic and static graph must happen
+    before a Model is instantiated. The input description, i.e. hapi.Input,
+    is required for static graph.
+
+    Usage:
+        .. code-block:: python
+
+        import numpy as np
+        import paddle
+        import paddle.fluid as fluid
+        import paddle.incubate.hapi as hapi
+        from paddle.incubate.hapi import Model, Input, set_device
+        from paddle.incubate.hapi.loss import CrossEntropy
+        from paddle.incubate.hapi.dataset import MNIST
+
+        class MyModel(Model):
+            def __init__(self):
+                super(MyModel, self).__init__()
+                self._fc = fluid.dygraph.Linear(784, 10, act='softmax')
+            def forward(self, x):
+                y = self._fc(x)
+                return y
+        device = set_device('gpu')
+        # if use static graph, do not set
+        fluid.enable_dygraph(device)
+        model = MyModel()
+        optim = fluid.optimizer.SGD(learning_rate=1e-3,
+            parameter_list=model.parameters())
+        
+        inputs = [Input([None, 784], 'float32', name='x')]
+        labels = [Input([None, 1], 'int64', name='label')]
+        
+        mnist_data = MNIST(mode='train')
+        model.prepare(optim,
+                      CrossEntropy(average=True),
+                      hapi.metrics.Accuracy(),
+                      inputs,
+                      labels,
+                      device=device)
+        model.fit(mnist_data, epochs=2, batch_size=32, verbose=1)
+    """
+
+    def __init__(self):
+        """
+        Initialize the model in 'train' mode with nothing configured yet,
+        then create the backend adapter that matches the current graph mode.
+        """
+        super(Model, self).__init__(self.__class__.__name__)
+        self.mode = 'train'
+        self._inputs = None
+        self._labels = None
+        self._loss_function = None
+        self._loss_weights = None
+        self._optimizer = None
+        self._device = None
+        # defined up front so that checks such as `model._place is not None`
+        # fail with their own message instead of an AttributeError when the
+        # model has not been prepared yet
+        self._place = None
+        self._test_dataloader = None
+
+        # init backend
+        if fluid.in_dygraph_mode():
+            self._adapter = DynamicGraphAdapter(self)
+        else:
+            self._adapter = StaticGraphAdapter(self)
+
+    def train_batch(self, inputs, labels=None):
+        """
+        Run one training step on a batch of data.
+
+        Args:
+            inputs (list): A list of numpy.ndarray, each is a batch of
+                input data.
+            labels (list): A list of numpy.ndarray, each is a batch of
+                input label. If has no labels, set None. Default is None.
+
+        Returns:
+            A list of scalar training loss if the model has no metrics,
+            or a tuple (list of scalar loss, list of metrics) if the model
+            set metrics.
+
+        Examples:
+
+            .. code-block:: python
+
+              import numpy as np
+              import paddle.fluid as fluid
+              from paddle.incubate.hapi import Model, Input, set_device
+              from paddle.incubate.hapi.loss import CrossEntropy
+
+              class MyModel(Model):
+                  def __init__(self):
+                      super(MyModel, self).__init__()
+                      self._fc = fluid.dygraph.Linear(784, 1, act='softmax')
+                  def forward(self, x):
+                      y = self._fc(x)
+                      return y
+
+              device = set_device('gpu')
+              fluid.enable_dygraph(device)
+
+              model = MyModel()
+              optim = fluid.optimizer.SGD(learning_rate=1e-3,
+                  parameter_list=model.parameters())
+
+              inputs = [Input([None, 784], 'float32', name='x')]
+              labels = [Input([None, 1], 'int64', name='label')]
+              model.prepare(optim,
+                            CrossEntropy(average=True),
+                            inputs=inputs,
+                            labels=labels,
+                            device=device)
+              data = np.random.random(size=(4,784)).astype(np.float32)
+              label = np.random.randint(0, 10, size=(4, 1)).astype(np.int64)
+              loss = model.train_batch([data], [label])
+              print(loss)
+        """
+        # delegate to the backend adapter selected at construction time
+        return self._adapter.train_batch(inputs, labels)
+
+    def eval_batch(self, inputs, labels=None):
+        """
+        Run one evaluating step on a batch of data.
+
+        Args:
+            inputs (list): A list of numpy.ndarray, each is a batch of
+                input data.
+            labels (list): A list of numpy.ndarray, each is a batch of
+                input label. If has no labels, set None. Default is None.
+
+        Returns:
+            A list of scalar testing loss if the model has no metrics,
+            or a tuple (list of scalar loss, list of metrics) if the model
+            set metrics.
+
+        Examples:
+
+            .. code-block:: python
+
+              import numpy as np
+              import paddle.fluid as fluid
+              from paddle.incubate.hapi import Model, Input, set_device
+              from paddle.incubate.hapi.loss import CrossEntropy
+
+              class MyModel(Model):
+                  def __init__(self):
+                      super(MyModel, self).__init__()
+                      self._fc = fluid.dygraph.Linear(784, 1, act='softmax')
+                  def forward(self, x):
+                      y = self._fc(x)
+                      return y
+
+              device = set_device('gpu')
+              fluid.enable_dygraph(device)
+
+              model = MyModel()
+              optim = fluid.optimizer.SGD(learning_rate=1e-3,
+                  parameter_list=model.parameters())
+
+              inputs = [Input([None, 784], 'float32', name='x')]
+              labels = [Input([None, 1], 'int64', name='label')]
+              model.prepare(optim,
+                            CrossEntropy(average=True),
+                            inputs=inputs,
+                            labels=labels,
+                            device=device)
+              data = np.random.random(size=(4,784)).astype(np.float32)
+              label = np.random.randint(0, 10, size=(4, 1)).astype(np.int64)
+              loss = model.eval_batch([data], [label])
+              print(loss)
+        """
+        # delegate to the backend adapter selected at construction time
+        return self._adapter.eval_batch(inputs, labels)
+
+    def test_batch(self, inputs):
+        """
+        Run one testing step on a batch of data.
+
+        Args:
+            inputs (list): A list of numpy.ndarray, each is a batch of
+                input data.
+
+        Returns:
+            A list of numpy.ndarray of predictions, that is the outputs
+            of Model forward.
+
+        Examples:
+
+            .. code-block:: python
+
+              import numpy as np
+              import paddle.fluid as fluid
+              from paddle.incubate.hapi import Model, Input, set_device
+
+              class MyModel(Model):
+                  def __init__(self):
+                      super(MyModel, self).__init__()
+                      self._fc = fluid.dygraph.Linear(784, 1, act='softmax')
+                  def forward(self, x):
+                      y = self._fc(x)
+                      return y
+
+              device = set_device('gpu')
+              fluid.enable_dygraph(device)
+
+              model = MyModel()
+              inputs = [Input([None, 784], 'float32', name='x')]
+              model.prepare(inputs=inputs,
+                            device=device)
+              data = np.random.random(size=(4,784)).astype(np.float32)
+              out = model.test_batch([data])
+              print(out)
+        """
+        # delegate to the backend adapter selected at construction time
+        return self._adapter.test_batch(inputs)
+
+    def save(self, path):
+        """
+        This function saves parameters and optimizer information to path.
+
+        The parameters contain all the trainable Variables and are saved to
+        a file with suffix ".pdparams".
+        The optimizer information contains all the variables used by the
+        optimizer. For the Adam optimizer, it contains beta1, beta2, momentum
+        etc. All the information is saved to a file with suffix ".pdopt". (If
+        the optimizer has no variable to save (like SGD), the file will not
+        be generated.)
+
+        This function will silently overwrite existing file
+        at the target location.
+
+        Only the process whose local rank is 0 performs the save.
+
+        Args:
+            path (str): The file prefix to save model. The format is
+                'dirname/file_prefix' or 'file_prefix'. If it is an empty
+                str, an exception will be raised.
+
+        Returns:
+            None
+
+        Examples:
+
+            .. code-block:: python
+
+              import paddle.fluid as fluid
+              from paddle.incubate.hapi import Model, set_device
+
+              class MyModel(Model):
+                  def __init__(self):
+                      super(MyModel, self).__init__()
+                      self._fc = fluid.dygraph.Linear(784, 1, act='softmax')
+                  def forward(self, x):
+                      y = self._fc(x)
+                      return y
+
+              device = set_device('cpu')
+              fluid.enable_dygraph(device)
+              model = MyModel()
+              model.save('checkpoint/test')
+        """
+        # only the process with local rank 0 writes the files
+        if ParallelEnv().local_rank == 0:
+            self._adapter.save(path)
+
+    def load(self, path, skip_mismatch=False, reset_optimizer=False):
+        """
+        Load from files storing the model states and optimizer states. The file
+        for optimizer states is not necessary if no need to restore the optimizer.
+
+        NOTE: parameters are retrieved out from the file storing model states
+        according to their structured names.
+
+        For fine-tuning or transfer-learning models where some of the layers have
+        changed, keep parameters needed to restore have same structured names in
+        the pre-trained model and fine-tuning model.
+
+        Args:
+            path (str): The prefix of files storing the model states and
+                optimizer states. The files would be `path.pdparams` and
+                `path.pdopt` separately, and the latter is not necessary
+                when no need to restore.
+            skip_mismatch (bool): Whether to skip the loading of mismatch
+                parameter or raise an error when mismatch happens (not found
+                the parameter in file storing model states of or receives a
+                mismatch shape). A skipped parameter keeps its current value,
+                and the optimizer states are not restored.
+            reset_optimizer (bool): If True, ignore the providing file storing
+                optimizer states and initialize optimizer states from scratch.
+                Otherwise, restore optimizer states from `path.pdopt` if
+                a optimizer has been set to the model. Default False.
+
+        Returns:
+            None
+
+        Raises:
+            AssertionError: if `path` has an unknown postfix, or no parameter
+                state could be loaded from `path.pdparams`.
+            ValueError: if a parameter is missing from the file or has a
+                mismatched shape and `skip_mismatch` is False.
+
+        Examples:
+
+            .. code-block:: python
+
+              import paddle.fluid as fluid
+              from paddle.incubate.hapi import Model, set_device
+
+              class MyModel(Model):
+                  def __init__(self):
+                      super(MyModel, self).__init__()
+                      self._fc = fluid.dygraph.Linear(784, 1, act='softmax')
+                  def forward(self, x):
+                      y = self._fc(x)
+                      return y
+
+              device = set_device('cpu')
+              fluid.enable_dygraph(device)
+              model = MyModel()
+              model.load('checkpoint/test')
+        """
+
+        def _load_state_from_path(path):
+            # returns None when the file does not exist
+            if not os.path.exists(path):
+                return
+            # SECURITY NOTE: unpickling can execute arbitrary code; only load
+            # checkpoint files that come from a trusted source.
+            with open(path, 'rb') as f:
+                return pickle.load(f) if six.PY2 else pickle.load(
+                    f, encoding='latin1')
+
+        def _check_match(key, param):
+            # returns (param, state) or raises ValueError on a missing key
+            # or a shape mismatch
+            state = param_state.get(key, None)
+            if state is None:
+                raise ValueError(
+                    "{} is not found in the providing file.".format(key))
+            if list(state.shape) != list(param.shape):
+                raise ValueError(
+                    "{} receives a shape {}, but the expected shape is {}.".
+                    format(key, list(state.shape), list(param.shape)))
+            return param, state
+
+        def _strip_postfix(path):
+            path, ext = os.path.splitext(path)
+            assert ext in ['', '.pdparams', '.pdopt', '.pdmodel'], \
+                    "Unknown postfix {} from weights".format(ext)
+            return path
+
+        path = _strip_postfix(path)
+        param_state = _load_state_from_path(path + ".pdparams")
+        assert param_state, "Failed to load parameters, please check path."
+
+        matched_param_state = []
+        for key, param in self.state_dict().items():
+            try:
+                match_res = _check_match(key, param)
+            except ValueError as err:
+                if not skip_mismatch:
+                    raise
+                warnings.warn(
+                    ("Skip loading for {}. ".format(key) + str(err)))
+                # reset optimizer when mismatch happens
+                reset_optimizer = True
+                # nothing matched for this parameter: do not append a stale
+                # (or undefined) `match_res`
+                continue
+            matched_param_state.append(match_res)
+
+        optim_state = None if reset_optimizer else _load_state_from_path(
+            path + ".pdopt")
+        return self._adapter.load(matched_param_state, optim_state)
+
+    def parameters(self, *args, **kwargs):
+        """
+        Returns a list of parameters of the model.
+
+        Args:
+            *args, **kwargs: forwarded unchanged to the backend adapter's
+                `parameters` method.
+
+        Returns:
+            A list of Parameter in static graph.
+            A list of ParamBase in dynamic graph.
+
+        Examples:
+
+            .. code-block:: python
+
+              import paddle.fluid as fluid
+              from paddle.incubate.hapi.model import Model, Input, set_device
+              class MyModel(Model):
+                  def __init__(self):
+                      super(MyModel, self).__init__()
+                      self._fc = fluid.dygraph.Linear(20, 10, act='softmax')
+                  def forward(self, x):
+                      y = self._fc(x)
+                      return y
+
+              fluid.enable_dygraph()
+              model = MyModel()
+              params = model.parameters()
+        """
+        # forward the caller's arguments instead of silently dropping them
+        return self._adapter.parameters(*args, **kwargs)
+
+    def prepare(self,
+                optimizer=None,
+                loss_function=None,
+                metrics=None,
+                inputs=None,
+                labels=None,
+                device=None):
+        """
+        Configures the model before runing.
+
+        Args:
+            optimizer (Optimizer|None): Optimizer must be set in training
+                and should be a Optimizer instance. It can be None in eval
+                and test mode.
+            loss_function (Loss|None): Loss function must be set in training
+                and should be a Loss instance. It can be None when there is
+                no loss.
+            metrics (Metric|list of Metric|None): If metrics is set, all
+                metrics will be calculated and output in train/eval mode.
+            inputs (Input|list|dict|None): `inputs`, entry points of network,
+                could be a Input layer, or lits of Input layers,
+                or dict (name: Input), or None. For static graph,
+                inputs must be set. For dynamic graph, it could be None.
+            labels (Input|list|None): `labels`, entry points of network,
+                could be a Input layer or lits of Input layers, or None.
+                For static graph, if labels is required in loss_function,
+                labels must be set. Otherwise, it could be None.
+            device (str|fluid.CUDAPlace|fluid.CPUPlace|None): Specify device
+                type, 'CPU', 'GPU', fluid.CUDAPlace or fluid.CPUPlace.
+                If None, automatically select device according to
+                installation package version.
+
+        Returns:
+            None
+        """
+
+        if isinstance(device, fluid.CUDAPlace) or \
+            (isinstance(device, six.string_types) and device.lower() == 'gpu') \
+            or (device is None and fluid.is_compiled_with_cuda()):
+            if isinstance(device, fluid.CUDAPlace):
+                self._place = device
+            else:
+                self._place = fluid.CUDAPlace(ParallelEnv().dev_id) \
+                    if ParallelEnv().nranks > 1 else fluid.CUDAPlace(0)
+
+            global _parallel_context_initialized
+            if ParallelEnv().nranks > 1 and not _parallel_context_initialized:
+                if fluid.in_dygraph_mode():
+                    main_prog_seed = fluid.default_main_program().random_seed
+                    startup_prog_seed = fluid.default_startup_program(
+                    ).random_seed
+                    fluid.disable_dygraph()
+                    fluid.enable_dygraph(self._place)
+                    # enable_dygraph would create and switch to a new program,
+                    # thus also copy seed to the new program
+                    fluid.default_main_program().random_seed = main_prog_seed
+                    fluid.default_startup_program(
+                    ).random_seed = startup_prog_seed
+                    fluid.dygraph.parallel.prepare_context()
+                else:
+                    prepare_distributed_context(self._place)
+
+                _parallel_context_initialized = True
+        elif isinstance(device, fluid.CPUPlace):
+            self._place = device
+        elif (isinstance(device, six.string_types) and device.lower() == 'cpu') \
+            or (device is None):
+            self._place = fluid.CPUPlace()
+        else:
+            raise ValueError(
+                "Expected device in ('gpu', 'cpu', fluid.CUDAPlace, fluid.CPUPlace, None), \
+                but got {}".format(device))
+
+        self._optimizer = optimizer
+        if loss_function:
+            if not isinstance(loss_function, Loss):
+                raise TypeError("'loss_function' must be sub classes of 'Loss'")
+        self._loss_function = loss_function
+        if not in_dygraph_mode():
+            if not isinstance(inputs, (list, dict, Input)):
+                raise TypeError(
+                    "'inputs' must be list or dict in static graph mode")
+
+        metrics = metrics or []
+        for metric in to_list(metrics):
+            assert isinstance(metric, Metric), \
+                "{} is not sub class of Metric".format(
+                    metric.__class__.__name__)
+        self._metrics = to_list(metrics)
+
+        self._inputs = to_list(inputs) if not isinstance(inputs, dict) else [
+            inputs[n] for n in extract_args(self.forward) if n != 'self'
+        ]
+        self._labels = to_list(labels)
+
+        if not in_dygraph_mode():
+            self._adapter.prepare()
+
+    def fit(
+            self,
+            train_data=None,
+            eval_data=None,
+            batch_size=1,
+            epochs=1,
+            eval_freq=1,
+            log_freq=10,
+            save_dir=None,
+            save_freq=1,
+            verbose=2,
+            drop_last=False,
+            shuffle=True,
+            num_workers=0,
+            callbacks=None, ):
+        """
+        Trains the model for a fixed number of epochs. If `eval_data` is set,
+        evaluation will be done at the end of each epoch.
+
+        Args:
+            train_data (Dataset|DataLoader): An iterable data loader is used for 
+                train. An instance of paddle paddle.io.Dataset or 
+                paddle.io.Dataloader is recomended. Default: None.
+            eval_data (Dataset|DataLoader): An iterable data loader is used for
+                evaluation at the end of epoch. If None, will not do evaluation. 
+                An instance of paddle.io.Dataset or paddle.io.Dataloader 
+                is recomended. Default: None.
+            batch_size (int): Integer number. The batch size of train_data
+                and eval_data. When train_data and eval_data are both the
+                instance of Dataloader, this parameter will be ignored.
+                Default: 1.
+            epochs (int): Integer number. The number of epochs to train
+                the model. Default: 1.
+            eval_freq (int): The frequency, in number of epochs, an evalutation
+                is performed. Default: 1.
+            log_freq (int): The frequency, in number of steps, the training logs
+                are printed. Default: 10.
+            save_dir(str|None): The directory to save checkpoint during training.
+                If None, will not save checkpoint. Default: None.
+            save_freq (int): The frequency, in number of epochs, to save
+                checkpoint. Default: 1.
+            verbose (int): The verbosity mode, should be 0, 1, or 2. 0 = silent,
+                1 = progress bar, 2 = one line per epoch. Default: 2.
+            drop_last (bool): Whether drop the last incomplete batch of
+                train_data when dataset size is not divisible by the batch size.
+                When train_data is an instance of Dataloader, this parameter
+                will be ignored. Default: False.
+            shuffle (bool): Whther to shuffle train_data. When train_data is
+                an instance of Dataloader, this parameter will be ignored.
+                Default: True.
+            num_workers (int): The number of subprocess to load data, 0 for no
+                subprocess used and loading data in main process.
+                When train_data and eval_data are both the instance of
+                Dataloader, this parameter will be ignored. Default: 0.
+            callbacks (Callback|None): A list of `Callback` instances to apply
+                during training. If None, `ProgBarLogger` and `ModelCheckpoint`
+                are automatically inserted. Default: None.
+
+        Returns:
+            None
+
+        Examples:
+            1. An example use Dataset and set btch size, shuffle in fit.
+               How to make a batch is done internally.
+
+            .. code-block:: python
+
+              from paddle.incubate.hapi.model import Model, Input, set_device
+              from paddle.incubate.hapi.loss import CrossEntropy
+              from paddle.incubate.hapi.metrics import Accuracy
+              from paddle.incubate.hapi.datasets import MNIST
+              from paddle.incubate.hapi.vision.models import LeNet
+
+              dynamic = True
+              device = set_device(FLAGS.device)
+              fluid.enable_dygraph(device) if dynamic else None
+           
+              train_dataset = MNIST(mode='train')
+              val_dataset = MNIST(mode='test')
+           
+              inputs = [Input([None, 1, 28, 28], 'float32', name='image')]
+              labels = [Input([None, 1], 'int64', name='label')]
+           
+              model = LeNet()
+              optim = fluid.optimizer.Adam(
+                  learning_rate=0.001, parameter_list=model.parameters())
+              model.prepare(
+                  optim,
+                  CrossEntropy(),
+                  Accuracy(topk=(1, 2)),
+                  inputs=inputs,
+                  labels=labels,
+                  device=device)
+              model.fit(train_dataset,
+                        val_dataset,
+                        epochs=2,
+                        batch_size=64,
+                        save_dir='mnist_checkpoint')
+
+            2. An example use DataLoader, batch size and shuffle is set in
+               DataLoader.
+
+            .. code-block:: python
+
+              from paddle.incubate.hapi.model import Model, Input, set_device
+              from paddle.incubate.hapi.loss import CrossEntropy
+              from paddle.incubate.hapi.metrics import Accuracy
+              from paddle.incubate.hapi.datasets import MNIST
+              from paddle.incubate.hapi.vision.models import LeNet
+
+              dynamic = True
+              device = set_device(FLAGS.device)
+              fluid.enable_dygraph(device) if dynamic else None
+           
+              train_dataset = MNIST(mode='train')
+              train_loader = fluid.io.DataLoader(train_dataset,
+                  places=device, batch_size=64)
+              val_dataset = MNIST(mode='test')
+              val_loader = fluid.io.DataLoader(val_dataset,
+                  places=device, batch_size=64)
+           
+              inputs = [Input([None, 1, 28, 28], 'float32', name='image')]
+              labels = [Input([None, 1], 'int64', name='label')]
+           
+              model = LeNet()
+              optim = fluid.optimizer.Adam(
+                  learning_rate=0.001, parameter_list=model.parameters())
+              model.prepare(
+                  optim,
+                  CrossEntropy(),
+                  Accuracy(topk=(1, 2)),
+                  inputs=inputs,
+                  labels=labels,
+                  device=device)
+              model.fit(train_loader,
+                        val_loader,
+                        epochs=2,
+                        save_dir='mnist_checkpoint')
+        """
+
+        assert train_data is not None, \
+                "train_data must be given!"
+
+        if isinstance(train_data, Dataset):
+            train_sampler = DistributedBatchSampler(
+                train_data,
+                batch_size=batch_size,
+                shuffle=shuffle,
+                drop_last=drop_last)
+            train_loader = DataLoader(
+                train_data,
+                batch_sampler=train_sampler,
+                places=self._place,
+                num_workers=num_workers,
+                return_list=True)
+        else:
+            train_loader = train_data
+
+        if eval_data is not None and isinstance(eval_data, Dataset):
+            eval_sampler = DistributedBatchSampler(
+                eval_data, batch_size=batch_size)
+            eval_loader = DataLoader(
+                eval_data,
+                batch_sampler=eval_sampler,
+                places=self._place,
+                num_workers=num_workers,
+                return_list=True)
+        elif eval_data is not None:
+            eval_loader = eval_data
+        else:
+            eval_loader = None
+
+        do_eval = eval_loader is not None
+        self._test_dataloader = eval_loader
+
+        steps = self._len_data_loader(train_loader)
+        cbks = config_callbacks(
+            callbacks,
+            model=self,
+            epochs=epochs,
+            steps=steps,
+            log_freq=log_freq,
+            save_freq=save_freq,
+            save_dir=save_dir,
+            verbose=verbose,
+            metrics=self._metrics_name(), )
+
+        cbks.on_begin('train')
+        for epoch in range(epochs):
+
+            cbks.on_epoch_begin(epoch)
+            logs = self._run_one_epoch(train_loader, cbks, 'train')
+            cbks.on_epoch_end(epoch, logs)
+
+            if do_eval and epoch % eval_freq == 0:
+
+                eval_steps = self._len_data_loader(eval_loader)
+                cbks.on_begin('eval', {
+                    'steps': eval_steps,
+                    'metrics': self._metrics_name()
+                })
+
+                eval_logs = self._run_one_epoch(eval_loader, cbks, 'eval')
+
+                cbks.on_end('eval', eval_logs)
+
+        cbks.on_end('train', logs)
+        self._test_dataloader = None
+
+    def evaluate(
+            self,
+            eval_data,
+            batch_size=1,
+            log_freq=10,
+            verbose=2,
+            num_workers=0,
+            callbacks=None, ):
+        """
+        Evaluate the loss and metrics of the model on input dataset.
+
+        Args:
+            eval_data (Dataset|DataLoader): An iterable data loader is used for
+                evaluation. An instance of paddle.io.Dataset or 
+                paddle.io.Dataloader is recomended.
+            batch_size (int): Integer number. The batch size of train_data
+                and eval_data.  When eval_data is the instance of Dataloader,
+                this argument will be ignored. Default: 1.
+            log_freq (int): The frequency, in number of steps, the eval logs
+                are printed. Default: 10.
+            verbose (int): The verbosity mode, should be 0, 1, or 2. 0 = silent,
+                1 = progress bar, 2 = one line per epoch. Default: 2.
+            num_workers (int): The number of subprocess to load data,
+                0 for no subprocess used and loading data in main process. When
+                train_data and eval_data are both the instance of Dataloader,
+                this parameter will be ignored. Default: 0.
+            callbacks (Callback|None): A list of `Callback` instances to apply
+                during training. If None, `ProgBarLogger` and `ModelCheckpoint`
+                are automatically inserted. Default: None.
+        Returns:
+            dict: Result of metric. The key is the names of Metric,
+                value is a scalar or numpy.array.
+
+        Examples:
+        .. code-block:: python
+
+            # declarative mode
+            import numpy as np
+            from paddle.incubate.hapi.metrics import Accuracy
+            from paddle.incubate.hapi.datasets import MNIST
+            from paddle.incubate.hapi.vision.transforms import Compose,Resize
+            from paddle.incubate.hapi.vision.models import LeNet
+            from paddle.incubate.hapi.model import Input, set_device
+
+
+            inputs = [Input([-1, 1, 28, 28], 'float32', name='image')]
+            labels = [Input([None, 1], 'int64', name='label')]
+
+            val_dataset = MNIST(mode='test')
+
+            model = LeNet()
+            model.prepare(metrics=Accuracy(), inputs=inputs, labels=labels)
+
+            result = model.evaluate(val_dataset, batch_size=64)
+            print(result)
+
+            # imperative mode
+            import paddle.fluid.dygraph as dg
+            place = set_device('cpu')
+            with dg.guard(place) as g:
+                model = LeNet()
+                model.prepare(metrics=Accuracy(), inputs=inputs, labels=labels)
+
+                result = model.evaluate(val_dataset, batch_size=64)
+                print(result)
+                
+        """
+
+        if eval_data is not None and isinstance(eval_data, Dataset):
+            eval_sampler = DistributedBatchSampler(
+                eval_data, batch_size=batch_size)
+            eval_loader = DataLoader(
+                eval_data,
+                batch_sampler=eval_sampler,
+                places=self._place,
+                num_workers=num_workers,
+                return_list=True)
+        else:
+            eval_loader = eval_data
+
+        self._test_dataloader = eval_loader
+
+        cbks = config_callbacks(
+            callbacks,
+            model=self,
+            log_freq=log_freq,
+            verbose=verbose,
+            metrics=self._metrics_name(), )
+
+        eval_steps = self._len_data_loader(eval_loader)
+        cbks.on_begin('eval',
+                      {'steps': eval_steps,
+                       'metrics': self._metrics_name()})
+
+        logs = self._run_one_epoch(eval_loader, cbks, 'eval')
+
+        cbks.on_end('eval', logs)
+
+        self._test_dataloader = None
+
+        eval_result = {}
+        for k in self._metrics_name():
+            eval_result[k] = logs[k]
+
+        return eval_result
+
+    def predict(self,
+                test_data,
+                batch_size=1,
+                num_workers=0,
+                stack_outputs=False,
+                callbacks=None):
+        """
+        Compute the output predictions on testing data.
+
+        Args:
+            test_data (Dataset|DataLoader): An iterable data loader is used for
+                predict. An instance of paddle.io.Dataset or paddle.io.Dataloader
+                is recomended.
+            batch_size (int): Integer number. The batch size of train_data and eval_data.
+                When train_data and eval_data are both the instance of Dataloader, this
+                argument will be ignored. Default: 1.
+            num_workers (int): The number of subprocess to load data, 0 for no subprocess 
+                used and loading data in main process. When train_data and eval_data are
+                both the instance of Dataloader, this argument will be ignored. Default: 0.
+            stack_output (bool): Whether stack output field like a batch, as for an output
+                filed of a sample is in shape [X, Y], test_data contains N samples, predict
+                output field will be in shape [N, X, Y] if stack_output is True, and will
+                be a length N list in shape [[X, Y], [X, Y], ....[X, Y]] if stack_outputs
+                is False. stack_outputs as False is used for LoDTensor output situation,
+                it is recommended set as True if outputs contains no LoDTensor. Default: False.
+        Returns:
+            list: output of models.
+
+        Examples:
+        .. code-block:: python
+
+            # declarative mode
+            import numpy as np
+            from paddle.incubate.hapi.metrics import Accuracy
+            from paddle.incubate.hapi.datasets import MNIST
+            from paddle.incubate.hapi.vision.transforms import Compose,Resize
+            from paddle.incubate.hapi.vision.models import LeNet
+            from paddle.incubate.hapi.model import Input, set_device
+
+            class MnistDataset(MNIST):
+                def __init__(self, mode, return_label=True):
+                    super(MnistDataset, self).__init__(mode=mode)
+                    self.return_label = return_label
+
+                def __getitem__(self, idx):
+                    img = np.reshape(self.images[idx], [1, 28, 28])
+                    if self.return_label:
+                        return img, np.array(self.labels[idx]).astype('int64')
+                    return img,
+
+                def __len__(self):
+                    return len(self.images)
+
+            inputs = [Input([-1, 1, 28, 28], 'float32', name='image')]
+
+            test_dataset = MnistDataset(mode='test', return_label=False)
+
+            model = LeNet()
+            model.prepare(inputs=inputs)
+
+            result = model.predict(test_dataset, batch_size=64)
+            print(result)
+
+            # imperative mode
+            import paddle.fluid.dygraph as dg
+            place = set_device('cpu')
+            with dg.guard(place) as g:
+                model = LeNet()
+                model.prepare(inputs=inputs)
+
+                result = model.predict(test_dataset, batch_size=64)
+                print(result)
+        """
+
+        if test_data is not None and isinstance(test_data, Dataset):
+            test_sampler = DistributedBatchSampler(
+                test_data, batch_size=batch_size)
+            test_loader = DataLoader(
+                test_data,
+                batch_sampler=test_sampler,
+                places=self._place,
+                num_workers=num_workers,
+                return_list=True)
+        else:
+            test_loader = test_data
+
+        self._test_dataloader = test_loader
+
+        cbks = config_callbacks(callbacks, model=self, verbose=1)
+
+        test_steps = self._len_data_loader(test_loader)
+        logs = {'steps': test_steps}
+
+        cbks.on_begin('test', logs)
+
+        outputs = []
+
+        logs, outputs = self._run_one_epoch(test_loader, cbks, 'test')
+
+        outputs = list(zip(*outputs))
+
+        # NOTE: for lod tensor output, we should not stack outputs
+        # for stacking may lose its detail info
+        if stack_outputs:
+            outputs = [np.vstack(outs) for outs in outputs]
+
+        self._test_dataloader = None
+
+        cbks.on_end('test', logs)
+        return outputs
+
+    def save_inference_model(self,
+                             save_dir,
+                             model_filename=None,
+                             params_filename=None,
+                             model_only=False):
+        """
+        Save inference model must in static mode.
+
+        Args:
+            save_dir (str): The directory path to save the inference model.
+            model_filename (str|None): The name of file to save the inference
+                model itself. If is set None, a default filename
+                :code:`__model__` will be used.
+            params_filename (str|None): The name of file to save all related
+                parameters. If it is set None, parameters will be saved
+                in separate files .
+            model_only (bool): If True, It will save inference model only,
+                and do not save parameters. Default: False.
+
+        Returns:
+            list: The fetch variables' name list
+        """
+        assert not fluid.in_dygraph_mode(
+        ), 'Save inference model must in static mode!'
+
+        prog = self._adapter._progs.get('test', None)
+        assert prog, \
+            "Model is not ready, please call `model.prepare()` first"
+
+        infer_prog = prog.clone(for_test=True)
+
+        input_names = [v.name for v in self._adapter._input_vars['test']]
+        endpoints = self._adapter._endpoints['test']['output']
+
+        return fluid.io.save_inference_model(
+            save_dir,
+            input_names,
+            endpoints,
+            self._adapter._executor,
+            main_program=infer_prog,
+            model_filename=model_filename,
+            params_filename=params_filename,
+            program_only=model_only)
+
+    def _run_one_epoch(self, data_loader, callbacks, mode, logs={}):
+        outputs = []
+        for step, data in enumerate(data_loader):
+            # data might come from different types of data_loader and have
+            # different format, as following:
+            # 1. DataLoader in static graph:
+            #    [[input1, input2, ..., label1, lable2, ...]]
+            # 2. DataLoader in dygraph
+            #    [input1, input2, ..., label1, lable2, ...]
+            # 3. custumed iterator yield concated inputs and labels:
+            #   [input1, input2, ..., label1, lable2, ...]
+            # 4. custumed iterator yield seperated inputs and labels:
+            #   ([input1, input2, ...], [label1, lable2, ...])
+            # To handle all of these, flatten (nested) list to list.
+            data = flatten(data)
+            # LoDTensor.shape is callable, where LoDTensor comes from
+            # DataLoader in static graph
+            batch_size = data[0].shape()[0] if callable(data[
+                0].shape) else data[0].shape[0]
+
+            callbacks.on_batch_begin(mode, step, logs)
+
+            if mode != 'test':
+                outs = getattr(self, mode + '_batch')(data[:len(self._inputs)],
+                                                      data[len(self._inputs):])
+                # losses
+                loss = outs[0] if self._metrics else outs
+                metrics = [[l[0] for l in loss]]
+
+                # metrics
+                for metric in self._metrics:
+                    res = metric.accumulate()
+                    metrics.extend(to_list(res))
+
+                assert len(self._metrics_name()) == len(metrics)
+                for k, v in zip(self._metrics_name(), metrics):
+                    logs[k] = v
+            else:
+                outs = getattr(self, mode + '_batch')(data)
+                outputs.append(outs)
+
+            logs['step'] = step
+            if mode == 'train' or self._adapter._merge_count.get(
+                    mode + '_batch', 0) <= 0:
+                logs['batch_size'] = batch_size * ParallelEnv().nranks
+            else:
+                logs['batch_size'] = self._adapter._merge_count[mode + '_batch']
+
+            callbacks.on_batch_end(mode, step, logs)
+        self._reset_metrics()
+
+        if mode == 'test':
+            return logs, outputs
+        return logs
+
+    def _reset_metrics(self):
+        for metric in self._metrics:
+            metric.reset()
+
+    def _metrics_name(self):
+        metrics_name = ['loss']
+        for m in self._metrics:
+            metrics_name.extend(to_list(m.name()))
+        return metrics_name
+
+    def _len_data_loader(self, data_loader):
+        try:
+            steps = len(data_loader)
+        except Exception:
+            steps = None
+        return steps
diff --git a/python/paddle/incubate/hapi/progressbar.py b/python/paddle/incubate/hapi/progressbar.py
new file mode 100644
index 0000000000000000000000000000000000000000..2487fcbde8744fa7cc186e16b0653f03629d0366
--- /dev/null
+++ b/python/paddle/incubate/hapi/progressbar.py
@@ -0,0 +1,192 @@
+#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import sys
+import time
+import numpy as np
+from collections import namedtuple
+
+__all__ = ['ProgressBar']
+
+
+class ProgressBar(object):
+    """progress bar """
+
+    def __init__(self,
+                 num=None,
+                 width=30,
+                 verbose=1,
+                 start=True,
+                 file=sys.stdout):
+        self._num = num
+        if isinstance(num, int) and num <= 0:
+            raise TypeError('num should be None or integer (> 0)')
+        max_width = self._get_max_width()
+        self._width = width if width <= max_width else max_width
+        self._total_width = 0
+        self._verbose = verbose
+        self.file = file
+        self._values = {}
+        self._values_order = []
+        if start:
+            self._start = time.time()
+        self._last_update = 0
+
+        self._dynamic_display = (
+            (hasattr(self.file, 'isatty') and
+             self.file.isatty()) or 'ipykernel' in sys.modules or
+            'posix' in sys.modules or 'PYCHARM_HOSTED' in os.environ)
+
+    def _get_max_width(self):
+        if sys.version_info > (3, 3):
+            from shutil import get_terminal_size
+        else:
+            try:
+                from backports.shutil_get_terminal_size import get_terminal_size
+            except:
+
+                def get_terminal_size():
+                    terminal_size = namedtuple("terminal_size", "columns lines")
+                    return terminal_size(80, 24)
+
+        terminal_width, _ = get_terminal_size()
+        max_width = min(int(terminal_width * 0.6), terminal_width - 50)
+        return max_width
+
+    def start(self):
+        self.file.flush()
+        self._start = time.time()
+
+    def update(self, current_num, values=None):
+        now = time.time()
+
+        if current_num:
+            time_per_unit = (now - self._start) / current_num
+        else:
+            time_per_unit = 0
+
+        if time_per_unit >= 1 or time_per_unit == 0:
+            fps = ' - %.0fs/%s' % (time_per_unit, 'step')
+        elif time_per_unit >= 1e-3:
+            fps = ' - %.0fms/%s' % (time_per_unit * 1e3, 'step')
+        else:
+            fps = ' - %.0fus/%s' % (time_per_unit * 1e6, 'step')
+
+        info = ''
+        if self._verbose == 1:
+            prev_total_width = self._total_width
+
+            if self._dynamic_display:
+                sys.stdout.write('\b' * prev_total_width)
+                sys.stdout.write('\r')
+            else:
+                sys.stdout.write('\n')
+
+            if self._num is not None:
+                numdigits = int(np.log10(self._num)) + 1
+
+                bar_chars = ('step %' + str(numdigits) + 'd/%d [') % (
+                    current_num, self._num)
+                prog = float(current_num) / self._num
+                prog_width = int(self._width * prog)
+
+                if prog_width > 0:
+                    bar_chars += ('=' * (prog_width - 1))
+                    if current_num < self._num:
+                        bar_chars += '>'
+                    else:
+                        bar_chars += '='
+                bar_chars += ('.' * (self._width - prog_width))
+                bar_chars += ']'
+            else:
+                bar_chars = 'step %3d' % current_num
+
+            self._total_width = len(bar_chars)
+            sys.stdout.write(bar_chars)
+
+            for k, val in values:
+                info += ' - %s:' % k
+                val = val if isinstance(val, list) else [val]
+                for i, v in enumerate(val):
+                    if isinstance(v, (float, np.float32, np.float64)):
+                        if abs(v) > 1e-3:
+                            info += ' %.4f' % v
+                        else:
+                            info += ' %.4e' % v
+                    else:
+                        info += ' %s' % v
+
+            if self._num is not None and current_num < self._num:
+                eta = time_per_unit * (self._num - current_num)
+                if eta > 3600:
+                    eta_format = '%d:%02d:%02d' % (eta // 3600,
+                                                   (eta % 3600) // 60, eta % 60)
+                elif eta > 60:
+                    eta_format = '%d:%02d' % (eta // 60, eta % 60)
+                else:
+                    eta_format = '%ds' % eta
+
+                info += ' - ETA: %s' % eta_format
+
+            info += fps
+            self._total_width += len(info)
+            if prev_total_width > self._total_width:
+                info += (' ' * (prev_total_width - self._total_width))
+
+            # newline for another epoch
+            if self._num is not None and current_num >= self._num:
+                info += '\n'
+            if self._num is None:
+                info += '\n'
+
+            sys.stdout.write(info)
+            sys.stdout.flush()
+            self._last_update = now
+        elif self._verbose == 2:
+            if self._num:
+                numdigits = int(np.log10(self._num)) + 1
+                count = ('step %' + str(numdigits) + 'd/%d') % (current_num,
+                                                                self._num)
+            else:
+                count = 'step %3d' % current_num
+            info = count + info
+
+            for k, val in values:
+                info += ' - %s:' % k
+                val = val if isinstance(val, list) else [val]
+                for v in val:
+                    if isinstance(v, (float, np.float32, np.float64)):
+                        if abs(v) > 1e-3:
+                            info += ' %.4f' % v
+                        else:
+                            info += ' %.4e' % v
+                    elif isinstance(v, np.ndarray) and \
+                        v.size == 1 and \
+                        v.dtype in [np.float32, np.float64]:
+                        if abs(v[0]) > 1e-3:
+                            info += ' %.4f' % v[0]
+                        else:
+                            info += ' %.4e' % v[0]
+                    else:
+                        info += ' %s' % v
+
+            info += fps
+            info += '\n'
+            sys.stdout.write(info)
+            sys.stdout.flush()
diff --git a/python/paddle/incubate/hapi/tests/CMakeLists.txt b/python/paddle/incubate/hapi/tests/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..23eac04aa76060c5e3166b5e67c17fce82094ce9
--- /dev/null
+++ b/python/paddle/incubate/hapi/tests/CMakeLists.txt
@@ -0,0 +1,45 @@
+file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")  # all test_*.py files in this directory
+string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")  # strip ".py" so each entry can serve as a test name
+
+file(GLOB DIST_TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_dist_*.py")  # distributed subset
+string(REPLACE ".py" "" DIST_TEST_OPS "${DIST_TEST_OPS}")
+
+# Distributed tests are removed from TEST_OPS so the py_test loop below does not register them.
+foreach(TEST_OP ${DIST_TEST_OPS})
+    list(REMOVE_ITEM TEST_OPS ${TEST_OP})
+endforeach()
+
+foreach(src ${TEST_OPS})  # register every remaining module through py_test
+    py_test(${src} SRCS ${src}.py)
+endforeach()
+
+
+function(py_dist_test TARGET_NAME)  # add TARGET_NAME as a serial CTest entry labelled RUN_TYPE=DIST
+  if(WITH_TESTING)
+    set(options "")
+    set(oneValueArgs "")
+    set(multiValueArgs SRCS DEPS ARGS ENVS)  # DEPS is parsed but not used below
+    cmake_parse_arguments(py_dist_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+    # The test is only added for coverage builds with GPU and NCCL on non-Windows hosts.
+    if(WITH_COVERAGE AND WITH_GPU AND WITH_NCCL AND NOT WIN32)
+      add_test(NAME ${TARGET_NAME}
+               COMMAND ${CMAKE_COMMAND} -E env FLAGS_init_allocated_mem=true FLAGS_cudnn_deterministic=true
+               FLAGS_cpu_deterministic=true NCCL_P2P_DISABLE=1 NCCL_SHM_DISABLE=1
+               PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_dist_test_ENVS}
+               COVERAGE_FILE=${PADDLE_BINARY_DIR}/python-coverage.data
+               ${PYTHON_EXECUTABLE} -u ${py_dist_test_SRCS} ${py_dist_test_ARGS}
+               WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
+      # No unit test should exceed 10 minutes (TIMEOUT is given in seconds).
+      set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 600 LABELS "RUN_TYPE=DIST" RUN_SERIAL TRUE)
+    endif()
+
+    # NOTE(review): nothing is registered when the condition above is false -- confirm this is intended.
+  endif()
+endfunction()
+
+
+
+foreach(src ${DIST_TEST_OPS})
+    # Register every test_dist_*.py module through the py_dist_test helper.
+    py_dist_test(${src} SRCS ${src}.py)
+endforeach()
diff --git a/python/paddle/incubate/hapi/tests/dist_hapi_mnist_dynamic.py b/python/paddle/incubate/hapi/tests/dist_hapi_mnist_dynamic.py
new file mode 100644
index 0000000000000000000000000000000000000000..d8b7b978621b91a8dc8d4cd2e37e0740965ab111
--- /dev/null
+++ b/python/paddle/incubate/hapi/tests/dist_hapi_mnist_dynamic.py
@@ -0,0 +1,100 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import division
+from __future__ import print_function
+
+import unittest
+
+import numpy as np
+import contextlib
+
+from paddle import fluid
+
+from paddle.incubate.hapi.model import Model, Input, set_device
+from paddle.incubate.hapi.loss import CrossEntropy
+from paddle.incubate.hapi.vision.models import LeNet
+from paddle.incubate.hapi.metrics import Accuracy
+from paddle.incubate.hapi.callbacks import ProgBarLogger
+from paddle.incubate.hapi.datasets import MNIST
+
+
+class MnistDataset(MNIST):  # MNIST subclass whose samples can optionally omit the label
+    def __init__(self, mode, return_label=True):  # mode is forwarded to MNIST unchanged
+        super(MnistDataset, self).__init__(mode=mode)
+        self.return_label = return_label
+
+    def __getitem__(self, idx):  # returns (image, label) or, without labels, (image,)
+        img = np.reshape(self.images[idx], [1, 28, 28])
+        if self.return_label:
+            return img, np.array(self.labels[idx]).astype('int64')
+        return img,  # trailing comma: a one-element tuple
+
+    def __len__(self):  # number of entries in self.images
+        return len(self.images)
+
+
+def compute_accuracy(pred, gt):  # matches between argmax(pred, -1) and gt, divided by the row count
+    pred = np.argmax(pred, -1)
+    gt = np.array(gt)
+    # NOTE(review): pred is compared as (N, 1); assumes gt is (N, 1) too -- a 1-D gt would broadcast to (N, N). TODO confirm
+    correct = pred[:, np.newaxis] == gt
+
+    return np.sum(correct) / correct.shape[0]
+
+
+@unittest.skipIf(not fluid.is_compiled_with_cuda(),
+                 'CPU testing is not supported')
+class TestDistTraning(unittest.TestCase):  # LeNet on MNIST: fit, evaluate and predict in dygraph mode
+    def test_static_multiple_gpus(self):  # NOTE(review): named "static" although dygraph is enabled below
+        device = set_device('gpu')
+
+        fluid.enable_dygraph(device)
+        im_shape = (-1, 1, 28, 28)
+        batch_size = 128
+
+        inputs = [Input(im_shape, 'float32', name='image')]
+        labels = [Input([None, 1], 'int64', name='label')]
+
+        train_dataset = MnistDataset(mode='train')
+        val_dataset = MnistDataset(mode='test')
+        test_dataset = MnistDataset(mode='test', return_label=False)  # same split, images only
+
+        model = LeNet()
+        optim = fluid.optimizer.Momentum(
+            learning_rate=0.001, momentum=.9, parameter_list=model.parameters())
+        loss = CrossEntropy()
+        model.prepare(optim, loss, Accuracy(), inputs, labels, device=device)
+        cbk = ProgBarLogger(50)
+
+        model.fit(train_dataset,
+                  val_dataset,
+                  epochs=2,
+                  batch_size=batch_size,
+                  callbacks=cbk)
+
+        eval_result = model.evaluate(val_dataset, batch_size=batch_size)
+
+        output = model.predict(
+            test_dataset, batch_size=batch_size, stack_outputs=True)
+
+        np.testing.assert_equal(output[0].shape[0], len(test_dataset))  # one output row per sample
+
+        acc = compute_accuracy(output[0], val_dataset.labels)
+
+        np.testing.assert_allclose(acc, eval_result['acc'])  # predict-based accuracy must match evaluate()
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/incubate/hapi/tests/dist_hapi_mnist_static.py b/python/paddle/incubate/hapi/tests/dist_hapi_mnist_static.py
new file mode 100644
index 0000000000000000000000000000000000000000..31ba9104b7106c16a232084ba6d99316d0b65475
--- /dev/null
+++ b/python/paddle/incubate/hapi/tests/dist_hapi_mnist_static.py
@@ -0,0 +1,99 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import division
+from __future__ import print_function
+
+import unittest
+
+import numpy as np
+import contextlib
+
+from paddle import fluid
+
+from paddle.incubate.hapi.model import Model, Input, set_device
+from paddle.incubate.hapi.loss import CrossEntropy
+from paddle.incubate.hapi.vision.models import LeNet
+from paddle.incubate.hapi.metrics import Accuracy
+from paddle.incubate.hapi.callbacks import ProgBarLogger
+from paddle.incubate.hapi.datasets import MNIST
+
+
+class MnistDataset(MNIST):  # MNIST subclass whose samples can optionally omit the label
+    def __init__(self, mode, return_label=True):  # mode is forwarded to MNIST unchanged
+        super(MnistDataset, self).__init__(mode=mode)
+        self.return_label = return_label
+
+    def __getitem__(self, idx):  # returns (image, label) or, without labels, (image,)
+        img = np.reshape(self.images[idx], [1, 28, 28])
+        if self.return_label:
+            return img, np.array(self.labels[idx]).astype('int64')
+        return img,  # trailing comma: a one-element tuple
+
+    def __len__(self):  # number of entries in self.images
+        return len(self.images)
+
+
+def compute_accuracy(pred, gt):  # matches between argmax(pred, -1) and gt, divided by the row count
+    pred = np.argmax(pred, -1)
+    gt = np.array(gt)
+    # NOTE(review): pred is compared as (N, 1); assumes gt is (N, 1) too -- a 1-D gt would broadcast to (N, N). TODO confirm
+    correct = pred[:, np.newaxis] == gt
+
+    return np.sum(correct) / correct.shape[0]
+
+
+@unittest.skipIf(not fluid.is_compiled_with_cuda(),
+                 'CPU testing is not supported')
+class TestDistTraning(unittest.TestCase):  # LeNet on MNIST: fit, evaluate and predict without enabling dygraph
+    def test_static_multiple_gpus(self):
+        device = set_device('gpu')
+
+        im_shape = (-1, 1, 28, 28)
+        batch_size = 128
+
+        inputs = [Input(im_shape, 'float32', name='image')]
+        labels = [Input([None, 1], 'int64', name='label')]
+
+        train_dataset = MnistDataset(mode='train')
+        val_dataset = MnistDataset(mode='test')
+        test_dataset = MnistDataset(mode='test', return_label=False)  # same split, images only
+
+        model = LeNet()
+        optim = fluid.optimizer.Momentum(
+            learning_rate=0.001, momentum=.9, parameter_list=model.parameters())
+        loss = CrossEntropy()
+        model.prepare(optim, loss, Accuracy(), inputs, labels, device=device)
+        cbk = ProgBarLogger(50)
+
+        model.fit(train_dataset,
+                  val_dataset,
+                  epochs=2,
+                  batch_size=batch_size,
+                  callbacks=cbk)
+
+        eval_result = model.evaluate(val_dataset, batch_size=batch_size)
+
+        output = model.predict(
+            test_dataset, batch_size=batch_size, stack_outputs=True)
+
+        np.testing.assert_equal(output[0].shape[0], len(test_dataset))  # one output row per sample
+
+        acc = compute_accuracy(output[0], val_dataset.labels)
+
+        np.testing.assert_allclose(acc, eval_result['acc'])  # predict-based accuracy must match evaluate()
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/incubate/hapi/tests/test_callbacks.py b/python/paddle/incubate/hapi/tests/test_callbacks.py
new file mode 100644
index 0000000000000000000000000000000000000000..d8630038cd87f4fa1cd864d7b0eeffa6e4b2b8c2
--- /dev/null
+++ b/python/paddle/incubate/hapi/tests/test_callbacks.py
@@ -0,0 +1,106 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+import time
+import random
+import tempfile
+import shutil
+
+from paddle.incubate.hapi.model import Input
+from paddle.incubate.hapi.vision.models import LeNet
+from paddle.incubate.hapi.callbacks import config_callbacks
+
+
+class TestCallbacks(unittest.TestCase):  # drives the config_callbacks() result with synthetic logs
+    def setUp(self):
+        self.save_dir = tempfile.mkdtemp()  # passed to config_callbacks as save_dir
+
+    def tearDown(self):
+        shutil.rmtree(self.save_dir)
+
+    def run_callback(self):  # expects self.verbose to be set by the calling test method
+        epochs = 2
+        steps = 50
+        freq = 2
+        eval_steps = 20
+
+        lenet = LeNet()
+        inputs = [Input([None, 1, 28, 28], 'float32', name='image')]
+        lenet.prepare(inputs=inputs)
+
+        cbks = config_callbacks(
+            model=lenet,
+            batch_size=128,
+            epochs=epochs,
+            steps=steps,
+            log_freq=freq,
+            verbose=self.verbose,
+            metrics=['loss', 'acc'],
+            save_dir=self.save_dir)
+        cbks.on_begin('train')
+
+        logs = {'loss': 50.341673, 'acc': 0.00256}  # fabricated starting metrics, perturbed each step
+        for epoch in range(epochs):
+            cbks.on_epoch_begin(epoch)
+            for step in range(steps):
+                cbks.on_batch_begin('train', step, logs)
+                logs['loss'] -= random.random() * 0.1
+                logs['acc'] += random.random() * 0.1
+                time.sleep(0.005)  # short pause between batch begin and end
+                cbks.on_batch_end('train', step, logs)
+            cbks.on_epoch_end(epoch, logs)
+
+            eval_logs = {'eval_loss': 20.341673, 'eval_acc': 0.256}
+            params = {
+                'steps': eval_steps,
+                'metrics': ['eval_loss', 'eval_acc'],
+            }
+            cbks.on_begin('eval', params)  # an eval pass runs inside every training epoch
+            for step in range(eval_steps):
+                cbks.on_batch_begin('eval', step, eval_logs)
+                eval_logs['eval_loss'] -= random.random() * 0.1
+                eval_logs['eval_acc'] += random.random() * 0.1
+                eval_logs['batch_size'] = 2
+                time.sleep(0.005)
+                cbks.on_batch_end('eval', step, eval_logs)
+            cbks.on_end('eval', eval_logs)
+
+            test_logs = {}
+            params = {'steps': eval_steps}
+            cbks.on_begin('test', params)  # followed by a test pass that carries no metrics
+            for step in range(eval_steps):
+                cbks.on_batch_begin('test', step, test_logs)
+                test_logs['batch_size'] = 2
+                time.sleep(0.005)
+                cbks.on_batch_end('test', step, test_logs)
+            cbks.on_end('test', test_logs)
+
+        cbks.on_end('train')
+
+    def test_callback_verbose_0(self):
+        self.verbose = 0
+        self.run_callback()
+
+    def test_callback_verbose_1(self):
+        self.verbose = 1
+        self.run_callback()
+
+    def test_callback_verbose_2(self):
+        self.verbose = 2
+        self.run_callback()
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/incubate/hapi/tests/test_datasets.py b/python/paddle/incubate/hapi/tests/test_datasets.py
new file mode 100644
index 0000000000000000000000000000000000000000..7f544e5ad84d5aa2041e8fdb6c1ac77cc34d8164
--- /dev/null
+++ b/python/paddle/incubate/hapi/tests/test_datasets.py
@@ -0,0 +1,159 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+import os
+import numpy as np
+import tempfile
+import shutil
+import cv2
+
+from paddle.incubate.hapi.datasets import *
+from paddle.incubate.hapi.datasets.utils import _check_exists_and_download
+
+
+class TestFolderDatasets(unittest.TestCase):
+    def setUp(self):
+        self.data_dir = tempfile.mkdtemp()
+        self.empty_dir = tempfile.mkdtemp()
+        for i in range(2):
+            sub_dir = os.path.join(self.data_dir, 'class_' + str(i))
+            if not os.path.exists(sub_dir):
+                os.makedirs(sub_dir)
+            for j in range(2):
+                fake_img = (np.random.random((32, 32, 3)) * 255).astype('uint8')
+                cv2.imwrite(os.path.join(sub_dir, str(j) + '.jpg'), fake_img)
+
+    def tearDown(self):
+        shutil.rmtree(self.data_dir)
+
+    def test_dataset(self):
+        dataset_folder = DatasetFolder(self.data_dir)
+
+        for _ in dataset_folder:
+            pass
+
+        assert len(dataset_folder) == 4
+        assert len(dataset_folder.classes) == 2
+
+        dataset_folder = DatasetFolder(self.data_dir)
+        for _ in dataset_folder:
+            pass
+
+    def test_folder(self):
+        loader = ImageFolder(self.data_dir)
+
+        for _ in loader:
+            pass
+
+        loader = ImageFolder(self.data_dir)
+        for _ in loader:
+            pass
+
+        assert len(loader) == 4
+
+    def test_transform(self):
+        def fake_transform(img):
+            return img
+
+        transfrom = fake_transform
+        dataset_folder = DatasetFolder(self.data_dir, transform=transfrom)
+
+        for _ in dataset_folder:
+            pass
+
+        loader = ImageFolder(self.data_dir, transform=transfrom)
+        for _ in loader:
+            pass
+
+    def test_errors(self):
+        with self.assertRaises(RuntimeError):
+            ImageFolder(self.empty_dir)
+        with self.assertRaises(RuntimeError):
+            DatasetFolder(self.empty_dir)
+
+        with self.assertRaises(ValueError):
+            _check_exists_and_download('temp_paddle', None, None, None, False)
+
+
+class TestMNISTTest(unittest.TestCase):  # size and per-sample shape checks for the MNIST 'test' split
+    def test_main(self):
+        mnist = MNIST(mode='test')
+        self.assertTrue(len(mnist) == 10000)
+
+        for i in range(len(mnist)):  # every sample is visited
+            image, label = mnist[i]
+            self.assertTrue(image.shape[0] == 1)  # image dims are expected to be 1 x 28 x 28
+            self.assertTrue(image.shape[1] == 28)
+            self.assertTrue(image.shape[2] == 28)
+            self.assertTrue(label.shape[0] == 1)
+            self.assertTrue(0 <= int(label) <= 9)  # label must be a digit class
+
+
+class TestMNISTTrain(unittest.TestCase):  # size and per-sample shape checks for the MNIST 'train' split
+    def test_main(self):
+        mnist = MNIST(mode='train', chw_format=False)
+        self.assertTrue(len(mnist) == 60000)
+
+        for i in range(len(mnist)):  # every sample is visited
+            image, label = mnist[i]
+            self.assertTrue(image.shape[0] == 784)  # with chw_format=False the first dim is 784 (28 * 28)
+            self.assertTrue(label.shape[0] == 1)
+            self.assertTrue(0 <= int(label) <= 9)  # label must be a digit class
+
+
+class TestFlowersTrain(unittest.TestCase):  # size and single-sample shape check for Flowers 'train'
+    def test_main(self):
+        flowers = Flowers(mode='train')
+        self.assertTrue(len(flowers) == 6149)
+
+        # traversing the whole dataset may take a
+        # long time, so only 1 random sample is checked
+        idx = np.random.randint(0, 6149)
+        image, label = flowers[idx]
+        self.assertTrue(len(image.shape) == 3)
+        self.assertTrue(image.shape[2] == 3)  # last dim of the image is expected to be 3
+        self.assertTrue(label.shape[0] == 1)
+
+
+class TestFlowersValid(unittest.TestCase):  # size and single-sample shape check for Flowers 'valid'
+    def test_main(self):
+        flowers = Flowers(mode='valid')
+        self.assertTrue(len(flowers) == 1020)
+
+        # traversing the whole dataset may take a
+        # long time, so only 1 random sample is checked
+        idx = np.random.randint(0, 1020)
+        image, label = flowers[idx]
+        self.assertTrue(len(image.shape) == 3)
+        self.assertTrue(image.shape[2] == 3)  # last dim of the image is expected to be 3
+        self.assertTrue(label.shape[0] == 1)
+
+
+class TestFlowersTest(unittest.TestCase):  # size and single-sample shape check for Flowers 'test'
+    def test_main(self):
+        flowers = Flowers(mode='test')
+        self.assertTrue(len(flowers) == 1020)
+
+        # traversing the whole dataset may take a
+        # long time, so only 1 random sample is checked
+        idx = np.random.randint(0, 1020)
+        image, label = flowers[idx]
+        self.assertTrue(len(image.shape) == 3)
+        self.assertTrue(image.shape[2] == 3)  # last dim of the image is expected to be 3
+        self.assertTrue(label.shape[0] == 1)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/incubate/hapi/tests/test_dist_hapi_model.py b/python/paddle/incubate/hapi/tests/test_dist_hapi_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..e75e08e3749e6ce629e88c486e4f87d9109dc709
--- /dev/null
+++ b/python/paddle/incubate/hapi/tests/test_dist_hapi_model.py
@@ -0,0 +1,130 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import unittest
+import os
+import time
+import copy
+import subprocess
+import paddle.fluid as fluid
+
+from paddle.distributed.utils import find_free_ports, watch_local_trainers, get_cluster, TrainerProc
+
+
+def get_cluster_from_args(selected_gpus):  # returns get_cluster()'s result for a single 127.0.0.1 node
+    cluster_node_ips = '127.0.0.1'
+    node_ip = '127.0.0.1'
+
+    node_ips = [x.strip() for x in cluster_node_ips.split(',')]
+
+    node_ips.index(node_ip)  # result unused; raises ValueError if node_ip were not in node_ips
+
+    free_ports = None
+
+    free_ports = find_free_ports(len(selected_gpus))  # one port is requested per selected GPU
+    if free_ports is not None:
+        free_ports = list(free_ports)
+    return get_cluster(node_ips, node_ip, free_ports, selected_gpus)
+
+
+def get_gpus(selected_gpus):  # '0, 1' -> ['0', '1']: split on commas and strip whitespace
+    selected_gpus = [x.strip() for x in selected_gpus.split(',')]
+    return selected_gpus  # entries stay strings
+
+
+def start_local_trainers(cluster,  # spawns one subprocess per trainer in pod.trainers
+                         pod,
+                         training_script,
+                         training_script_args,
+                         log_dir=None):  # training_script_args and log_dir are accepted but not used
+    current_env = copy.copy(os.environ.copy())
+    # Paddle broadcasts the ncclUniqueId over a socket, and a proxy may
+    # make trainers unreachable, so the proxy variables are removed.
+    # Setting them to "" would make grpc log a "bad uri" error message,
+    # so they are deleted instead.
+    current_env.pop("http_proxy", None)
+    current_env.pop("https_proxy", None)
+
+    procs = []
+    for t in pod.trainers:
+        proc_env = {
+            "FLAGS_selected_gpus": "%s" % ",".join([str(g) for g in t.gpus]),
+            "PADDLE_TRAINER_ID": "%d" % t.rank,
+            "PADDLE_CURRENT_ENDPOINT": "%s" % t.endpoint,
+            "PADDLE_TRAINERS_NUM": "%d" % cluster.trainers_nranks(),
+            "PADDLE_TRAINER_ENDPOINTS": ",".join(cluster.trainers_endpoints())
+        }
+
+        current_env.update(proc_env)  # the same dict is reused; these keys are overwritten per trainer
+
+        print("trainer proc env:{}".format(current_env))
+
+        if os.getenv('WITH_COVERAGE', 'OFF') == 'ON':  # run under coverage when the env var asks for it
+            cmd = "python -m coverage run --branch -p " + training_script
+        else:
+            cmd = "python -u " + training_script
+
+        print("start trainer proc:{} env:{}".format(cmd, proc_env))
+
+        fn = None  # no log file is opened; only stored on the TrainerProc below
+
+        proc = subprocess.Popen(cmd.split(" "), env=current_env)  # split on spaces: script path must not contain any
+
+        tp = TrainerProc()
+        tp.proc = proc
+        tp.rank = t.rank
+        tp.log_fn = fn
+        tp.cmd = cmd
+
+        procs.append(tp)
+
+    return procs  # list of TrainerProc, in pod.trainers order
+
+
+class TestMultipleGpus(unittest.TestCase):
+    def run_mnist_2gpu(self, target_file_name):
+        if fluid.core.get_cuda_device_count() == 0:
+            return
+
+        selected_gpus = get_gpus('0,1')
+        cluster = None
+        pod = None
+
+        cluster, pod = get_cluster_from_args(selected_gpus)
+
+        procs = start_local_trainers(
+            cluster,
+            pod,
+            training_script=target_file_name,
+            training_script_args=[])
+
+        while True:
+            alive = watch_local_trainers(procs, cluster.trainers_nranks())
+
+            if not alive:
+                print("Local procs complete, POD info:{}".format(pod))
+                break
+            time.sleep(3)
+
+    def test_hapi_multiple_gpus_static(self):
+        self.run_mnist_2gpu('dist_hapi_mnist_static.py')
+
+    def test_hapi_multiple_gpus_dynamic(self):
+        self.run_mnist_2gpu('dist_hapi_mnist_dynamic.py')
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/python/paddle/incubate/hapi/tests/test_download.py b/python/paddle/incubate/hapi/tests/test_download.py
new file mode 100644
index 0000000000000000000000000000000000000000..9b43b9a58dc4eff633d03ad1cc5ed4274f0f3c76
--- /dev/null
+++ b/python/paddle/incubate/hapi/tests/test_download.py
@@ -0,0 +1,50 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+from paddle.incubate.hapi.download import get_weights_path_from_url
+
+
+class TestDownload(unittest.TestCase):  # exercises get_weights_path_from_url with valid and altered inputs
+    def download(self, url, md5sum):  # thin wrapper; the returned path is discarded
+        get_weights_path_from_url(url, md5sum)
+
+    def test_download_model(self):
+        url = 'https://paddle-hapi.bj.bcebos.com/models/mobilenet_v2_x1.0.pdparams'
+        md5sum = '8ff74f291f72533f2a7956a4efff9d88'
+        self.download(url, md5sum)
+
+    def test_exist_download(self):  # same url/md5sum as test_download_model
+        url = 'https://paddle-hapi.bj.bcebos.com/models/mobilenet_v2_x1.0.pdparams'
+        md5sum = '8ff74f291f72533f2a7956a4efff9d88'
+        self.download(url, md5sum)
+
+    def test_download_without_md5sum(self):
+        url = 'https://paddle-hapi.bj.bcebos.com/models/mobilenet_v2_x1.0.pdparams'
+        self.download(url, None)
+
+    def test_download_errors(self):  # altered url ("x1.0t") must raise, with or without an md5sum
+        with self.assertRaises(RuntimeError):
+            url = 'https://paddle-hapi.bj.bcebos.com/models/mobilenet_v2_x1.0t.pdparams'
+            md5sum = '8ff74f291f72533f2a7956a4eftttttt'
+            self.download(url, md5sum)
+
+        with self.assertRaises(RuntimeError):
+            url = 'https://paddle-hapi.bj.bcebos.com/models/mobilenet_v2_x1.0t.pdparams'
+            self.download(url, None)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/incubate/hapi/tests/test_logger.py b/python/paddle/incubate/hapi/tests/test_logger.py
new file mode 100644
index 0000000000000000000000000000000000000000..f25d0ee4f7e2f0db1031f1f2884fb6df338003cc
--- /dev/null
+++ b/python/paddle/incubate/hapi/tests/test_logger.py
@@ -0,0 +1,49 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import division
+from __future__ import print_function
+
+import unittest
+import os
+import numpy as np
+import shutil
+import tempfile
+
+from paddle.incubate.hapi.logger import setup_logger
+
+
+class TestSetupLogger(unittest.TestCase):  # smoke tests: setup_logger must not raise for each output kind
+    def setUp(self):
+        self.save_dir = tempfile.mkdtemp()
+        self.save_file = os.path.join(self.save_dir, 'logger.txt')  # path only; the file is not created here
+
+    def tearDown(self):
+        shutil.rmtree(self.save_dir)
+
+    def logger(self, output=None):  # thin wrapper; the result of setup_logger is discarded
+        setup_logger(output=output)
+
+    def test_logger_no_output(self):
+        self.logger()
+
+    def test_logger_dir(self):  # output is a directory path
+        self.logger(self.save_dir)
+
+    def test_logger_file(self):  # output is a file path inside save_dir
+        self.logger(self.save_file)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/incubate/hapi/tests/test_loss.py b/python/paddle/incubate/hapi/tests/test_loss.py
new file mode 100644
index 0000000000000000000000000000000000000000..f729b38b81f333c6d871fc2e21c1cea988d78437
--- /dev/null
+++ b/python/paddle/incubate/hapi/tests/test_loss.py
@@ -0,0 +1,111 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import division
+from __future__ import print_function
+
+import unittest
+import os
+import six
+import numpy as np
+import shutil
+import copy
+
+import paddle
+from paddle import fluid
+
+from paddle.incubate.hapi.model import Model, Input
+from paddle.incubate.hapi.loss import CrossEntropy, SoftmaxWithCrossEntropy
+
+
+def stable_softmax(x):
+    """Compute the softmax of vector x in a numerically stable way."""
+    # Clip shiftx from below at -64; otherwise exp(shiftx) can underflow
+    # to 0 and a later log() of the result would give log(0) = -inf.
+    shiftx = (x - np.max(x)).clip(-64.)  # np.max is taken over the whole array
+    exps = np.exp(shiftx)
+    return exps / np.sum(exps)  # normalized by the sum over all elements
+
+
+def randomize_probability(batch_size, class_num, dtype='float32'):  # random rows that each sum to 1
+    prob = np.random.uniform(
+        0.1, 1.0, size=(batch_size, class_num)).astype(dtype)
+    prob_sum = prob.sum(axis=1)  # one sum per row
+    for i in six.moves.xrange(len(prob)):
+        prob[i] /= prob_sum[i]  # in-place row normalization
+    return prob  # shape (batch_size, class_num)
+
+
+def numpy_ce(x, label):  # mean of -log(x[i][label[i][0]]) over the rows of x
+    return np.asmatrix(
+        [[-np.log(x[i][label[i][0]])] for i in range(x.shape[0])],
+        dtype="float32").mean()  # label is indexed as label[i][0], i.e. one class index per row
+
+
+class TestLoss(unittest.TestCase):
+    def test_cross_entropy(self):
+        class_num = 100
+        batch_size = 128
+        inputs = [randomize_probability(128, class_num) for _ in range(2)]
+
+        labels = [
+            np.random.randint(
+                0, class_num, (batch_size, 1), dtype="int64") for _ in range(2)
+        ]
+
+        gt_out = [numpy_ce(inputs[i], labels[i]) for i in range(2)]
+
+        fluid.enable_dygraph()
+        cross_entropy = CrossEntropy()
+        out = cross_entropy(
+            [fluid.dygraph.to_variable(x) for x in inputs],
+            [fluid.dygraph.to_variable(label) for label in labels])
+        out = [o.numpy() for o in out]
+
+        for o, g in zip(out, gt_out):
+            np.testing.assert_allclose(o, g, atol=1e-5)
+
+    def test_soft_cross_entronpy(self):
+        class_num = 100
+        batch_size = 128
+
+        inputs = [randomize_probability(128, class_num) for _ in range(2)]
+
+        labels = [
+            np.random.randint(
+                0, class_num, (batch_size, 1), dtype="int64") for _ in range(2)
+        ]
+
+        fluid.enable_dygraph()
+        softmax_cross_entropy = SoftmaxWithCrossEntropy()
+
+        softmax_cross_entropy(
+            [fluid.dygraph.to_variable(x) for x in inputs],
+            [fluid.dygraph.to_variable(label) for label in labels])
+
+        softmax_cross_entropy = SoftmaxWithCrossEntropy(average=False)
+
+        inputs = [randomize_probability(128, class_num)]
+
+        labels = [
+            np.random.randint(
+                0, class_num, (batch_size, 1), dtype="int64")
+        ]
+
+        softmax_cross_entropy([fluid.dygraph.to_variable(x) for x in inputs],
+                              fluid.dygraph.to_variable(labels[0]))
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/incubate/hapi/tests/test_metrics.py b/python/paddle/incubate/hapi/tests/test_metrics.py
new file mode 100644
index 0000000000000000000000000000000000000000..3d25a275d5f1c539ce959c5231a7af771b229836
--- /dev/null
+++ b/python/paddle/incubate/hapi/tests/test_metrics.py
@@ -0,0 +1,132 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import division
+from __future__ import print_function
+
+import os
+import unittest
+import numpy as np
+
+import paddle.fluid as fluid
+from paddle.fluid.dygraph.base import to_variable
+
+from paddle.incubate.hapi.metrics import *
+from paddle.incubate.hapi.utils import to_list
+
+
+def accuracy(pred, label, topk=(1, )):
+    """NumPy reference for top-k accuracy; returns one value per k in topk."""
+    maxk = max(topk)
+    # Column indices sorted by descending score, cut to the largest k.
+    ranked = np.argsort(pred)[:, ::-1][:, :maxk]
+    hits = (ranked == np.repeat(label, maxk, 1))
+
+    num_samples = label.shape[0]
+    # Count a sample at k when its label is among the first k columns;
+    # the total is divided by the number of rows in `label`.
+    return [hits[:, :k].sum() / num_samples for k in topk]
+
+
+def convert_to_one_hot(y, C):
+    scores = np.random.random((y.shape[0], C)).astype('float32') * .5
+    for row, cls in enumerate(y):  # plant a 1.0 at each sample's class index
+        scores[row, int(cls)] = 1.
+    return scores
+
+
+class TestAccuracyDynamic(unittest.TestCase):
+    def setUp(self):
+        self.topk = (1, )
+        self.class_num = 5
+        self.sample_num = 1000
+        self.name = None
+
+    def random_pred_label(self):
+        label = np.random.randint(0, self.class_num,
+                                  (self.sample_num, 1)).astype('int64')
+        pred = np.random.randint(0, self.class_num,
+                                 (self.sample_num, 1)).astype('int32')
+        pred_one_hot = convert_to_one_hot(pred, self.class_num)
+        pred_one_hot = pred_one_hot.astype('float32')
+
+        return label, pred_one_hot
+
+    def test_main(self):
+        with fluid.dygraph.guard(fluid.CPUPlace()):
+            acc = Accuracy(topk=self.topk, name=self.name)
+            for _ in range(10):
+                label, pred = self.random_pred_label()
+                label_var = to_variable(label)
+                pred_var = to_variable(pred)
+                state = to_list(acc.add_metric_op(pred_var, label_var))
+                acc.update(* [s.numpy() for s in state])
+                res_m = acc.accumulate()
+                res_f = accuracy(pred, label, self.topk)
+                assert np.all(np.isclose(np.array(res_m, dtype='float64'), np.array(res_f, dtype='float64'), rtol=1e-3)), \
+                        "Accuracy precision error: {} != {}".format(res_m, res_f)
+                acc.reset()
+                assert np.sum(acc.total) == 0
+                assert np.sum(acc.count) == 0
+
+
+class TestAccuracyDynamicMultiTopk(TestAccuracyDynamic):
+    def setUp(self):
+        self.topk = (1, 5)
+        self.class_num = 10
+        self.sample_num = 1000
+        self.name = "accuracy"
+
+
+class TestAccuracyStatic(TestAccuracyDynamic):
+    def test_main(self):
+        main_prog = fluid.Program()
+        startup_prog = fluid.Program()
+        with fluid.program_guard(main_prog, startup_prog):
+            pred = fluid.data(
+                name='pred', shape=[None, self.class_num], dtype='float32')
+            label = fluid.data(name='label', shape=[None, 1], dtype='int64')
+            acc = Accuracy(topk=self.topk, name=self.name)
+            state = acc.add_metric_op(pred, label)
+
+        exe = fluid.Executor(fluid.CPUPlace())
+        compiled_main_prog = fluid.CompiledProgram(main_prog)
+
+        for _ in range(10):
+            label, pred = self.random_pred_label()
+            state_ret = exe.run(compiled_main_prog,
+                                feed={'pred': pred,
+                                      'label': label},
+                                fetch_list=[s.name for s in to_list(state)],
+                                return_numpy=True)
+            acc.update(*state_ret)
+            res_m = acc.accumulate()
+            res_f = accuracy(pred, label, self.topk)
+            assert np.all(np.isclose(np.array(res_m, dtype='float64'), np.array(res_f, dtype='float64'), rtol=1e-3)), \
+                    "Accuracy precision error: {} != {}".format(res_m, res_f)
+            acc.reset()
+            assert np.sum(acc.total) == 0
+            assert np.sum(acc.count) == 0
+
+
+class TestAccuracyStaticMultiTopk(TestAccuracyStatic):
+    def setUp(self):
+        self.topk = (1, 5)
+        self.class_num = 10
+        self.sample_num = 1000
+        self.name = "accuracy"
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/incubate/hapi/tests/test_model.py b/python/paddle/incubate/hapi/tests/test_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..e49ec5651ff160fce0c80e8804b55b18baadd3c3
--- /dev/null
+++ b/python/paddle/incubate/hapi/tests/test_model.py
@@ -0,0 +1,483 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import division
+from __future__ import print_function
+
+import unittest
+
+import os
+import numpy as np
+import shutil
+import tempfile
+
+from paddle import fluid
+from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
+from paddle.fluid.dygraph.container import Sequential
+from paddle.fluid.dygraph.base import to_variable
+
+from paddle.incubate.hapi.model import Model, Input, set_device
+from paddle.incubate.hapi.loss import CrossEntropy
+from paddle.incubate.hapi.metrics import Accuracy
+from paddle.incubate.hapi.datasets import MNIST
+from paddle.incubate.hapi.vision.models import LeNet
+from paddle.incubate.hapi.distributed import DistributedBatchSampler, prepare_distributed_context
+
+
+class LeNetDygraph(fluid.dygraph.Layer):
+    """Plain dygraph LeNet used as the reference implementation in tests."""
+    def __init__(self, num_classes=10, classifier_activation='softmax'):
+        super(LeNetDygraph, self).__init__()
+        self.num_classes = num_classes
+        self.features = Sequential(
+            Conv2D(
+                1, 6, 3, stride=1, padding=1),
+            Pool2D(2, 'max', 2),
+            Conv2D(
+                6, 16, 5, stride=1, padding=0),
+            Pool2D(2, 'max', 2))
+
+        if num_classes > 0:
+            self.fc = Sequential(
+                Linear(400, 120),
+                Linear(120, 84),
+                Linear(
+                    84, num_classes, act=classifier_activation))
+
+    def forward(self, inputs):
+        x = self.features(inputs)
+        if self.num_classes > 0:
+            x = fluid.layers.flatten(x, 1)
+            x = self.fc(x)
+        return x
+
+
+class MnistDataset(MNIST):
+    """MNIST subset yielding (1, 28, 28) images, optionally with labels."""
+    def __init__(self, mode, return_label=True, sample_num=None):
+        super(MnistDataset, self).__init__(mode=mode)
+        self.return_label = return_label
+        if sample_num:
+            self.images = self.images[:sample_num]
+            self.labels = self.labels[:sample_num]
+
+    def __getitem__(self, idx):
+        image = np.reshape(self.images[idx], [1, 28, 28])
+        if not self.return_label:
+            return image,
+        return image, np.array(self.labels[idx]).astype('int64')
+
+    def __len__(self):
+        return len(self.images)
+
+
+def compute_acc(pred, label):
+    """Top-1 accuracy of `pred` scores vs `label` (assumed shape (N, 1))."""
+    top1 = np.argmax(pred, -1)[:, np.newaxis]
+    hits = top1 == np.array(label)
+    return np.sum(hits) / hits.shape[0]
+
+
+def dynamic_train(model, dataloader):
+    """Run one pass over `dataloader`, updating `model` with Adam (lr=0.001)."""
+    params = model.parameters()
+    adam = fluid.optimizer.Adam(learning_rate=0.001, parameter_list=params)
+    model.train()
+    for images, targets in dataloader:
+        per_sample = fluid.layers.cross_entropy(model(images), targets)
+        summed_loss = fluid.layers.reduce_sum(per_sample)  # sum, not mean
+        summed_loss.backward()
+        adam.minimize(summed_loss)
+        model.clear_gradients()
+
+
+def dynamic_evaluate(model, dataloader):
+    """Return top-1 accuracy of `model` over `dataloader.dataset`."""
+    num_correct = 0
+    with fluid.dygraph.no_grad():
+        model.eval()
+        for images, targets in dataloader:
+            top1 = np.argmax(model(images).numpy(), -1)[:, np.newaxis]
+            matches = (top1 == targets.numpy()).astype('int')
+            num_correct += matches.sum()
+
+    return num_correct / len(dataloader.dataset)
+
+
+@unittest.skipIf(not fluid.is_compiled_with_cuda(),
+                 'CPU testing is not supported')
+class TestModel(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        """Train a reference LeNetDygraph once and save its weights."""
+        if not fluid.is_compiled_with_cuda():
+            # A classmethod has no `self`; skipping is done by raising.
+            raise unittest.SkipTest('module not tested when ONLY_CPU compling')
+        cls.device = set_device('gpu')
+        fluid.enable_dygraph(cls.device)
+
+        sp_num = 1280
+        cls.train_dataset = MnistDataset(mode='train', sample_num=sp_num)
+        cls.val_dataset = MnistDataset(mode='test', sample_num=sp_num)
+        cls.test_dataset = MnistDataset(
+            mode='test', return_label=False, sample_num=sp_num)
+
+        cls.train_loader = fluid.io.DataLoader(
+            cls.train_dataset, places=cls.device, batch_size=64)
+        cls.val_loader = fluid.io.DataLoader(
+            cls.val_dataset, places=cls.device, batch_size=64)
+        cls.test_loader = fluid.io.DataLoader(
+            cls.test_dataset, places=cls.device, batch_size=64)
+
+        seed = 333
+        fluid.default_startup_program().random_seed = seed
+        fluid.default_main_program().random_seed = seed
+
+        dy_lenet = LeNetDygraph()
+        cls.init_param = dy_lenet.state_dict()
+        dynamic_train(dy_lenet, cls.train_loader)
+        cls.acc1 = dynamic_evaluate(dy_lenet, cls.val_loader)
+
+        cls.inputs = [Input([-1, 1, 28, 28], 'float32', name='image')]
+        cls.labels = [Input([None, 1], 'int64', name='label')]
+
+        cls.save_dir = tempfile.mkdtemp()
+        cls.weight_path = os.path.join(cls.save_dir, 'lenet')
+        fluid.dygraph.save_dygraph(dy_lenet.state_dict(), cls.weight_path)
+        fluid.disable_dygraph()
+
+    @classmethod
+    def tearDownClass(cls):
+        shutil.rmtree(cls.save_dir)
+
+    def test_fit_dygraph(self):
+        self.fit(True)
+
+    def test_fit_static(self):
+        self.fit(False)
+
+    def test_evaluate_dygraph(self):
+        self.evaluate(True)
+
+    def test_evaluate_static(self):
+        self.evaluate(False)
+
+    def test_predict_dygraph(self):
+        self.predict(True)
+
+    def test_predict_static(self):
+        self.predict(False)
+
+    def test_prepare_context(self):
+        prepare_distributed_context()
+
+    def fit(self, dynamic):
+        fluid.enable_dygraph(self.device) if dynamic else None
+        seed = 333
+        fluid.default_startup_program().random_seed = seed
+        fluid.default_main_program().random_seed = seed
+
+        model = LeNet()
+        optim_new = fluid.optimizer.Adam(
+            learning_rate=0.001, parameter_list=model.parameters())
+        model.prepare(
+            optim_new,
+            loss_function=CrossEntropy(average=False),
+            metrics=Accuracy(),
+            inputs=self.inputs,
+            labels=self.labels)
+        model.fit(self.train_dataset, batch_size=64, shuffle=False)
+
+        result = model.evaluate(self.val_dataset, batch_size=64)
+        np.testing.assert_allclose(result['acc'], self.acc1)
+
+        train_sampler = DistributedBatchSampler(
+            self.train_dataset, batch_size=64, shuffle=False)
+        val_sampler = DistributedBatchSampler(
+            self.val_dataset, batch_size=64, shuffle=False)
+
+        train_loader = fluid.io.DataLoader(
+            self.train_dataset,
+            batch_sampler=train_sampler,
+            places=self.device,
+            return_list=True)
+
+        val_loader = fluid.io.DataLoader(
+            self.val_dataset,
+            batch_sampler=val_sampler,
+            places=self.device,
+            return_list=True)
+
+        model.fit(train_loader, val_loader)
+        fluid.disable_dygraph() if dynamic else None
+
+    def evaluate(self, dynamic):
+        fluid.enable_dygraph(self.device) if dynamic else None
+        model = LeNet()
+        model.prepare(
+            metrics=Accuracy(), inputs=self.inputs, labels=self.labels)
+        model.load(self.weight_path)
+        result = model.evaluate(self.val_dataset, batch_size=64)
+        np.testing.assert_allclose(result['acc'], self.acc1)
+
+        sampler = DistributedBatchSampler(
+            self.val_dataset, batch_size=64, shuffle=False)
+
+        val_loader = fluid.io.DataLoader(
+            self.val_dataset,
+            batch_sampler=sampler,
+            places=self.device,
+            return_list=True)
+
+        model.evaluate(val_loader)
+
+        fluid.disable_dygraph() if dynamic else None
+
+    def predict(self, dynamic):
+        fluid.enable_dygraph(self.device) if dynamic else None
+        model = LeNet()
+        model.prepare(inputs=self.inputs)
+        model.load(self.weight_path)
+        output = model.predict(
+            self.test_dataset, batch_size=64, stack_outputs=True)
+        np.testing.assert_equal(output[0].shape[0], len(self.test_dataset))
+
+        acc = compute_acc(output[0], self.val_dataset.labels)
+        np.testing.assert_allclose(acc, self.acc1)
+
+        sampler = DistributedBatchSampler(
+            self.test_dataset, batch_size=64, shuffle=False)
+
+        test_loader = fluid.io.DataLoader(
+            self.test_dataset,
+            batch_sampler=sampler,
+            places=self.device,
+            return_list=True)
+
+        model.evaluate(test_loader)
+
+        fluid.disable_dygraph() if dynamic else None
+
+
+class MyModel(Model):
+    """Single softmax-activated Linear(20 -> 10) layer used by the tests."""
+    def __init__(self):
+        super(MyModel, self).__init__()
+        self._fc = Linear(20, 10, act='softmax')
+
+    def forward(self, x):
+        return self._fc(x)
+
+
+class TestModelFunction(unittest.TestCase):
+    def set_seed(self, seed=1024):
+        fluid.default_startup_program().random_seed = seed
+        fluid.default_main_program().random_seed = seed
+
+    def test_train_batch(self, dynamic=True):
+        dim = 20
+        data = np.random.random(size=(4, dim)).astype(np.float32)
+        label = np.random.randint(0, 10, size=(4, 1)).astype(np.int64)
+
+        def get_expect():
+            fluid.enable_dygraph(fluid.CPUPlace())
+            self.set_seed()
+            m = MyModel()
+            optim = fluid.optimizer.SGD(learning_rate=0.001,
+                                        parameter_list=m.parameters())
+            m.train()
+            output = m(to_variable(data))
+            l = to_variable(label)
+            loss = fluid.layers.cross_entropy(output, l)
+            avg_loss = fluid.layers.reduce_sum(loss)
+            avg_loss.backward()
+            optim.minimize(avg_loss)
+            m.clear_gradients()
+            fluid.disable_dygraph()
+            return avg_loss.numpy()
+
+        ref = get_expect()
+        for dynamic in [True, False]:
+            device = set_device('cpu')
+            fluid.enable_dygraph(device) if dynamic else None
+            self.set_seed()
+            model = MyModel()
+
+            optim2 = fluid.optimizer.SGD(learning_rate=0.001,
+                                         parameter_list=model.parameters())
+
+            inputs = [Input([None, dim], 'float32', name='x')]
+            labels = [Input([None, 1], 'int64', name='label')]
+            model.prepare(
+                optim2,
+                loss_function=CrossEntropy(average=False),
+                inputs=inputs,
+                labels=labels,
+                device=device)
+            loss, = model.train_batch([data], [label])
+
+            np.testing.assert_allclose(loss.flatten(), ref.flatten())
+            fluid.disable_dygraph() if dynamic else None
+
+    def test_test_batch(self, dynamic=True):
+        dim = 20
+        data = np.random.random(size=(4, dim)).astype(np.float32)
+
+        def get_expect():
+            fluid.enable_dygraph(fluid.CPUPlace())
+            self.set_seed()
+            m = MyModel()
+            m.eval()
+            output = m(to_variable(data))
+            fluid.disable_dygraph()
+            return output.numpy()
+
+        ref = get_expect()
+        for dynamic in [True, False]:
+            device = set_device('cpu')
+            fluid.enable_dygraph(device) if dynamic else None
+            self.set_seed()
+            model = MyModel()
+            inputs = [Input([None, dim], 'float32', name='x')]
+            model.prepare(inputs=inputs, device=device)
+            out, = model.test_batch([data])
+
+            np.testing.assert_allclose(out, ref)
+            fluid.disable_dygraph() if dynamic else None
+
+    def test_save_load(self):
+        for dynamic in [True, False]:
+            path = tempfile.mkdtemp()  # removed below, so one per mode
+            device = set_device('cpu')
+            fluid.enable_dygraph(device) if dynamic else None
+            model = MyModel()
+            inputs = [Input([None, 20], 'float32', name='x')]
+            labels = [Input([None, 1], 'int64', name='label')]
+            optim = fluid.optimizer.SGD(learning_rate=0.001,
+                                        parameter_list=model.parameters())
+            model.prepare(
+                inputs=inputs,
+                optimizer=optim,
+                loss_function=CrossEntropy(average=False),
+                labels=labels)
+            model.save(path + '/test')
+            model.load(path + '/test')
+            shutil.rmtree(path)
+            fluid.disable_dygraph() if dynamic else None
+
+    def test_dynamic_save_static_load(self):
+        path = tempfile.mkdtemp()
+        # for dynamic in [True, False]:
+        device = set_device('cpu')
+        fluid.enable_dygraph(device)  #if dynamic else None
+        model = MyModel()
+        inputs = [Input([None, 20], 'float32', name='x')]
+        labels = [Input([None, 1], 'int64', name='label')]
+        optim = fluid.optimizer.SGD(learning_rate=0.001,
+                                    parameter_list=model.parameters())
+        model.prepare(
+            inputs=inputs,
+            optimizer=optim,
+            loss_function=CrossEntropy(average=False),
+            labels=labels)
+        model.save(path + '/test')
+        fluid.disable_dygraph()
+        model = MyModel()
+        inputs = [Input([None, 20], 'float32', name='x')]
+        labels = [Input([None, 1], 'int64', name='label')]
+        optim = fluid.optimizer.SGD(learning_rate=0.001,
+                                    parameter_list=model.parameters())
+        model.prepare(
+            inputs=inputs,
+            optimizer=optim,
+            loss_function=CrossEntropy(average=False),
+            labels=labels)
+        model.load(path + '/test')
+        shutil.rmtree(path)
+
+    def test_static_save_dynamic_load(self):
+        path = tempfile.mkdtemp()
+
+        model = MyModel()
+        inputs = [Input([None, 20], 'float32', name='x')]
+        labels = [Input([None, 1], 'int64', name='label')]
+        optim = fluid.optimizer.SGD(learning_rate=0.001,
+                                    parameter_list=model.parameters())
+        model.prepare(
+            inputs=inputs,
+            optimizer=optim,
+            loss_function=CrossEntropy(average=False),
+            labels=labels)
+        model.save(path + '/test')
+
+        device = set_device('cpu')
+        fluid.enable_dygraph(device)  #if dynamic else None
+
+        model = MyModel()
+        inputs = [Input([None, 20], 'float32', name='x')]
+        labels = [Input([None, 1], 'int64', name='label')]
+        optim = fluid.optimizer.SGD(learning_rate=0.001,
+                                    parameter_list=model.parameters())
+        model.prepare(
+            inputs=inputs,
+            optimizer=optim,
+            loss_function=CrossEntropy(average=False),
+            labels=labels)
+        model.load(path + '/test')
+        shutil.rmtree(path)
+        fluid.disable_dygraph()
+
+    def test_parameters(self):
+        for dynamic in [True, False]:
+            device = set_device('cpu')
+            fluid.enable_dygraph(device) if dynamic else None
+            model = MyModel()
+            inputs = [Input([None, 20], 'float32', name='x')]
+            model.prepare(inputs=inputs)
+            params = model.parameters()
+            self.assertTrue(params[0].shape[0] == 20)
+            self.assertTrue(params[0].shape[1] == 10)
+            fluid.disable_dygraph() if dynamic else None
+
+    def test_export_deploy_model(self):
+        model = LeNet()
+        inputs = [Input([-1, 1, 28, 28], 'float32', name='image')]
+        model.prepare(inputs=inputs)
+
+        tensor_img = np.array(
+            np.random.random((1, 1, 28, 28)), dtype=np.float32)
+        ori_results = model.test_batch(tensor_img)
+
+        # mkdtemp() already creates the directory; remove it even on failure.
+        save_dir = tempfile.mkdtemp()
+        try:
+            model.save_inference_model(save_dir)
+
+            place = fluid.CPUPlace() if not fluid.is_compiled_with_cuda(
+            ) else fluid.CUDAPlace(0)
+            exe = fluid.Executor(place)
+            [inference_program, feed_target_names, fetch_targets] = (
+                fluid.io.load_inference_model(
+                    dirname=save_dir, executor=exe))
+
+            results = exe.run(inference_program,
+                              feed={feed_target_names[0]: tensor_img},
+                              fetch_list=fetch_targets)
+            np.testing.assert_allclose(results, ori_results)
+        finally:
+            shutil.rmtree(save_dir)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/incubate/hapi/tests/test_progressbar.py b/python/paddle/incubate/hapi/tests/test_progressbar.py
new file mode 100644
index 0000000000000000000000000000000000000000..ff315ef505606aaf45b46a722de8f0386ae2d5ed
--- /dev/null
+++ b/python/paddle/incubate/hapi/tests/test_progressbar.py
@@ -0,0 +1,70 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+import unittest
+import random
+import time
+
+from paddle.incubate.hapi.progressbar import ProgressBar
+
+
+class TestProgressBar(unittest.TestCase):
+    def prog_bar(self, num, epoch, width, verbose=1):
+        for epoch in range(epoch):
+            progbar = ProgressBar(num, verbose=verbose)
+            values = [
+                ['loss', 50.341673],
+                ['acc', 0.00256],
+            ]
+            for step in range(1, num + 1):
+                values[0][1] -= random.random() * 0.1
+                values[1][1] += random.random() * 0.1
+                if step % 10 == 0:
+                    progbar.update(step, values)
+                time.sleep(0.002)
+            progbar.update(step, values)
+
+        progbar.update(1, [['loss', int(1)]])
+        progbar.update(1, [['loss', 'INF']])
+        progbar.update(1, [['loss', 1e-4]])
+        progbar.update(1, [['loss', np.array([1.])]])
+        progbar.update(1, [['loss', np.array([1e-4])]])
+        progbar.start()
+
+        progbar.update(0, values)
+        progbar._dynamic_display = False
+        progbar.update(1e4, values)
+
+        progbar._num = None
+        progbar.update(0, values)
+        progbar._num = 1
+        progbar.update(1 + 1e-4, values)
+
+    def test1(self):
+        self.prog_bar(50, 1, 30)
+
+    def test2(self):
+        self.prog_bar(50, 2, 30)
+
+    def test4(self):
+        self.prog_bar(50, 2, 30, verbose=2)
+
+    def test_errors(self):
+        with self.assertRaises(TypeError):
+            ProgressBar(-1)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/incubate/hapi/utils.py b/python/paddle/incubate/hapi/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..4182d81d9fc1a593252ceeed7ba8943373a239a9
--- /dev/null
+++ b/python/paddle/incubate/hapi/utils.py
@@ -0,0 +1,63 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import inspect
+import numpy as np
+
+from paddle import fluid
+from paddle.fluid.framework import Variable
+from paddle.fluid.executor import global_scope
+
+
+def to_list(value):
+    """Wrap `value` in a list; None passes through, list/tuple are copied."""
+    if value is None:
+        return None
+    is_seq = isinstance(value, (list, tuple))
+    return list(value) if is_seq else [value]
+
+
+def to_numpy(var):
+    """Return `var` as ndarray: VarBase.numpy(), else global-scope tensor."""
+    assert isinstance(var, (Variable, fluid.core.VarBase)), "not a variable"
+    if not isinstance(var, fluid.core.VarBase):
+        return np.array(global_scope().find_var(var.name).get_tensor())
+    return var.numpy()
+
+
+def flatten_list(l):
+    """Concatenate a list of lists; also return each sub-list's length."""
+    assert isinstance(l, list), "not a list"
+    flat, sizes = [], []
+    for sub in l:
+        assert isinstance(sub, list), "sub content not a list"
+        sizes.append(len(sub))
+        flat.extend(sub)
+    return flat, sizes
+
+
+def restore_flatten_list(l, splits):
+    """Inverse of flatten_list: cut `l` into chunks of the given sizes."""
+    chunks, rest = [], l
+    for size in splits:
+        assert len(rest) >= size, "list length invalid"
+        chunks, rest = chunks + [rest[:size]], rest[size:]
+    return chunks
+
+
+def extract_args(func):
+    """Return the positional argument names of `func` (py2/py3 compatible)."""
+    spec_fn = getattr(inspect, 'getfullargspec', None)
+    spec_fn = spec_fn if spec_fn is not None else inspect.getargspec
+    return spec_fn(func)[0]
diff --git a/python/paddle/incubate/hapi/vision/__init__.py b/python/paddle/incubate/hapi/vision/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..ac43effc883b24987f896265c0b7145f91025f82
--- /dev/null
+++ b/python/paddle/incubate/hapi/vision/__init__.py
@@ -0,0 +1,18 @@
+#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from . import models
+from .models import *
+
+__all__ = models.__all__
diff --git a/python/paddle/incubate/hapi/vision/models/__init__.py b/python/paddle/incubate/hapi/vision/models/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..ed457798cba948553a46472273fc6fd0f6703768
--- /dev/null
+++ b/python/paddle/incubate/hapi/vision/models/__init__.py
@@ -0,0 +1,18 @@
+#  Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+from . import lenet
+from .lenet import *
+
+__all__ = lenet.__all__
diff --git a/python/paddle/incubate/hapi/vision/models/lenet.py b/python/paddle/incubate/hapi/vision/models/lenet.py
new file mode 100644
index 0000000000000000000000000000000000000000..c49addcb1fb2482b5dc92330df3094f68f95ac2f
--- /dev/null
+++ b/python/paddle/incubate/hapi/vision/models/lenet.py
@@ -0,0 +1,65 @@
+#  Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+import paddle.fluid as fluid
+from paddle.fluid.dygraph.nn import Conv2D, BatchNorm, Pool2D, Linear
+from paddle.fluid.dygraph.container import Sequential
+
+from ...model import Model
+
+__all__ = ['LeNet']
+
+
+class LeNet(Model):
+    """LeNet model from
+    LeCun Y, Bottou L, Bengio Y, et al. Gradient-based learning applied to document recognition[J]. Proceedings of the IEEE, 1998, 86(11): 2278-2324.
+
+    Args:
+        num_classes (int): output dim of last fc layer. If num_classes <= 0, the
+                            fc layers will not be defined. Default: 10.
+        classifier_activation (str): activation for the last fc layer. Default: 'softmax'.
+
+    Examples:
+        .. code-block:: python
+
+            from paddle.incubate.hapi.vision.models import LeNet
+
+            model = LeNet()
+    """
+
+    def __init__(self, num_classes=10, classifier_activation='softmax'):
+        super(LeNet, self).__init__()
+        self.num_classes = num_classes
+        self.features = Sequential(
+            Conv2D(
+                1, 6, 3, stride=1, padding=1),
+            Pool2D(2, 'max', 2),
+            Conv2D(
+                6, 16, 5, stride=1, padding=0),
+            Pool2D(2, 'max', 2))
+
+        if num_classes > 0:
+            self.fc = Sequential(
+                Linear(400, 120),
+                Linear(120, 84),
+                Linear(
+                    84, num_classes, act=classifier_activation))
+
+    def forward(self, inputs):
+        x = self.features(inputs)
+        # Without a classifier head the raw feature maps are returned.
+        if self.num_classes > 0:
+            x = fluid.layers.flatten(x, 1)
+            x = self.fc(x)
+        return x
diff --git a/python/setup.py.in b/python/setup.py.in
index 4fc2c352a53285b08ea5fe223f647ac74b6ede14..959842d8492cecdef018c42404539210b4543009 100644
--- a/python/setup.py.in
+++ b/python/setup.py.in
@@ -178,6 +178,10 @@ packages=['paddle',
           'paddle.fluid.incubate.fleet.parameter_server.pslib',
           'paddle.fluid.incubate.fleet.collective',
           'paddle.fluid.incubate.fleet.utils',
+          'paddle.incubate.hapi',
+          'paddle.incubate.hapi.datasets',
+          'paddle.incubate.hapi.vision',
+          'paddle.incubate.hapi.vision.models',
           'paddle.io',
           'paddle.nn',
           'paddle.nn.functional',
diff --git a/tools/wlist.json b/tools/wlist.json
index cb6f9a6c9a41f0da4ba0e391614b9871a8367f75..594b40d650c33e0c4091b2b5bcee7ee6d6ebc6b1 100644
--- a/tools/wlist.json
+++ b/tools/wlist.json
@@ -102,7 +102,27 @@
         "load_persistables_for_inference",
         "cache",
         "buffered",
-        "xmap_readers"
+        "xmap_readers",
+        "Metric.reset",
+        "Metric.update",
+        "Metric.accumulate",
+        "Metric.name",
+        "Metric.add_metric_op",
+        "Callback.set_params",
+        "Callback.on_train_begin",
+        "Callback.on_train_end",
+        "Callback.on_eval_begin",
+        "Callback.on_eval_end",
+        "Callback.on_test_begin",
+        "Callback.on_test_end",
+        "Callback.on_epoch_begin",
+        "Callback.on_epoch_end",
+        "Callback.on_train_batch_begin",
+        "Callback.on_train_batch_end",
+        "Callback.on_eval_batch_begin",
+        "Callback.on_eval_batch_end",
+        "Callback.on_test_batch_begin",
+        "Callback.on_test_batch_end"
     ],
     "wlist_no_op_pass":[
         "gelu",