Unverified Commit 6bd200db authored by Z zhangchunle and committed by GitHub

remove high level api (#23854)

Parent 94fdb8eb
......@@ -83,7 +83,6 @@ option(WITH_CONTRIB "Compile the third-party contribution" OFF)
option(REPLACE_ENFORCE_GLOG "Replace PADDLE_ENFORCE with glog/CHECK for better debug." OFF)
option(WITH_GRPC "Use grpc as the default rpc framework" ${WITH_DISTRIBUTE})
option(WITH_INFERENCE_API_TEST "Test fluid inference C++ high-level api interface" OFF)
option(WITH_HIGH_LEVEL_API_TEST "Test fluid python high-level api interface" OFF)
option(PY_VERSION "Compile PaddlePaddle with python3 support" ${PY_VERSION})
option(WITH_DGC "Use DGC(Deep Gradient Compression) or not" ${WITH_DISTRIBUTE})
option(SANITIZER_TYPE "Choose the type of sanitizer, options are: Address, Leak, Memory, Thread, Undefined" OFF)
......
......@@ -211,7 +211,6 @@ function cmake_base() {
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON
-DWITH_CONTRIB=${WITH_CONTRIB:-ON}
-DWITH_INFERENCE_API_TEST=${WITH_INFERENCE_API_TEST:-ON}
-DWITH_HIGH_LEVEL_API_TEST=${WITH_HIGH_LEVEL_API_TEST:-OFF}
-DINFERENCE_DEMO_INSTALL_DIR=${INFERENCE_DEMO_INSTALL_DIR}
-DPY_VERSION=${PY_VERSION:-2.7}
-DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX:-/paddle/build}
......@@ -244,7 +243,6 @@ EOF
-DWITH_CONTRIB=${WITH_CONTRIB:-ON} \
-DWITH_INFERENCE_API_TEST=${WITH_INFERENCE_API_TEST:-ON} \
-DWITH_HIGH_LEVEL_API_TEST=${WITH_HIGH_LEVEL_API_TEST:-OFF} \
-DINFERENCE_DEMO_INSTALL_DIR=${INFERENCE_DEMO_INSTALL_DIR} \
-DPY_VERSION=${PY_VERSION:-2.7} \
-DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX:-/paddle/build} \
-DWITH_GRPC=${grpc_flag} \
......
......@@ -47,7 +47,6 @@ from .dataset import *
from .data import *
from . import trainer_desc
from . import inferencer
from . import io
from . import evaluator
......@@ -92,7 +91,7 @@ from .dygraph.varbase_patch_methods import monkey_patch_varbase
Tensor = LoDTensor
__all__ = framework.__all__ + executor.__all__ + \
trainer_desc.__all__ + inferencer.__all__ + transpiler.__all__ + \
trainer_desc.__all__ + transpiler.__all__ + \
parallel_executor.__all__ + lod_tensor.__all__ + \
data_feed_desc.__all__ + compiler.__all__ + backward.__all__ + [
'io',
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
from ..wrapped_decorator import signature_safe_contextmanager
from .. import core
from .. import executor
from .. import framework
from .. import io
from .. import parallel_executor
from .. import unique_name
from .trainer import check_and_get_place
__all__ = ['Inferencer', ]
class Inferencer(object):
"""
Inferencer High Level API.
Args:
infer_func (Python func): Infer function that returns the predict Variable
param_path (str): The path where the inference model is saved by fluid.io.save_params
place (Place): place to do the inference
parallel (bool): use parallel_executor to run the inference; it will use multiple CPUs/GPUs.
Examples:
.. code-block:: python
def inference_program():
x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1, act=None)
return y_predict
place = fluid.CPUPlace()
inferencer = fluid.Inferencer(
infer_func=inference_program, param_path="/tmp/model", place=place)
"""
def __init__(self, infer_func, param_path, place=None, parallel=False):
self.param_path = param_path
self.scope = core.Scope()
self.parallel = parallel
self.place = check_and_get_place(place)
self.inference_program = framework.Program()
with framework.program_guard(self.inference_program):
with unique_name.guard():
self.predict_var = infer_func()
with self._prog_and_scope_guard():
# load params from param_path into scope
io.load_params(executor.Executor(self.place), param_path)
if parallel:
with self._prog_and_scope_guard():
self.exe = parallel_executor.ParallelExecutor(
use_cuda=isinstance(self.place, core.CUDAPlace),
loss_name=self.predict_var.name)
else:
self.exe = executor.Executor(self.place)
self.inference_program = self.inference_program.clone(for_test=True)
def infer(self, inputs, return_numpy=True):
"""
Do Inference for Inputs
Args:
inputs (map): a map of {"input_name": input_var} that will be fed into the inference program
return_numpy (bool): whether to transform the return value into numpy
Returns:
Tensor or Numpy: the predicted value of the inference model for the inputs
Examples:
.. code-block:: python
tensor_x = numpy.random.uniform(0, 10, [batch_size, 13]).astype("float32")
results = inferencer.infer({'x': tensor_x})
"""
if not isinstance(inputs, dict):
raise ValueError(
"inputs should be a map of {'input_name': input_var}")
with self._prog_and_scope_guard():
results = self.exe.run(feed=inputs,
fetch_list=[self.predict_var.name],
return_numpy=return_numpy)
return results
@signature_safe_contextmanager
def _prog_and_scope_guard(self):
with framework.program_guard(main_program=self.inference_program):
with executor.scope_guard(self.scope):
yield
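# A minimal end-to-end usage sketch of this class, assembled from the
# docstring examples above (illustrative only; the model definition and the
# "/tmp/model" path are placeholders):
#
#     import numpy
#     import paddle.fluid as fluid
#
#     def inference_program():
#         x = fluid.layers.data(name='x', shape=[13], dtype='float32')
#         return fluid.layers.fc(input=x, size=1, act=None)
#
#     inferencer = fluid.Inferencer(
#         infer_func=inference_program,
#         param_path="/tmp/model",
#         place=fluid.CPUPlace())
#     tensor_x = numpy.random.uniform(0, 10, [10, 13]).astype("float32")
#     results = inferencer.infer({'x': tensor_x})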
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
from ..wrapped_decorator import signature_safe_contextmanager
import os
import errno
import shutil
import six
import time
from .. import core
from .. import data_feeder
from .. import executor
from .. import framework
from .. import io
# optimizer is the same as the parameter of Trainer.__init__. Rename it to opt_module
from .. import optimizer as opt_module
from .. import parallel_executor
from ..transpiler import distribute_transpiler
__all__ = [
'Trainer', 'BeginEpochEvent', 'EndEpochEvent', 'BeginStepEvent',
'EndStepEvent', 'CheckpointConfig'
]
class BeginEpochEvent(object):
"""
The beginning of a training epoch.
Args:
epoch_id(int): The current epoch ID.
"""
def __init__(self, epoch_id):
self.epoch = epoch_id
class EndEpochEvent(object):
"""
The end of a training epoch.
Args:
epoch_id(int): The current epoch ID.
"""
def __init__(self, epoch_id):
self.epoch = epoch_id
class BeginStepEvent(object):
"""
The beginning of a training step.
Args:
epoch_id(int): The current epoch ID.
step_id(int): The current step ID.
"""
def __init__(self, epoch_id, step_id):
self.epoch = epoch_id
self.step = step_id
self.fetch_metrics = True
"""
If fetch_metrics is true, the metrics will be fetched at the
EndStepEvent. Default is True.
"""
class EndStepEvent(object):
"""
The end of a training step.
Args:
epoch_id(int): The current epoch ID.
step_id(int): The current step ID.
metrics(list): A list of fetched tensors. The order of this list is the
same as that of the :code:`train_func` return values.
"""
def __init__(self, epoch_id, step_id, metrics):
self.epoch = epoch_id
self.step = step_id
self.metrics = metrics
class CheckpointConfig(object):
"""
Parameter object for :code:`save_checkpoint` and
:code:`fluid.Trainer`. Used to configure how to save checkpoints.
Args:
checkpoint_dir(str): Directory path to save checkpoints. Default is the
current directory.
max_num_checkpoints(int): The max number of local checkpoints to keep.
epoch_interval(int): Save a checkpoint every this many epochs.
step_interval(int): Save a checkpoint every this many steps.
Examples:
>>> config = fluid.CheckpointConfig("./checkpoints")
>>> trainer = fluid.Trainer(train_func=train_program,
>>> place=place,
>>> optimizer_func=optimizer_func,
>>> checkpoint_config=config)
>>> trainer.train(...)
"""
def __init__(self,
checkpoint_dir=None,
max_num_checkpoints=3,
epoch_interval=1,
step_interval=10):
assert epoch_interval >= 1
assert step_interval >= 1
self.checkpoint_dir = checkpoint_dir \
if checkpoint_dir is not None else os.getcwd()
self.max_num_checkpoints = max_num_checkpoints
self.epoch_interval = epoch_interval
self.step_interval = step_interval
self.epoch_id = 0
self.step_id = 0
self.load_serial = None
self.pserver_id = None
self.lookup_table_name = None
def check_and_get_place(place):
"""
Check the type of place or get the default place
Args:
place(None|core.CUDAPlace|core.CPUPlace): the place that trainer will be executed on.
Raises:
TypeError: if the type is mismatched.
Returns:
the original place if it is not None.
If fluid is compiled with CUDA, returns CUDAPlace(0) by default;
otherwise returns CPUPlace by default.
"""
if place is None:
if core.is_compiled_with_cuda():
return core.CUDAPlace(0)
else:
return core.CPUPlace()
else:
if not isinstance(place, core.CUDAPlace) and not isinstance(
place, core.CPUPlace):
raise TypeError("Place should be either CUDAPlace or CPUPlace")
return place
class Trainer(object):
"""
A trainer wraps MultiGPU/MultiNode training loops and can be used to train a
simple neural network easily.
This API takes a :code:`train_func`. A :code:`train_func` is a function that
returns loss as its first return value. The rest of the return values can be
fetched from EndStepEvent.metrics.
This API also takes an :code:`optimizer_func` that will return an optimizer
instance.
For example, to train an MLP on the MNIST dataset, the sample program is
>>> import paddle.fluid as fluid
>>>
>>> def mlp(image, layer_sizes=[200, 100], activation="relu", num_classes=10):
>>> hidden = image
>>> for layer_size in layer_sizes:
>>> hidden = fluid.layers.fc(input=hidden, size=layer_size, act=activation)
>>> return fluid.layers.fc(input=hidden, size=num_classes, act="softmax")
>>>
>>> def train_mnist_mlp():
>>> img = fluid.layers.data(name='image', shape=[784])
>>> label = fluid.layers.data(name='label', shape=[1], dtype='int64')
>>> prediction = mlp(img)
>>> return fluid.layers.mean(fluid.layers.cross_entropy(prediction, label))
>>>
>>> def optimizer():
>>> return fluid.optimizer.Adam()
>>>
>>> trainer = Trainer(train_func=train_mnist_mlp,
>>> optimizer_func=optimizer,
>>> place=fluid.CUDAPlace(0),
>>> parallel=True)
>>>
>>> def train_callback(event):
>>> if isinstance(event, fluid.EndStepEvent):
>>> print("Epoch ID", event.epoch, "Step ID",
>>> event.step, "AvgLoss", event.metrics[0])
>>> elif isinstance(event, fluid.EndEpochEvent):
>>> trainer.save_params("./model_{0}".format(event.epoch))
>>>
>>> trainer.train(num_epochs=100, event_handler=train_callback)
For more examples, please see :ref:`api_guide_high_level_api`.
Args:
train_func(callable): A function which will return loss. The loss must be
a scalar tensor.
optimizer_func(callable): A function that returns an Optimizer object.
place(CUDAPlace|CPUPlace): The device place of this trainer. If
:code:`parallel=True`, all CUDA places will be used if :code:`place`
is a :code:`CUDAPlace`.
parallel(bool): True to use multiple devices.
checkpoint_config(CheckpointConfig): Configuration about how to save
checkpoints.
"""
def __init__(self,
train_func,
optimizer_func,
param_path=None,
place=None,
parallel=False,
checkpoint_config=None):
self.__stop = False
self.parallel = parallel
# config for checkpoint
# only chief worker will save variables
self.trainer_id = 0
self.checkpoint_cfg = checkpoint_config
if self.checkpoint_cfg:
assert isinstance(self.checkpoint_cfg, CheckpointConfig)
serial = _get_latest_checkpoint_serial(
self.checkpoint_cfg.checkpoint_dir)
self.checkpoint_cfg.load_serial = serial if serial >= 0 else None
self.scope = core.Scope()
# 1. we need to generate a framework.Program by calling
# program_func. Reference: fluid.program_guard in
# test_word2vec.py
self.startup_program = framework.Program()
self.train_program = framework.Program()
with framework.program_guard(self.train_program, self.startup_program):
program_func_outs = train_func()
self.train_func_outputs = program_func_outs if isinstance(
program_func_outs, list) else [program_func_outs]
self.test_program = self.train_program.clone(for_test=True)
# The first element of program_func_outs is loss.
loss = self.train_func_outputs[0]
optimizer = optimizer_func()
if not isinstance(optimizer, opt_module.Optimizer):
raise TypeError(
"The optimizer should be an instance of Optimizer")
optimize_ops, params_grads = optimizer.minimize(loss)
self.place = check_and_get_place(place)
self._dist_transpile_if_necessary(optimize_ops, params_grads)
# 2. move the default_main_program to self.program and run the
# default_startup program on an empty core.Scope()
# Run startup program
with self._prog_and_scope_guard():
exe = executor.Executor(self.place)
exe.run(self.startup_program)
if self.checkpoint_cfg and self.checkpoint_cfg.load_serial is not None:
self._load_checkpoint()
if param_path and os.path.isdir(param_path):
with self._prog_and_scope_guard():
# load params from param_path into scope
io.load_persistables(
executor=exe,
dirname=param_path,
main_program=self.startup_program)
def _transpile_nccl2_dist(self):
# PADDLE_TRAINER_IPS
if "PADDLE_TRAINER_IPS" not in os.environ:
self.nccl_id_var = None
else:
self.trainer_id = int(os.getenv("PADDLE_TRAINER_ID"))
port = os.getenv("PADDLE_PSERVER_PORT")
worker_ips = os.getenv("PADDLE_TRAINER_IPS")
worker_endpoints = []
for ip in worker_ips.split(","):
worker_endpoints.append(':'.join([ip, port]))
self.num_trainers = len(worker_endpoints)
current_endpoint = os.getenv("PADDLE_CURRENT_IP") + ":" + port
worker_endpoints.remove(current_endpoint)
# TODO(wuyi): use self.nccl_id_var, self.num_trainers and self.trainer_id
# in ParallelExecutor to start
# distributed training using NCCL2
self.nccl_id_var = self.startup_program.global_block().create_var(
name="NCCLID", persistable=True, type=core.VarDesc.VarType.RAW)
self.startup_program.global_block().append_op(
type="gen_nccl_id",
inputs={},
outputs={"NCCLID": self.nccl_id_var},
attrs={
"endpoint": current_endpoint,
"endpoint_list": worker_endpoints,
"trainer_id": self.trainer_id
})
def _dist_transpile_if_necessary(self, optimize_ops, params_grads):
self._transpile_nccl2_dist()
if self.nccl_id_var is not None:
return
if "PADDLE_TRAINING_ROLE" not in os.environ:
return
# the port of all pservers, needed by both trainer and pserver
port = os.getenv("PADDLE_PSERVER_PORT", "6174")
# comma separated ips of all pservers, needed by trainer and
# pserver
pserver_ips = os.getenv("PADDLE_PSERVER_IPS", "")
eplist = []
for ip in pserver_ips.split(","):
eplist.append(':'.join([ip, port]))
pserver_endpoints = ",".join(eplist)
# total number of workers/trainers in the job, needed by
# trainer and pserver
trainers = int(os.getenv("PADDLE_TRAINERS"))
# the IP of the local machine, needed by pserver only
current_endpoint = os.getenv("PADDLE_CURRENT_IP", "") + ":" + port
# the unique trainer id, starting from 0, needed by trainer
# only
self.trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
# the role, should be either PSERVER or TRAINER
training_role = os.getenv("PADDLE_TRAINING_ROLE")
with self._prog_and_scope_guard():
t = distribute_transpiler.DistributeTranspiler()
t.transpile(
self.trainer_id, pservers=pserver_endpoints, trainers=trainers)
if training_role == "PSERVER":
if self.checkpoint_cfg:
pserver_id = eplist.index(current_endpoint)
self.checkpoint_cfg.pserver_id = pserver_id
if t.has_distributed_lookup_table:
self.checkpoint_cfg.lookup_table_name = t.table_name
self.train_program = t.get_pserver_program(current_endpoint)
self.startup_program = t.get_startup_program(current_endpoint,
self.train_program)
elif training_role == "TRAINER":
self.train_program = t.get_trainer_program()
else:
raise ValueError(
'PADDLE_TRAINING_ROLE environment variable must be either TRAINER or PSERVER'
)
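# For reference, the environment this method expects (the variable names come
# from the code above; the values below are hypothetical, for a 2-trainer,
# 1-pserver job):
#
#     PADDLE_TRAINING_ROLE=TRAINER    # or PSERVER
#     PADDLE_PSERVER_PORT=6174        # port shared by all pservers
#     PADDLE_PSERVER_IPS=192.168.0.2  # comma separated pserver ips
#     PADDLE_TRAINERS=2               # total number of trainers
#     PADDLE_TRAINER_ID=0             # unique trainer id, starting from 0
#     PADDLE_CURRENT_IP=192.168.0.3   # ip of the local machine
#
# If PADDLE_TRAINER_IPS is also set, _transpile_nccl2_dist switches to the
# NCCL2 path instead.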
def stop(self):
"""
stop training
"""
self.__stop = True
def train(self, num_epochs, event_handler, reader=None, feed_order=None):
"""
Start the train loop to train the model.
Args:
num_epochs(int): The number of epochs. An epoch will process all data in the reader
event_handler(callable): The event handler. A function with type (ev:Event)->void
reader(callable): A reader creator object. See also
:ref:`api_guide_python_reader` .
feed_order(list): Feeding order of the reader. None will follow the
order defined in the program
Returns:
None
"""
training_role = os.getenv("PADDLE_TRAINING_ROLE", "")
if training_role == "PSERVER":
with self._prog_and_scope_guard():
exe = executor.Executor(self.place)
exe.run()
return
if self.parallel:
self._train_by_parallel_executor(num_epochs, event_handler, reader,
feed_order)
else:
self._train_by_executor(num_epochs, event_handler, reader,
feed_order)
def test(self, reader, feed_order):
"""
Test the model on the given test data.
Args:
reader(callable): The reader that yields test data.
feed_order(list): Feeding order of the reader. None will follow the
order defined in the program.
"""
return self._test_by_executor(reader, feed_order,
self.train_func_outputs)
def save_params(self, param_path):
"""
Save all parameters into :code:`param_path`.
Args:
param_path(str): The path to save parameters.
Returns:
None
"""
with self._prog_and_scope_guard():
exe = executor.Executor(self.place)
io.save_persistables(exe, dirname=param_path)
def save_inference_model(self, param_path, feeded_var_names,
target_var_indexes):
"""
Save the model for C++ inference into :code:`param_path`.
Args:
param_path(str): The path to save parameters.
feeded_var_names(list(str)): The names of the variables that
need to be fed before running the program.
target_var_indexes(list(int)): the indexes of the target variables
that trainer.train_func returns.
Returns:
None
"""
with self._prog_and_scope_guard():
exe = executor.Executor(self.place)
target_vars = [
self.train_func_outputs[index] for index in target_var_indexes
]
io.save_inference_model(param_path, feeded_var_names, target_vars,
exe)
@signature_safe_contextmanager
def _prog_and_scope_guard(self):
with framework.program_guard(
main_program=self.train_program,
startup_program=self.startup_program):
with executor.scope_guard(self.scope):
yield
def _train_by_executor(self, num_epochs, event_handler, reader, feed_order):
"""
Train by Executor and single device.
Args:
num_epochs: the number of epochs to train.
event_handler: the callback invoked on begin/end events.
reader: a reader creator object.
feed_order: feeding order of the reader.
Returns:
"""
with self._prog_and_scope_guard():
feed_var_list = build_feed_var_list(self.train_program, feed_order)
feeder = data_feeder.DataFeeder(
feed_list=feed_var_list, place=self.place)
exe = executor.Executor(self.place)
reader = feeder.decorate_reader(reader, multi_devices=False)
self._train_by_any_executor(event_handler, exe, num_epochs, reader)
def _train_by_any_executor(self, event_handler, exe, num_epochs, reader):
if self.checkpoint_cfg:
epochs = [
epoch_id for epoch_id in range(num_epochs)
if epoch_id >= self.checkpoint_cfg.epoch_id
]
else:
epochs = [epoch_id for epoch_id in range(num_epochs)]
for epoch_id in epochs:
event_handler(BeginEpochEvent(epoch_id))
for step_id, data in enumerate(reader()):
if self.__stop:
if self.checkpoint_cfg:
self._clean_checkpoint()
return
if self.checkpoint_cfg and self.checkpoint_cfg.load_serial \
and self.checkpoint_cfg.step_id >= step_id and self.checkpoint_cfg.epoch_id == epoch_id:
continue
begin_event = BeginStepEvent(epoch_id, step_id)
event_handler(begin_event)
if begin_event.fetch_metrics:
metrics = exe.run(feed=data,
fetch_list=[
var.name
for var in self.train_func_outputs
])
else:
metrics = exe.run(feed=data, fetch_list=[])
if self.checkpoint_cfg:
self._save_checkpoint(epoch_id, step_id)
event_handler(EndStepEvent(epoch_id, step_id, metrics))
event_handler(EndEpochEvent(epoch_id))
if self.checkpoint_cfg:
self._clean_checkpoint()
def _test_by_executor(self, reader, feed_order, fetch_list):
with executor.scope_guard(self.scope):
feed_var_list = build_feed_var_list(self.test_program, feed_order)
feeder = data_feeder.DataFeeder(
feed_list=feed_var_list, place=self.place)
exe = executor.Executor(self.place)
accumulated = len(fetch_list) * [0]
count = 0
for data in reader():
outs = exe.run(program=self.test_program,
feed=feeder.feed(data),
fetch_list=fetch_list)
accumulated = [x[0] + x[1][0] for x in zip(accumulated, outs)]
count += 1
return [x / count for x in accumulated]
def _train_by_parallel_executor(self, num_epochs, event_handler, reader,
feed_order):
with self._prog_and_scope_guard():
pe = self._get_or_create_parallel_executor()
feed_var_list = build_feed_var_list(self.train_program, feed_order)
feeder = data_feeder.DataFeeder(
feed_list=feed_var_list, place=self.place)
reader = feeder.decorate_reader(reader, multi_devices=True)
self._train_by_any_executor(event_handler, pe, num_epochs, reader)
def _get_parallel_executor(self):
return getattr(self, 'parallel_executor', None)
def _get_or_create_parallel_executor(self):
if self._get_parallel_executor() is None:
self.parallel_executor = parallel_executor.ParallelExecutor(
use_cuda=isinstance(self.place, core.CUDAPlace),
loss_name=self.train_func_outputs[0].name)
return self._get_parallel_executor()
def _clean_checkpoint(self):
assert self.checkpoint_cfg
clean_checkpoint(checkpoint_dir=self.checkpoint_cfg.checkpoint_dir)
def _get_checkpoint_load_args(self):
"""
epoch_id and step_id are runtime arguments; they are not variables, so they are loaded independently.
"""
return ["epoch_id", "step_id"]
def _get_checkpoint_save_args(self, epoch_id, step_id):
"""
epoch_id and step_id are runtime arguments; they are not variables, so they are saved independently.
"""
trainer_args = {}
trainer_args["epoch_id"] = epoch_id
trainer_args["step_id"] = step_id
return trainer_args
def _save_checkpoint(self, epoch_id, step_id):
assert self.checkpoint_cfg
if epoch_id % self.checkpoint_cfg.epoch_interval == 0 \
and step_id % self.checkpoint_cfg.step_interval == 0:
exe = executor.Executor(self.place)
save_checkpoint(
executor=exe,
checkpoint_dir=self.checkpoint_cfg.checkpoint_dir,
trainer_id=self.trainer_id,
trainer_args=self._get_checkpoint_save_args(epoch_id, step_id),
main_program=self.train_program,
max_num_checkpoints=self.checkpoint_cfg.max_num_checkpoints)
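# Note (added comment): with the CheckpointConfig defaults (epoch_interval=1,
# step_interval=10) this saves on steps 0, 10, 20, ... of every epoch.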
def _load_checkpoint(self):
with self._prog_and_scope_guard():
exe = executor.Executor(self.place)
load_checkpoint(
executor=exe,
checkpoint_dir=self.checkpoint_cfg.checkpoint_dir,
main_program=self.startup_program)
if not self.checkpoint_cfg.pserver_id:
load_trainer_args = self._get_checkpoint_load_args()
trainer_args = load_checkpoint(
executor=exe,
checkpoint_dir=self.checkpoint_cfg.checkpoint_dir,
main_program=self.startup_program,
role_id=self.trainer_id,
is_trainer=True,
load_trainer_args=load_trainer_args)
if len(trainer_args) != 2:
raise ValueError(
"the return trainer_args length do not equal _get_checkpoint_load_args"
)
self.checkpoint_cfg.epoch_id = int(trainer_args[0])
self.checkpoint_cfg.step_id = int(trainer_args[1])
else:
if self.checkpoint_cfg.lookup_table_name:
load_checkpoint(
executor=exe,
checkpoint_dir=self.checkpoint_cfg.checkpoint_dir,
main_program=self.startup_program,
role_id=self.checkpoint_cfg.pserver_id,
is_trainer=False,
load_trainer_args=None,
load_lookup_table=self.checkpoint_cfg.lookup_table_name)
def build_feed_var_list(program, feed_order):
if not isinstance(program, framework.Program):
raise TypeError("The 'program' should be an object of Program")
if isinstance(feed_order, list):
feed_var_list = [
program.global_block().var(var_name) for var_name in feed_order
]
else:
if not isinstance(feed_order, dict):
raise TypeError(
"The 'feed_order' should be either None, list or dict.")
if not sorted(feed_order.values()) == list(range(len(feed_order))):
raise ValueError(
"The values of 'feed_order' should be a permutation of [0, len(feed_order))"
)
sorted_pair_list = sorted(
six.iteritems(feed_order), key=lambda item: item[1])
feed_var_list = [
program.global_block().var(pair[0]) for pair in sorted_pair_list
]
return feed_var_list
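# Usage note (added for clarity): feed_order may be a list of variable names
# or a dict mapping each name to its feed position. Assuming a program `prog`
# with data layers 'x' and 'y', the two hypothetical calls below are
# equivalent:
#
#     feed_vars = build_feed_var_list(prog, ['x', 'y'])
#     feed_vars = build_feed_var_list(prog, {'x': 0, 'y': 1})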
# Checkpoint APIs were moved from io.py to trainer.py; all of them are private.
SUCCESS_MARK_FILENAME = "_SUCCESS"
CHECKPOINT_PREFIX = "checkpoint"
MODEL_DIR = "__model__"
LOOKUP_TABLE_DIR = "__lookup_table__"
TRAINER_PREFIX = "trainer"
CHECKPOINT_SEPARATOR = "_"
def save_checkpoint(executor,
checkpoint_dir,
trainer_id,
main_program,
trainer_args=None,
max_num_checkpoints=3,
lookup_table=None,
pserver_endpoints=None):
"""
This function filters out all checkpoint variables from the given
main_program and then saves these variables to the `checkpoint_dir`
directory.
In the training process, we generally save a checkpoint in each
iteration. So there might be a lot of checkpoints in the
`checkpoint_dir`. To avoid them taking too much disk space,
`max_num_checkpoints` is introduced to limit the total number of
checkpoints. If the number of existing checkpoints is greater than
`max_num_checkpoints`, the oldest ones will be deleted.
A variable is a checkpoint variable and will be saved if it meets
all following conditions:
1. It's persistable.
2. Its type is not FEED_MINIBATCH, FETCH_LIST, or RAW.
3. Its name contains no "@GRAD", ".trainer_", or ".block".
Args:
executor(Executor): The executor to run for save checkpoint.
checkpoint_dir(str): The folder where to save checkpoints.
trainer_id(int): current trainer id; if the id is 0, the trainer
is the chief.
trainer_args(dict|None): Current training arguments, such as 'epoch_id'
and 'step_id'.
Default: None
main_program(Program): The program whose checkpoint variables will
be saved.
max_num_checkpoints(int): The max number of total number of existing
checkpoints.
Default: 3
lookup_table(string|None): the lookup table name; when using a
distributed lookup table, it can be obtained from
DistributeTranspiler.table_name
pserver_endpoints(list|None): the parameter server ip:port list;
when using a distributed lookup table, it can be obtained from the
distribute arguments.
Returns:
None
Raises:
ValueError: If `checkpoint_dir` is None.
AssertionError: If `trainer_args` is not a dict.
Examples:
.. code-block:: python
exe = fluid.Executor(fluid.CPUPlace())
path = "./checkpoints"
prog = fluid.default_main_program()
trainer_args = {"epoch_id": 200,
"step_id": 20} # just an example
table_name = "share_w"
ps_endpoints = ["127.0.0.1:6000","127.0.0.1:6001"]
save_checkpoint(executor=exe,
checkpoint_dir=path,
trainer_id=0,
trainer_args=trainer_args,
main_program=prog,
max_num_checkpoints=3,
lookup_table=table_name,
pserver_endpoints = ps_endpoints)
"""
if checkpoint_dir is None:
raise ValueError("'checkpoint_dir' should not be None")
if main_program is None:
raise ValueError('main_program should not be None.')
if trainer_args:
assert isinstance(trainer_args, dict)
is_chief = trainer_id == 0
_make_chekcpoint_dirs(checkpoint_dir)
serial = _get_latest_checkpoint_serial(checkpoint_dir) + 1
cur_dir = _get_serial_dir(checkpoint_dir, serial)
_save_trainer_args(cur_dir, trainer_id, trainer_args)
if is_chief:
_save_persist_vars_without_grad(executor, cur_dir, main_program)
if is_chief and lookup_table and pserver_endpoints:
_save_pserver_vars_by_notify(executor, cur_dir, lookup_table,
pserver_endpoints)
_scroll_delete(checkpoint_dir, max_num_checkpoints)
def load_checkpoint(executor,
checkpoint_dir,
main_program,
role_id=0,
is_trainer=True,
load_trainer_args=None,
load_lookup_table=None):
"""
This function filters out all checkpoint variables from the given
main_program and then tries to load these variables from the
`checkpoint_dir` directory.
In the training process, we generally save a checkpoint in each
iteration. So there may be more than one checkpoint in the
`checkpoint_dir` (each checkpoint has its own sub folder); the
latest serial is selected automatically.
A variable is a checkpoint variable and will be loaded if it meets
all following conditions:
1. It's persistable.
2. Its type is not FEED_MINIBATCH, FETCH_LIST, or RAW.
3. Its name contains no "@GRAD", ".trainer_", or ".block".
Args:
executor(Executor): The executor to run for loading checkpoint.
checkpoint_dir(str): The folder where all checkpoints are.
main_program(Program): The program whose checkpoint variables will
be loaded.
role_id(int): the trainer id or the parameter server id.
is_trainer(bool): True for a trainer, False for a parameter server.
load_trainer_args(list|None): the list of trainer args to load.
load_lookup_table(str|None): the lookup table name
Returns:
None
Raises:
ValueError: If `checkpoint_dir` is None.
ValueError: If `main_program` is None.
Examples:
.. code-block:: python
exe = fluid.Executor(fluid.CPUPlace())
path = "./checkpoints"
prog = fluid.default_main_program()
load_checkpoint(executor=exe, checkpoint_dir=path,
main_program=prog)
# In this example, the `load_checkpoint` function
# will first filter out all checkpoint variables in the default
# main program, and then try to load these variables from the
# latest checkpoint folder, e.g. "./checkpoints/checkpoint_9/__model__".
"""
if checkpoint_dir is None:
raise ValueError("'checkpoint_dir' should not be None")
serial = _get_latest_checkpoint_serial(checkpoint_dir)
# there is nothing to be loaded
if serial is None or serial < 0:
return
if main_program is None:
raise ValueError('main_program should not be None.')
if is_trainer and load_trainer_args is None:
cur_dir = _get_serial_dir(checkpoint_dir, serial)
_load_persist_vars_without_grad(executor, cur_dir, main_program, True)
return
if is_trainer and load_trainer_args:
return _load_trainer_args(checkpoint_dir, serial, role_id,
load_trainer_args)
if not is_trainer and load_lookup_table:
_load_lookup_table_vars(executor, checkpoint_dir, main_program, role_id,
load_lookup_table)
def clean_checkpoint(checkpoint_dir, delete_dir=False):
"""
clean the checkpoint dir; when training exits normally,
the trainer calls clean_checkpoint to delete the checkpoint directory saved before.
delete_dir only works when the directory is empty; otherwise, OSError is raised.
: param checkpoint_dir
: param delete_dir
"""
if checkpoint_dir is None:
raise ValueError("'checkpoint_dir' should not be None")
_scroll_delete(checkpoint_dir, max_num_checkpoints=0)
if delete_dir and not os.listdir(checkpoint_dir):
os.rmdir(checkpoint_dir)
def _load_persist_vars_without_grad(executor,
dirname,
program,
has_model_dir=False):
"""
This function filters out all checkpoint variables from the given
program and then tries to load these variables from the given directory.
A variable is a checkpoint variable if it meets all following
conditions:
1. It's persistable.
2. Its type is not FEED_MINIBATCH, FETCH_LIST, or RAW.
3. Its name contains no "@GRAD", ".trainer_", or ".block".
Args:
executor(Executor): The executor to run for loading variables.
dirname(str): The directory path.
program(Program): The program whose checkpoint variables will
be loaded.
has_model_dir(bool): if True, the function loads variables
from a sub directory named '__model__'.
Default: False
Returns:
None
Examples:
.. code-block:: python
exe = fluid.Executor(fluid.CPUPlace())
param_path = "./my_paddle_model"
prog = fluid.default_main_program()
_load_persist_vars_without_grad(executor=exe,
dirname=param_path, program=prog, has_model_dir=True)
# In this example, the `_load_persist_vars_without_grad` function
# will first filter out all checkpoint variables in the default
# main program, and then try to load these variables from the
# folder "./my_paddle_model/__model__".
"""
if has_model_dir:
dirname = _get_model_dir(dirname)
io.load_vars(
executor,
dirname=dirname,
main_program=program,
predicate=_is_checkpoint_var,
filename=None)
def _load_lookup_table_vars(executor, dirname, program, pserver_id, table_name):
"""
The parameter server loads the lookup table's local file into a
SelectedRows variable.
Args:
executor(Executor): The executor to run for loading persistable variables
dirname(str): The directory path
program(Program): Find the variable named table_name in this program
pserver_id(int): the serial number in pserver_endpoints list
table_name(str): lookup table name
Returns:
None
Examples:
.. code-block:: python
exe = fluid.Executor(fluid.CPUPlace())
dirname = "./checkpoints/checkpoint_9/"
prog = fluid.default_main_program()
pserver_id = 1
table_name = "share_w"
_load_lookup_table_vars(executor=exe,
dirname=dirname, program=prog, pserver_id=pserver_id,
table_name=table_name)
"""
lookup_table_var = None
for var in program.list_vars():
if var.name == table_name:
lookup_table_var = var
break
assert lookup_table_var is not None
lookup_table_dir = os.path.join(dirname, LOOKUP_TABLE_DIR)
table_file = table_name + CHECKPOINT_SEPARATOR + str(pserver_id)
load_prog = framework.Program()
load_block = load_prog.global_block()
load_block.append_op(
type='load',
inputs={},
outputs={'Out': [lookup_table_var]},
attrs={'file_path': os.path.join(lookup_table_dir, table_file)})
executor.run(load_prog)
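# Note (added comment): the 'load' op above reads the file
# <dirname>/__lookup_table__/<table_name>_<pserver_id> into the
# SelectedRows variable found in the program.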
def _save_persist_vars_without_grad(executor, dirname, program):
"""
This function filters out all checkpoint variables from the given
program and then saves these variables to a sub-folder '__model__' of
the given directory.
A variable is a checkpoint variable if it meets all following
conditions:
1. It's persistable.
2. Its type is not FEED_MINIBATCH, FETCH_LIST, or RAW.
3. Its name contains no "@GRAD", ".trainer_", or ".block".
Args:
executor(Executor): The executor to run for saving variables.
dirname(str): The directory path.
program(Program): The program whose checkpoint variables will
be saved.
Returns:
None
Examples:
.. code-block:: python
exe = fluid.Executor(fluid.CPUPlace())
param_path = "./my_paddle_model"
prog = fluid.default_main_program()
_save_persist_vars_without_grad(executor=exe,
dirname=param_path, program=prog)
# In this example, the `_save_persist_vars_without_grad` function
# will first filter out all checkpoint variables in the default
# main program, and then save these variables to the folder
# "./my_paddle_model/__model__".
"""
cur_dir = _get_model_dir(dirname)
io.save_vars(
executor,
dirname=cur_dir,
main_program=program,
vars=None,
predicate=_is_checkpoint_var,
filename=None)
_write_success(cur_dir)
def _save_pserver_vars_by_notify(executor, dirname, lookup_table,
ps_endpoint_list):
"""
This function sends a checkpoint notify message from Trainer 0
to all the pservers.
The checkpoint notify message contains the lookup table name and
the absolute path on the pserver where the lookup_table is saved.
Args:
executor(Executor): The executor to run for sending the checkpoint notify.
dirname(str): The folder where to save checkpoints.
lookup_table(string): the lookup table name; when using a
distributed lookup table, it can be obtained from
DistributeTranspiler.table_name
ps_endpoint_list(list): the parameter server ip:port list;
when using a distributed lookup table, it can be obtained from the
distribute arguments.
Return:
None
Examples:
.. code-block:: python
exe = fluid.Executor(fluid.CPUPlace())
param_path = "./my_paddle_model"
prog = fluid.default_main_program()
table_name = "share_w"
ps_endpoints = ["127.0.0.1:6000","127.0.0.1:6001"]
_save_pserver_vars_by_notify(executor=exe,
dirname=param_path, lookup_table=table_name,
ps_endpoint_list=ps_endpoints)
"""
cur_dir = _get_lookuptable_dir(dirname)
checkpoint_notify_program = framework.Program()
checkpoint_notify_block = checkpoint_notify_program.global_block()
attrs = {}
attrs['epmap'] = ps_endpoint_list
attrs['dir'] = cur_dir
attrs['lookup_table'] = lookup_table
checkpoint_notify_block.append_op(
type='checkpoint_notify', inputs={}, outputs={}, attrs=attrs)
executor.run(checkpoint_notify_program)
def _save_trainer_args(dirname, trainer_id, trainer_args):
assert isinstance(trainer_args, dict)
cur_dir = _get_trainer_dir(dirname, trainer_id)
for name, value in six.iteritems(trainer_args):
args_file = os.path.join(cur_dir, name)
with open(args_file, 'w') as f:
f.write(str(value))
_write_success(cur_dir)
def _load_trainer_args(checkpoint_dir, serial, trainer_id, trainer_args):
"""
the trainer loads some args, such as epoch_id and step_id,
from its own directory.
Args:
checkpoint_dir(str): The folder where all checkpoints are.
serial(int): The serial of checkpoint you would like to load.
trainer_id(int): current trainer id.
trainer_args(list): the list of trainer args to load
Return:
None
Examples:
.. code-block:: python
param_path = "./checkpoint/"
serial = 7
trainer_id = 2
trainer_args = ["epoch_id", "step_id"]
_load_trainer_args(checkpoint_dir=param_path, serial=serial,
trainer_id=trainer_id, trainer_args=trainer_args)
"""
assert isinstance(trainer_args, list)
cur_dir = _get_serial_dir(checkpoint_dir, serial)
cur_dir = _get_trainer_dir(cur_dir, trainer_id)
ret_values = []
for arg in trainer_args:
cur_file = os.path.join(cur_dir, arg)
with open(cur_file, 'r') as f:
contents = f.read()
ret_values.append(contents.strip())
return ret_values
def _is_checkpoint_var(var):
"""
the checkpoint will not save or load all the variables.
variables whose type is FEED_MINIBATCH/FETCH_LIST/RAW or whose name contains @GRAD are discarded.
: param var(Variable)
"""
if var.desc.type() == core.VarDesc.VarType.FEED_MINIBATCH or \
var.desc.type() == core.VarDesc.VarType.FETCH_LIST or \
var.desc.type() == core.VarDesc.VarType.RAW:
return False
# @GRAD names gradient variables; the checkpoint will not save them.
if "@GRAD" in var.name:
return False
# .trainer_ names distributed-training variables; the checkpoint will not save them.
if ".trainer_" in var.name:
return False
# .block names distributed-training variables; the checkpoint will not save them.
if ".block" in var.name:
return False
return var.persistable
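# For example (added comment; the variable names are hypothetical): a
# persistable var named "fc_0.w_0" passes this filter, while "fc_0.w_0@GRAD"
# or "fc_0.w_0.trainer_0" would be discarded.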
def _make_chekcpoint_dirs(dirs):
"""
_make_chekcpoint_dirs makes the local directory directly; if the directory already exists, it is ignored.
"""
assert dirs is not None
if os.path.isfile(dirs):
raise OSError(errno.ENOTDIR, "dirs path should be a Directory.", dirs)
if not os.path.isdir(dirs):
try:
os.makedirs(dirs)
except OSError as err:
if err.errno != errno.EEXIST:
raise err
def _get_dir_serial(dirname):
# e.g. "checkpoint_5" -> 5; names without a valid serial map to -1
parts = dirname.rsplit(CHECKPOINT_SEPARATOR, 1)
try:
serial_num = int(parts[-1])
except ValueError:
serial_num = -1
return serial_num
def _get_serial_dir(dirname, serial):
serial_folder = CHECKPOINT_PREFIX + CHECKPOINT_SEPARATOR + str(serial)
serial_dir = os.path.join(dirname, serial_folder)
_make_chekcpoint_dirs(serial_dir)
return serial_dir
def _get_model_dir(dirname):
model_dir = os.path.join(dirname, MODEL_DIR)
_make_chekcpoint_dirs(model_dir)
return model_dir
def _get_lookuptable_dir(dirname):
lookuptable_dir = os.path.join(dirname, LOOKUP_TABLE_DIR)
_make_chekcpoint_dirs(lookuptable_dir)
return lookuptable_dir
def _get_trainer_dir(dirname, trainer_id):
trainer_folder = TRAINER_PREFIX + CHECKPOINT_SEPARATOR + str(trainer_id)
trainer_dir = os.path.join(dirname, trainer_folder)
_make_chekcpoint_dirs(trainer_dir)
return trainer_dir
def _scroll_delete(dirname, max_num_checkpoints=3):
dirs = os.listdir(dirname)
serial_map = {}
for serial in dirs:
serial_num = _get_dir_serial(serial)
serial_map[serial_num] = serial
if len(list(serial_map.keys())) <= max_num_checkpoints:
return
serials = list(serial_map.keys())
serials.sort(reverse=True)
serials = serials[max_num_checkpoints:]
for serial in serials:
cur_dir = _get_serial_dir(dirname, serial)
try:
shutil.rmtree(cur_dir)
except OSError as err:
if err.errno != errno.ENOENT:
raise err
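# For example (added comment): with max_num_checkpoints=3 and existing
# serials [0, 1, 2, 3, 4], the directories for serials 0 and 1 are removed.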
def _write_success(dirname):
"""
write a "_SUCCESS" mark file (containing the current time) in the checkpoint dir to indicate this checkpoint is complete.
: param dirname
"""
success_file = os.path.join(dirname, SUCCESS_MARK_FILENAME)
with open(success_file, 'a') as f:
now = time.ctime()
f.write(now)
def _get_latest_checkpoint_serial(checkpoint_dir):
"""
get the serial of the latest checkpoint in the checkpoint directory; the _SUCCESS file must exist in it
: param checkpoint_dir
"""
if not checkpoint_dir:
return -1
def has_success(checkpoint_dir, cur_dir):
"""
return the serial if _SUCCESS exists in this dir, otherwise -1
"""
serial = _get_dir_serial(cur_dir)
if serial == -1 or not os.path.isdir(
os.path.join(checkpoint_dir, cur_dir)):
return -1
success_path = os.path.join(
_get_serial_dir(checkpoint_dir, serial), MODEL_DIR,
SUCCESS_MARK_FILENAME)
if os.path.isfile(success_path):
return serial
return -1
if not os.path.isdir(checkpoint_dir):
return -1
current_dir = -1
dirs = os.listdir(checkpoint_dir)
for cur_dir in dirs:
success_num = has_success(checkpoint_dir, cur_dir)
if success_num > current_dir:
current_dir = success_num
return current_dir
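# For reference, the on-disk layout the helpers above produce (the directory
# names come from the constants above; the serial and trainer ids are
# examples):
#
#     <checkpoint_dir>/
#         checkpoint_0/
#             __model__/          # checkpoint vars plus a _SUCCESS mark file
#             __lookup_table__/   # only with a distributed lookup table
#             trainer_0/          # epoch_id/step_id files plus _SUCCESS
#         checkpoint_1/
#             ...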
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# NOTE: inferencer is moved into fluid.contrib.inferencer.
__all__ = []
......@@ -5,7 +5,3 @@ string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
foreach(src ${TEST_OPS})
py_test(${src} SRCS ${src}.py)
endforeach()
if(WITH_HIGH_LEVEL_API_TEST)
add_subdirectory(high-level-api)
endif()
file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*_new_api.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
# This test is buggy
# py_test(test_understand_sentiment_dynamic_rnn SRCS
# test_understand_sentiment_dynamic_rnn.py SERIAL)
LIST(REMOVE_ITEM TEST_OPS test_understand_sentiment_dynamic_rnn_new_api)
if(NOT APPLE)
# default test
foreach(src ${TEST_OPS})
py_test(${src} SRCS ${src}.py)
endforeach()
else()
foreach(src ${TEST_OPS})
if(${src} STREQUAL "test_image_classification_vgg_new_api")
message(WARNING "This test has been disabled on OSX for random failures: \n" ${src})
elseif(${src} STREQUAL "test_image_classification_resnet_new_api")
message(WARNING "This test has been disabled on OSX for random failures: \n" ${src})
elseif(${src} STREQUAL "test_recognize_digits_conv_new_api")
message(WARNING "This test has been disabled on OSX for random failures: \n" ${src})
elseif(${src} STREQUAL "test_recognize_digits_mlp_new_api")
message(WARNING "This test has been disabled on OSX for random failures: \n" ${src})
else()
py_test(${src} SRCS ${src}.py)
set_tests_properties(${src} PROPERTIES LABELS "RUN_TYPE=DIST")
endif()
endforeach()
endif()
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
CIFAR dataset.
This module will download the dataset from
https://www.cs.toronto.edu/~kriz/cifar.html and parse train/test set into
paddle reader creators.
The CIFAR-10 dataset consists of 60000 32x32 color images in 10 classes,
with 6000 images per class. There are 50000 training images and 10000 test
images.
The CIFAR-100 dataset is just like the CIFAR-10, except it has 100 classes
containing 600 images each. There are 500 training images and 100 testing
images per class.
"""
from __future__ import print_function
import itertools
import numpy
import paddle.dataset.common
import tarfile
import six
from six.moves import cPickle as pickle
__all__ = ['train10', 'test10']
URL_PREFIX = 'https://www.cs.toronto.edu/~kriz/'
CIFAR10_URL = URL_PREFIX + 'cifar-10-python.tar.gz'
CIFAR10_MD5 = 'c58f30108f718f92721af3b95e74349a'
def reader_creator(filename, sub_name, batch_size=None):
def read_batch(batch):
data = batch[six.b('data')]
labels = batch.get(
six.b('labels'), batch.get(six.b('fine_labels'), None))
assert labels is not None
for sample, label in six.moves.zip(data, labels):
yield (sample / 255.0).astype(numpy.float32), int(label)
def reader():
with tarfile.open(filename, mode='r') as f:
names = [
each_item.name for each_item in f if sub_name in each_item.name
]
batch_count = 0
for name in names:
if six.PY2:
batch = pickle.load(f.extractfile(name))
else:
batch = pickle.load(f.extractfile(name), encoding='bytes')
for item in read_batch(batch):
if isinstance(batch_size, int) and batch_count >= batch_size:
break
batch_count += 1
yield item
return reader
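# Note (added comment): despite its name, the batch_size argument above caps
# the total number of samples the reader yields; that is what keeps the
# "small test set" readers below fast enough for CI.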
def train10(batch_size=None):
"""
CIFAR-10 training set creator.
It returns a reader creator; each sample in the reader is image pixels in
[0, 1] and a label in [0, 9]. If batch_size is an int, the reader yields
at most that many samples.
:return: Training reader creator
:rtype: callable
"""
return reader_creator(
paddle.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5),
'data_batch',
batch_size=batch_size)
def test10(batch_size=None):
"""
CIFAR-10 test set creator.
It returns a reader creator; each sample in the reader is image pixels in
[0, 1] and a label in [0, 9]. If batch_size is an int, the reader yields
at most that many samples.
:return: Test reader creator.
:rtype: callable
"""
return reader_creator(
paddle.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5),
'test_batch',
batch_size=batch_size)
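# Typical usage, matching the *_new_api tests that import this module:
#
#     import paddle
#     import cifar10_small_test_set
#
#     train_reader = paddle.batch(
#         paddle.reader.shuffle(
#             cifar10_small_test_set.train10(batch_size=10), buf_size=128 * 10),
#         batch_size=128,
#         drop_last=False)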
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import paddle
import paddle.fluid as fluid
import sys
try:
from paddle.fluid.contrib.trainer import *
from paddle.fluid.contrib.inferencer import *
except ImportError:
print(
"In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
file=sys.stderr)
from paddle.fluid.trainer import *
from paddle.fluid.inferencer import *
import contextlib
import numpy
import unittest
# train reader
BATCH_SIZE = 20
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.uci_housing.train(), buf_size=500),
batch_size=BATCH_SIZE)
test_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.uci_housing.test(), buf_size=500),
batch_size=BATCH_SIZE)
def inference_program():
x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1, act=None)
return y_predict
def train_program():
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
y_predict = inference_program()
loss = fluid.layers.square_error_cost(input=y_predict, label=y)
avg_loss = fluid.layers.mean(loss)
return [avg_loss, y_predict]
def optimizer_func():
return fluid.optimizer.SGD(learning_rate=0.001)
def train(use_cuda, train_program, params_dirname, inference_model_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
trainer = Trainer(
train_func=train_program, place=place, optimizer_func=optimizer_func)
def event_handler(event):
if isinstance(event, EndStepEvent):
if event.step == 10:
test_metrics = trainer.test(
reader=test_reader, feed_order=['x', 'y'])
print(test_metrics)
'''
...
['25.768919467926025']
['15.343549569447836']
...
'''
if params_dirname is not None:
trainer.save_params(params_dirname)
trainer.save_inference_model(inference_model_dirname,
['x'], [1])
trainer.stop()
trainer.train(
reader=train_reader,
num_epochs=100,
event_handler=event_handler,
feed_order=['x', 'y'])
# infer
def infer(use_cuda, inference_program, params_dirname=None):
if params_dirname is None:
return
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = Inferencer(
infer_func=inference_program, param_path=params_dirname, place=place)
batch_size = 10
tensor_x = numpy.random.uniform(0, 10, [batch_size, 13]).astype("float32")
results = inferencer.infer({'x': tensor_x})
print("infer results: ", results[0])
def infer_by_saved_model(use_cuda, save_dirname=None):
if save_dirname is None:
return
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
inference_scope = fluid.core.Scope()
with fluid.scope_guard(inference_scope):
# Use fluid.io.load_inference_model to obtain the inference program desc,
# the feed_target_names (the names of variables that will be fed
# data using feed operators), and the fetch_targets (variables that
# we want to obtain data from using fetch operators).
[inference_program, feed_target_names,
fetch_targets] = fluid.io.load_inference_model(save_dirname, exe)
# The input's dimension should be 2-D and the second dim is 13
# The input data should be >= 0
batch_size = 10
test_reader = paddle.batch(
paddle.dataset.uci_housing.test(), batch_size=batch_size)
test_data = next(test_reader())
test_feat = numpy.array(
[data[0] for data in test_data]).astype("float32")
test_label = numpy.array(
[data[1] for data in test_data]).astype("float32")
assert feed_target_names[0] == 'x'
results = exe.run(inference_program,
feed={feed_target_names[0]: numpy.array(test_feat)},
fetch_list=fetch_targets)
print("infer shape: ", results[0].shape)
print("infer results: ", results[0])
print("ground truth: ", test_label)
def main(use_cuda):
if use_cuda and not fluid.core.is_compiled_with_cuda():
return
# Directory for saving the trained model
params_dirname = "fit_a_line.model"
inference_model_dirname = "fit_a_line.inference_model"
train(use_cuda, train_program, params_dirname, inference_model_dirname)
infer(use_cuda, inference_program, params_dirname)
infer_by_saved_model(use_cuda, inference_model_dirname)
class TestFitALine(unittest.TestCase):
def test_cpu(self):
with self.program_scope_guard():
with fluid.unique_name.guard():
main(use_cuda=False)
def test_cuda(self):
with self.program_scope_guard():
with fluid.unique_name.guard():
main(use_cuda=True)
@contextlib.contextmanager
def program_scope_guard(self):
prog = fluid.Program()
startup_prog = fluid.Program()
scope = fluid.core.Scope()
with fluid.scope_guard(scope):
with fluid.program_guard(prog, startup_prog):
yield
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import sys
import paddle
import paddle.fluid as fluid
try:
from paddle.fluid.contrib.trainer import *
from paddle.fluid.contrib.inferencer import *
except ImportError:
print(
"In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
file=sys.stderr)
from paddle.fluid.trainer import *
from paddle.fluid.inferencer import *
import paddle.fluid.core as core
import numpy
import os
import cifar10_small_test_set
def resnet_cifar10(input, depth=32):
def conv_bn_layer(input,
ch_out,
filter_size,
stride,
padding,
act='relu',
bias_attr=False):
tmp = fluid.layers.conv2d(
input=input,
filter_size=filter_size,
num_filters=ch_out,
stride=stride,
padding=padding,
act=None,
bias_attr=bias_attr)
return fluid.layers.batch_norm(input=tmp, act=act)
def shortcut(input, ch_in, ch_out, stride):
if ch_in != ch_out:
return conv_bn_layer(input, ch_out, 1, stride, 0, None)
else:
return input
def basicblock(input, ch_in, ch_out, stride):
tmp = conv_bn_layer(input, ch_out, 3, stride, 1)
tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, act=None, bias_attr=True)
short = shortcut(input, ch_in, ch_out, stride)
return fluid.layers.elementwise_add(x=tmp, y=short, act='relu')
def layer_warp(block_func, input, ch_in, ch_out, count, stride):
tmp = block_func(input, ch_in, ch_out, stride)
for i in range(1, count):
tmp = block_func(tmp, ch_out, ch_out, 1)
return tmp
assert (depth - 2) % 6 == 0
n = (depth - 2) // 6
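# Why (depth - 2) % 6 == 0 must hold: each basicblock contributes 2 conv
# layers and there are 3 stages of n blocks each, so depth = 6 * n + 2
# (the extra 2 being conv1 and the final fc). E.g. depth=32 gives n = 5.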
conv1 = conv_bn_layer(
input=input, ch_out=16, filter_size=3, stride=1, padding=1)
res1 = layer_warp(basicblock, conv1, 16, 16, n, 1)
res2 = layer_warp(basicblock, res1, 16, 32, n, 2)
res3 = layer_warp(basicblock, res2, 32, 64, n, 2)
pool = fluid.layers.pool2d(
input=res3, pool_size=8, pool_type='avg', pool_stride=1)
predict = fluid.layers.fc(input=pool, size=10, act='softmax')
return predict
def inference_network():
data_shape = [3, 32, 32]
images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
predict = resnet_cifar10(images, 32)
return predict
def train_network():
predict = inference_network()
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(cost)
accuracy = fluid.layers.accuracy(input=predict, label=label)
return [avg_cost, accuracy]
def optimizer_func():
return fluid.optimizer.Adam(learning_rate=0.001)
def train(use_cuda, train_program, parallel, params_dirname):
BATCH_SIZE = 128
EPOCH_NUM = 1
train_reader = paddle.batch(
paddle.reader.shuffle(
cifar10_small_test_set.train10(batch_size=10), buf_size=128 * 10),
batch_size=BATCH_SIZE,
drop_last=False)
test_reader = paddle.batch(
paddle.dataset.cifar.test10(), batch_size=BATCH_SIZE, drop_last=False)
def event_handler(event):
if isinstance(event, EndStepEvent):
avg_cost, accuracy = trainer.test(
reader=test_reader, feed_order=['pixel', 'label'])
print('Loss {0:2.2}, Acc {1:2.2}'.format(avg_cost, accuracy))
if accuracy > 0.01: # Low threshold for speeding up CI
if params_dirname is not None:
trainer.save_params(params_dirname)
return
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
trainer = Trainer(
train_func=train_program,
optimizer_func=optimizer_func,
place=place,
parallel=parallel)
trainer.train(
reader=train_reader,
num_epochs=EPOCH_NUM,
event_handler=event_handler,
feed_order=['pixel', 'label'])
def infer(use_cuda, inference_program, parallel, params_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = Inferencer(
infer_func=inference_program,
param_path=params_dirname,
place=place,
parallel=parallel)
# The input's dimension of conv should be 4-D or 5-D.
# Use normalized image pixels as input data, which should be in the range
# [0, 1.0].
tensor_img = numpy.random.rand(1, 3, 32, 32).astype("float32")
results = inferencer.infer({'pixel': tensor_img})
print("infer results: ", results)
def main(use_cuda, parallel):
if use_cuda and not fluid.core.is_compiled_with_cuda():
return
save_path = "image_classification_resnet.inference.model"
os.environ['CPU_NUM'] = str(4)
train(
use_cuda=use_cuda,
train_program=train_network,
params_dirname=save_path,
parallel=parallel)
# FIXME(zcd): in the inference stage, the number of
# input samples is one, so it is not appropriate to use parallel.
if parallel and use_cuda:
return
os.environ['CPU_NUM'] = str(1)
infer(
use_cuda=use_cuda,
inference_program=inference_network,
params_dirname=save_path,
parallel=parallel)
if __name__ == '__main__':
on_ci = bool(int(os.environ.get("SKIP_UNSTABLE_CI", '0')))
if not on_ci:
for use_cuda in (False, True):
for parallel in (False, True):
if use_cuda and not core.is_compiled_with_cuda():
continue
main(use_cuda=use_cuda, parallel=parallel)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import sys
import paddle
import paddle.fluid as fluid
try:
from paddle.fluid.contrib.trainer import *
from paddle.fluid.contrib.inferencer import *
except ImportError:
print(
"In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
file=sys.stderr)
from paddle.fluid.trainer import *
from paddle.fluid.inferencer import *
import paddle.fluid.core as core
import numpy
import os
import cifar10_small_test_set
def vgg16_bn_drop(input):
def conv_block(input, num_filter, groups, dropouts):
return fluid.nets.img_conv_group(
input=input,
pool_size=2,
pool_stride=2,
conv_num_filter=[num_filter] * groups,
conv_filter_size=3,
conv_act='relu',
conv_with_batchnorm=True,
conv_batchnorm_drop_rate=dropouts,
pool_type='max')
conv1 = conv_block(input, 64, 2, [0.3, 0])
conv2 = conv_block(conv1, 128, 2, [0.4, 0])
conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0])
conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0])
conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0])
drop = fluid.layers.dropout(x=conv5, dropout_prob=0.5)
fc1 = fluid.layers.fc(input=drop, size=4096, act=None)
bn = fluid.layers.batch_norm(input=fc1, act='relu')
drop2 = fluid.layers.dropout(x=bn, dropout_prob=0.5)
fc2 = fluid.layers.fc(input=drop2, size=4096, act=None)
predict = fluid.layers.fc(input=fc2, size=10, act='softmax')
return predict
def inference_network():
data_shape = [3, 32, 32]
images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
predict = vgg16_bn_drop(images)
return predict
def train_network():
predict = inference_network()
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(cost)
accuracy = fluid.layers.accuracy(input=predict, label=label)
return [avg_cost, accuracy]
def optimizer_func():
return fluid.optimizer.Adam(learning_rate=0.001)
def train(use_cuda, train_program, parallel, params_dirname):
BATCH_SIZE = 128
train_reader = paddle.batch(
paddle.reader.shuffle(
cifar10_small_test_set.train10(batch_size=10), buf_size=128 * 10),
batch_size=BATCH_SIZE,
drop_last=False)
    # Use only part of the test set to validate the training program
test_reader = paddle.batch(
cifar10_small_test_set.test10(BATCH_SIZE),
batch_size=BATCH_SIZE,
drop_last=False)
def event_handler(event):
if isinstance(event, EndStepEvent):
avg_cost, accuracy = trainer.test(
reader=test_reader, feed_order=['pixel', 'label'])
print('Loss {0:2.2}, Acc {1:2.2}'.format(avg_cost, accuracy))
if accuracy > 0.01: # Low threshold for speeding up CI
if params_dirname is not None:
trainer.save_params(params_dirname)
return
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
trainer = Trainer(
train_func=train_program,
place=place,
optimizer_func=optimizer_func,
parallel=parallel)
trainer.train(
reader=train_reader,
num_epochs=1,
event_handler=event_handler,
feed_order=['pixel', 'label'])
def infer(use_cuda, inference_program, parallel, params_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = Inferencer(
infer_func=inference_program,
param_path=params_dirname,
place=place,
parallel=parallel)
# The input's dimension of conv should be 4-D or 5-D.
    # Use normalized image pixels as input data, which should be in the range
# [0, 1.0].
tensor_img = numpy.random.rand(1, 3, 32, 32).astype("float32")
results = inferencer.infer({'pixel': tensor_img})
print("infer results: ", results)
def main(use_cuda, parallel):
save_path = "image_classification_vgg.inference.model"
os.environ['CPU_NUM'] = str(4)
train(
use_cuda=use_cuda,
train_program=train_network,
params_dirname=save_path,
parallel=parallel)
    # FIXME(zcd): in the inference stage only one input sample is fed,
    # so it is not appropriate to run inference in parallel.
if parallel and use_cuda:
return
os.environ['CPU_NUM'] = str(1)
infer(
use_cuda=use_cuda,
inference_program=inference_network,
params_dirname=save_path,
parallel=parallel)
if __name__ == '__main__':
for use_cuda in (False, True):
for parallel in (False, True):
if use_cuda and not core.is_compiled_with_cuda():
continue
main(use_cuda=use_cuda, parallel=parallel)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import paddle
import paddle.fluid as fluid
import sys
try:
from paddle.fluid.contrib.trainer import *
from paddle.fluid.contrib.inferencer import *
except ImportError:
print(
"In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
file=sys.stderr)
from paddle.fluid.trainer import *
from paddle.fluid.inferencer import *
import numpy as np
import math
WORD_DICT, VERB_DICT, LABEL_DICT = paddle.dataset.conll05.get_dict()
WORD_DICT_LEN = len(WORD_DICT)
LABEL_DICT_LEN = len(LABEL_DICT)
PRED_DICT_LEN = len(VERB_DICT)
MARK_DICT_LEN = 2
IS_SPARSE = True
BATCH_SIZE = 10
EMBEDDING_NAME = 'emb'
def lstm_net():
WORD_DIM = 32
MARK_DIM = 5
HIDDEN_DIM = 512
DEPTH = 8
# Data definitions
word = fluid.layers.data(
name='word_data', shape=[1], dtype='int64', lod_level=1)
predicate = fluid.layers.data(
name='verb_data', shape=[1], dtype='int64', lod_level=1)
ctx_n2 = fluid.layers.data(
name='ctx_n2_data', shape=[1], dtype='int64', lod_level=1)
ctx_n1 = fluid.layers.data(
name='ctx_n1_data', shape=[1], dtype='int64', lod_level=1)
ctx_0 = fluid.layers.data(
name='ctx_0_data', shape=[1], dtype='int64', lod_level=1)
ctx_p1 = fluid.layers.data(
name='ctx_p1_data', shape=[1], dtype='int64', lod_level=1)
ctx_p2 = fluid.layers.data(
name='ctx_p2_data', shape=[1], dtype='int64', lod_level=1)
mark = fluid.layers.data(
name='mark_data', shape=[1], dtype='int64', lod_level=1)
# 8 features
predicate_embedding = fluid.layers.embedding(
input=predicate,
size=[PRED_DICT_LEN, WORD_DIM],
dtype='float32',
is_sparse=IS_SPARSE,
param_attr='vemb')
mark_embedding = fluid.layers.embedding(
input=mark,
size=[MARK_DICT_LEN, MARK_DIM],
dtype='float32',
is_sparse=IS_SPARSE)
word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
emb_layers = [
fluid.layers.embedding(
size=[WORD_DICT_LEN, WORD_DIM],
input=x,
param_attr=fluid.ParamAttr(name=EMBEDDING_NAME))
for x in word_input
#name=EMBEDDING_NAME, trainable=False)) for x in word_input
]
emb_layers.append(predicate_embedding)
emb_layers.append(mark_embedding)
hidden_0_layers = [
fluid.layers.fc(input=emb, size=HIDDEN_DIM, act='tanh')
for emb in emb_layers
]
hidden_0 = fluid.layers.sums(input=hidden_0_layers)
lstm_0 = fluid.layers.dynamic_lstm(
input=hidden_0,
size=HIDDEN_DIM,
candidate_activation='relu',
gate_activation='sigmoid',
cell_activation='sigmoid')
# stack L-LSTM and R-LSTM with direct edges
input_tmp = [hidden_0, lstm_0]
for i in range(1, DEPTH):
mix_hidden = fluid.layers.sums(input=[
fluid.layers.fc(input=input_tmp[0], size=HIDDEN_DIM, act='tanh'),
fluid.layers.fc(input=input_tmp[1], size=HIDDEN_DIM, act='tanh')
])
lstm = fluid.layers.dynamic_lstm(
input=mix_hidden,
size=HIDDEN_DIM,
candidate_activation='relu',
gate_activation='sigmoid',
cell_activation='sigmoid',
is_reverse=((i % 2) == 1))
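        # Layers alternate direction (is_reverse is True for odd i), so the
        # stack mixes forward and backward passes over the sequence.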
input_tmp = [mix_hidden, lstm]
feature_out = fluid.layers.sums(input=[
fluid.layers.fc(input=input_tmp[0], size=LABEL_DICT_LEN, act='tanh'),
fluid.layers.fc(input=input_tmp[1], size=LABEL_DICT_LEN, act='tanh')
])
return feature_out
def inference_program():
predict = lstm_net()
return predict
def train_program():
MIX_HIDDEN_LR = 1e-3
predict = lstm_net()
target = fluid.layers.data(
name='target', shape=[1], dtype='int64', lod_level=1)
crf_cost = fluid.layers.linear_chain_crf(
input=predict,
label=target,
param_attr=fluid.ParamAttr(
name='crfw', learning_rate=MIX_HIDDEN_LR))
avg_cost = fluid.layers.mean(crf_cost)
return [avg_cost]
def optimize_func():
return fluid.optimizer.SGD(learning_rate=fluid.layers.exponential_decay(
learning_rate=0.01, decay_steps=100000, decay_rate=0.5, staircase=True))
def train(use_cuda, train_program, params_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
trainer = Trainer(
train_func=train_program, place=place, optimizer_func=optimize_func)
feed_order = [
'word_data', 'ctx_n2_data', 'ctx_n1_data', 'ctx_0_data', 'ctx_p1_data',
'ctx_p2_data', 'verb_data', 'mark_data', 'target'
]
#embedding_param = fluid.global_scope().find_var(
# EMBEDDING_NAME).get_tensor()
#embedding_param.set(
# load_parameter(conll05.get_embedding(), WORD_DICT_LEN, WORD_DIM),
# place)
def event_handler(event):
if isinstance(event, EndEpochEvent):
test_reader = paddle.batch(
paddle.dataset.conll05.test(), batch_size=BATCH_SIZE)
avg_cost_set = trainer.test(
reader=test_reader, feed_order=feed_order)
# get avg cost
avg_cost = np.array(avg_cost_set).mean()
print("avg_cost: %s" % avg_cost)
if float(avg_cost) < 100.0: # Large value to increase CI speed
trainer.save_params(params_dirname)
else:
print(
('BatchID {0}, Test Loss {1:0.2}'.format(event.epoch + 1,
float(avg_cost))))
if math.isnan(float(avg_cost)):
sys.exit("got NaN loss, training failed.")
elif isinstance(event, EndStepEvent):
print("Step {0}, Epoch {1} Metrics {2}".format(
event.step, event.epoch, list(map(np.array, event.metrics))))
if event.step == 1: # Run 2 iterations to speed CI
trainer.save_params(params_dirname)
trainer.stop()
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.conll05.test(), buf_size=8192),
batch_size=BATCH_SIZE)
trainer.train(
num_epochs=1,
event_handler=event_handler,
reader=train_reader,
feed_order=feed_order)
def infer(use_cuda, inference_program, params_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = Inferencer(
inference_program, param_path=params_dirname, place=place)
# Setup input by creating LoDTensor to represent sequence of words.
# Here each word is the basic element of the LoDTensor and the shape of
# each word (base_shape) should be [1] since it is simply an index to
# look up for the corresponding word vector.
# Suppose the recursive_sequence_lengths info is set to [[3, 4, 2]],
# which has only one level of detail. Then the created LoDTensor will have only
# one higher level structure (sequence of words, or sentence) than the basic
# element (word). Hence the LoDTensor will hold data for three sentences of
# length 3, 4 and 2, respectively.
# Note that recursive_sequence_lengths should be a list of lists.
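    # Given [[3, 4, 2]] and base_shape [1], each random LoDTensor below has
    # shape [9, 1] (3 + 4 + 2 = 9 words in total), with one LoD level
    # splitting the rows into the three sentences.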
recursive_seq_lens = [[3, 4, 2]]
base_shape = [1]
# The range of random integers is [low, high]
word = fluid.create_random_int_lodtensor(
recursive_seq_lens, base_shape, place, low=0, high=WORD_DICT_LEN - 1)
ctx_n2 = fluid.create_random_int_lodtensor(
recursive_seq_lens, base_shape, place, low=0, high=WORD_DICT_LEN - 1)
ctx_n1 = fluid.create_random_int_lodtensor(
recursive_seq_lens, base_shape, place, low=0, high=WORD_DICT_LEN - 1)
ctx_0 = fluid.create_random_int_lodtensor(
recursive_seq_lens, base_shape, place, low=0, high=WORD_DICT_LEN - 1)
ctx_p1 = fluid.create_random_int_lodtensor(
recursive_seq_lens, base_shape, place, low=0, high=WORD_DICT_LEN - 1)
ctx_p2 = fluid.create_random_int_lodtensor(
recursive_seq_lens, base_shape, place, low=0, high=WORD_DICT_LEN - 1)
pred = fluid.create_random_int_lodtensor(
recursive_seq_lens, base_shape, place, low=0, high=PRED_DICT_LEN - 1)
mark = fluid.create_random_int_lodtensor(
recursive_seq_lens, base_shape, place, low=0, high=MARK_DICT_LEN - 1)
results = inferencer.infer(
{
'word_data': word,
'ctx_n2_data': ctx_n2,
'ctx_n1_data': ctx_n1,
'ctx_0_data': ctx_0,
'ctx_p1_data': ctx_p1,
'ctx_p2_data': ctx_p2,
'verb_data': pred,
'mark_data': mark
},
return_numpy=False)
print("infer results: ", np.array(results[0]).shape)
def main(use_cuda):
if use_cuda and not fluid.core.is_compiled_with_cuda():
return
params_dirname = "label_semantic_roles.inference.model"
train(use_cuda, train_program, params_dirname)
infer(use_cuda, inference_program, params_dirname)
if __name__ == '__main__':
for use_cuda in (False, True):
main(use_cuda=use_cuda)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import contextlib
import sys
import numpy as np
import paddle
import paddle.fluid as fluid
try:
from paddle.fluid.contrib.trainer import *
from paddle.fluid.contrib.inferencer import *
except ImportError:
print(
"In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
file=sys.stderr)
from paddle.fluid.trainer import *
from paddle.fluid.inferencer import *
import paddle.fluid.framework as framework
import paddle.fluid.layers as pd
from paddle.fluid.executor import Executor
from functools import partial
import unittest
dict_size = 30000
source_dict_dim = target_dict_dim = dict_size
hidden_dim = 32
word_dim = 16
batch_size = 2
max_length = 8
topk_size = 50
trg_dic_size = 10000
beam_size = 2
decoder_size = hidden_dim
def encoder(is_sparse):
# encoder
src_word_id = pd.data(
name="src_word_id", shape=[1], dtype='int64', lod_level=1)
src_embedding = pd.embedding(
input=src_word_id,
size=[dict_size, word_dim],
dtype='float32',
is_sparse=is_sparse,
param_attr=fluid.ParamAttr(name='vemb'))
fc1 = pd.fc(input=src_embedding, size=hidden_dim * 4, act='tanh')
lstm_hidden0, lstm_0 = pd.dynamic_lstm(input=fc1, size=hidden_dim * 4)
encoder_out = pd.sequence_last_step(input=lstm_hidden0)
return encoder_out
def train_decoder(context, is_sparse):
# decoder
trg_language_word = pd.data(
name="target_language_word", shape=[1], dtype='int64', lod_level=1)
trg_embedding = pd.embedding(
input=trg_language_word,
size=[dict_size, word_dim],
dtype='float32',
is_sparse=is_sparse,
param_attr=fluid.ParamAttr(name='vemb'))
rnn = pd.DynamicRNN()
with rnn.block():
current_word = rnn.step_input(trg_embedding)
pre_state = rnn.memory(init=context)
current_state = pd.fc(input=[current_word, pre_state],
size=decoder_size,
act='tanh')
current_score = pd.fc(input=current_state,
size=target_dict_dim,
act='softmax')
rnn.update_memory(pre_state, current_state)
rnn.output(current_score)
return rnn()
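# decode() below re-implements the decoder as an explicit While loop over
# LoDTensorArrays so that beam search can expand and prune candidate
# translations at every step.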
def decode(context, is_sparse):
init_state = context
array_len = pd.fill_constant(shape=[1], dtype='int64', value=max_length)
counter = pd.zeros(shape=[1], dtype='int64', force_cpu=True)
# fill the first element with init_state
state_array = pd.create_array('float32')
pd.array_write(init_state, array=state_array, i=counter)
# ids, scores as memory
ids_array = pd.create_array('int64')
scores_array = pd.create_array('float32')
init_ids = pd.data(name="init_ids", shape=[1], dtype="int64", lod_level=2)
init_scores = pd.data(
name="init_scores", shape=[1], dtype="float32", lod_level=2)
pd.array_write(init_ids, array=ids_array, i=counter)
pd.array_write(init_scores, array=scores_array, i=counter)
cond = pd.less_than(x=counter, y=array_len)
while_op = pd.While(cond=cond)
with while_op.block():
pre_ids = pd.array_read(array=ids_array, i=counter)
pre_state = pd.array_read(array=state_array, i=counter)
pre_score = pd.array_read(array=scores_array, i=counter)
# expand the lod of pre_state to be the same with pre_score
pre_state_expanded = pd.sequence_expand(pre_state, pre_score)
pre_ids_emb = pd.embedding(
input=pre_ids,
size=[dict_size, word_dim],
dtype='float32',
is_sparse=is_sparse)
        # use the rnn cell to compute the new decoder state
current_state = pd.fc(input=[pre_state_expanded, pre_ids_emb],
size=decoder_size,
act='tanh')
current_state_with_lod = pd.lod_reset(x=current_state, y=pre_score)
# use score to do beam search
current_score = pd.fc(input=current_state_with_lod,
size=target_dict_dim,
act='softmax')
topk_scores, topk_indices = pd.topk(current_score, k=beam_size)
# calculate accumulated scores after topk to reduce computation cost
accu_scores = pd.elementwise_add(
x=pd.log(topk_scores), y=pd.reshape(
pre_score, shape=[-1]), axis=0)
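        # accu_scores = log(p(current word)) + accumulated score of the
        # prefix, so every beam candidate carries the log-probability of its
        # whole partial translation.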
selected_ids, selected_scores = pd.beam_search(
pre_ids,
pre_score,
topk_indices,
accu_scores,
beam_size,
end_id=10,
level=0)
pd.increment(x=counter, value=1, in_place=True)
# update the memories
pd.array_write(current_state, array=state_array, i=counter)
pd.array_write(selected_ids, array=ids_array, i=counter)
pd.array_write(selected_scores, array=scores_array, i=counter)
# update the break condition: up to the max length or all candidates of
# source sentences have ended.
length_cond = pd.less_than(x=counter, y=array_len)
finish_cond = pd.logical_not(pd.is_empty(x=selected_ids))
pd.logical_and(x=length_cond, y=finish_cond, out=cond)
translation_ids, translation_scores = pd.beam_search_decode(
ids=ids_array, scores=scores_array, beam_size=beam_size, end_id=10)
# return init_ids, init_scores
return translation_ids, translation_scores
def train_program(is_sparse):
context = encoder(is_sparse)
rnn_out = train_decoder(context, is_sparse)
label = pd.data(
name="target_language_next_word", shape=[1], dtype='int64', lod_level=1)
cost = pd.cross_entropy(input=rnn_out, label=label)
avg_cost = pd.mean(cost)
return avg_cost
def optimizer_func():
return fluid.optimizer.Adagrad(
learning_rate=1e-4,
regularization=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=0.1))
def train(use_cuda, is_sparse, is_local=True):
EPOCH_NUM = 1
if use_cuda and not fluid.core.is_compiled_with_cuda():
return
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.wmt14.train(dict_size), buf_size=1000),
batch_size=batch_size)
feed_order = [
'src_word_id', 'target_language_word', 'target_language_next_word'
]
def event_handler(event):
if isinstance(event, EndStepEvent):
print('pass_id=' + str(event.epoch) + ' batch=' + str(event.step))
if event.step == 10:
trainer.stop()
trainer = Trainer(
train_func=partial(train_program, is_sparse),
place=place,
optimizer_func=optimizer_func)
trainer.train(
reader=train_reader,
num_epochs=EPOCH_NUM,
event_handler=event_handler,
feed_order=feed_order)
def decode_main(use_cuda, is_sparse):
if use_cuda and not fluid.core.is_compiled_with_cuda():
return
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
context = encoder(is_sparse)
translation_ids, translation_scores = decode(context, is_sparse)
exe = Executor(place)
exe.run(framework.default_startup_program())
init_ids_data = np.array([1 for _ in range(batch_size)], dtype='int64')
init_scores_data = np.array(
[1. for _ in range(batch_size)], dtype='float32')
init_ids_data = init_ids_data.reshape((batch_size, 1))
init_scores_data = init_scores_data.reshape((batch_size, 1))
init_recursive_seq_lens = [1] * batch_size
init_recursive_seq_lens = [init_recursive_seq_lens, init_recursive_seq_lens]
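    # Two identical LoD levels: the outer level splits the batch into source
    # sentences, the inner one holds the (initially single) candidate prefix
    # per sentence, which beam search widens as decoding proceeds.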
init_ids = fluid.create_lod_tensor(init_ids_data, init_recursive_seq_lens,
place)
init_scores = fluid.create_lod_tensor(init_scores_data,
init_recursive_seq_lens, place)
train_data = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.wmt14.train(dict_size), buf_size=1000),
batch_size=batch_size)
feed_order = ['src_word_id']
feed_list = [
framework.default_main_program().global_block().var(var_name)
for var_name in feed_order
]
feeder = fluid.DataFeeder(feed_list, place)
for data in train_data():
feed_dict = feeder.feed([[x[0]] for x in data])
feed_dict['init_ids'] = init_ids
feed_dict['init_scores'] = init_scores
result_ids, result_scores = exe.run(
framework.default_main_program(),
feed=feed_dict,
fetch_list=[translation_ids, translation_scores],
return_numpy=False)
print(result_ids.recursive_sequence_lengths())
break
class TestMachineTranslation(unittest.TestCase):
pass
@contextlib.contextmanager
def scope_prog_guard():
prog = fluid.Program()
startup_prog = fluid.Program()
scope = fluid.core.Scope()
with fluid.scope_guard(scope):
with fluid.program_guard(prog, startup_prog):
yield
def inject_test_train(use_cuda, is_sparse):
f_name = 'test_{0}_{1}_train'.format('cuda' if use_cuda else 'cpu', 'sparse'
if is_sparse else 'dense')
def f(*args):
with scope_prog_guard():
train(use_cuda, is_sparse)
setattr(TestMachineTranslation, f_name, f)
def inject_test_decode(use_cuda, is_sparse, decorator=None):
f_name = 'test_{0}_{1}_decode'.format('cuda'
if use_cuda else 'cpu', 'sparse'
if is_sparse else 'dense')
def f(*args):
with scope_prog_guard():
decode_main(use_cuda, is_sparse)
if decorator is not None:
f = decorator(f)
setattr(TestMachineTranslation, f_name, f)
for _use_cuda_ in (False, True):
for _is_sparse_ in (False, True):
inject_test_train(_use_cuda_, _is_sparse_)
for _use_cuda_ in (False, True):
for _is_sparse_ in (False, True):
_decorator_ = None
if _use_cuda_:
_decorator_ = unittest.skip(
reason='Beam Search does not support CUDA!')
inject_test_decode(
is_sparse=_is_sparse_, use_cuda=_use_cuda_, decorator=_decorator_)
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import sys
import paddle.fluid as fluid
try:
from paddle.fluid.contrib.trainer import *
from paddle.fluid.contrib.inferencer import *
except ImportError:
print(
"In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
file=sys.stderr)
from paddle.fluid.trainer import *
from paddle.fluid.inferencer import *
import paddle.fluid.core as core
import paddle
import numpy
import math
import sys
import os
BATCH_SIZE = 64
def inference_program():
img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
conv_pool_1 = fluid.nets.simple_img_conv_pool(
input=img,
filter_size=5,
num_filters=20,
pool_size=2,
pool_stride=2,
act="relu")
conv_pool_1 = fluid.layers.batch_norm(conv_pool_1)
conv_pool_2 = fluid.nets.simple_img_conv_pool(
input=conv_pool_1,
filter_size=5,
num_filters=50,
pool_size=2,
pool_stride=2,
act="relu")
prediction = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax')
return prediction
def train_program():
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
predict = inference_program()
cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(cost)
acc = fluid.layers.accuracy(input=predict, label=label)
return [avg_cost, acc]
def optimizer_func():
return fluid.optimizer.Adam(learning_rate=0.001)
def train(use_cuda, train_program, parallel, params_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
trainer = Trainer(
train_func=train_program,
place=place,
optimizer_func=optimizer_func,
parallel=parallel)
def event_handler(event):
if isinstance(event, EndEpochEvent):
test_reader = paddle.batch(
paddle.dataset.mnist.test(), batch_size=BATCH_SIZE)
avg_cost, acc = trainer.test(
reader=test_reader, feed_order=['img', 'label'])
print("avg_cost: %s" % avg_cost)
print("acc : %s" % acc)
if acc > 0.2: # Smaller value to increase CI speed
trainer.save_params(params_dirname)
else:
print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format(
event.epoch + 1, avg_cost, acc))
if math.isnan(avg_cost):
sys.exit("got NaN loss, training failed.")
elif isinstance(event, EndStepEvent):
print(
("Step {0}, Epoch {1} Metrics {2}".format(
event.step, event.epoch,
list(map(numpy.array, event.metrics)))))
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.train(), buf_size=500),
batch_size=BATCH_SIZE)
trainer.train(
num_epochs=1,
event_handler=event_handler,
reader=train_reader,
feed_order=['img', 'label'])
def infer(use_cuda, inference_program, parallel, params_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = Inferencer(
infer_func=inference_program,
param_path=params_dirname,
place=place,
parallel=parallel)
batch_size = 1
tensor_img = numpy.random.uniform(-1.0, 1.0,
[batch_size, 1, 28, 28]).astype("float32")
results = inferencer.infer({'img': tensor_img})
print("infer results: ", results[0])
def main(use_cuda, parallel):
params_dirname = "recognize_digits_conv.inference.model"
# call train() with is_local argument to run distributed train
os.environ['CPU_NUM'] = str(4)
train(
use_cuda=use_cuda,
train_program=train_program,
params_dirname=params_dirname,
parallel=parallel)
    # FIXME(zcd): in the inference stage only one input sample is fed,
    # so it is not appropriate to run inference in parallel.
if parallel and use_cuda:
return
os.environ['CPU_NUM'] = str(1)
infer(
use_cuda=use_cuda,
inference_program=inference_program,
params_dirname=params_dirname,
parallel=parallel)
if __name__ == '__main__':
for use_cuda in (False, True):
for parallel in (False, True):
if use_cuda and not core.is_compiled_with_cuda():
continue
main(use_cuda=use_cuda, parallel=parallel)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import sys
import paddle.fluid as fluid
try:
from paddle.fluid.contrib.trainer import *
from paddle.fluid.contrib.inferencer import *
except ImportError:
print(
"In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
file=sys.stderr)
from paddle.fluid.trainer import *
from paddle.fluid.inferencer import *
import paddle.fluid.core as core
import paddle
import numpy
import math
import sys
import os
BATCH_SIZE = 64
def inference_program():
img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
hidden = fluid.layers.fc(input=img, size=200, act='tanh')
hidden = fluid.layers.fc(input=hidden, size=200, act='tanh')
prediction = fluid.layers.fc(input=hidden, size=10, act='softmax')
return prediction
def train_program():
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
predict = inference_program()
cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(cost)
acc = fluid.layers.accuracy(input=predict, label=label)
return [avg_cost, acc]
def optimizer_func():
return fluid.optimizer.Adam(learning_rate=0.001)
def train(use_cuda, train_program, params_dirname, parallel):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
trainer = Trainer(
train_func=train_program,
place=place,
optimizer_func=optimizer_func,
parallel=parallel)
def event_handler(event):
if isinstance(event, EndEpochEvent):
test_reader = paddle.batch(
paddle.dataset.mnist.test(), batch_size=BATCH_SIZE)
avg_cost, acc = trainer.test(
reader=test_reader, feed_order=['img', 'label'])
print("avg_cost: %s" % avg_cost)
print("acc : %s" % acc)
if acc > 0.2: # Smaller value to increase CI speed
trainer.save_params(params_dirname)
else:
print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format(
event.epoch + 1, avg_cost, acc))
if math.isnan(avg_cost):
sys.exit("got NaN loss, training failed.")
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.train(), buf_size=500),
batch_size=BATCH_SIZE)
trainer.train(
num_epochs=1,
event_handler=event_handler,
reader=train_reader,
feed_order=['img', 'label'])
def infer(use_cuda, inference_program, parallel, params_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = Inferencer(
infer_func=inference_program,
param_path=params_dirname,
place=place,
parallel=parallel)
batch_size = 1
tensor_img = numpy.random.uniform(-1.0, 1.0,
[batch_size, 1, 28, 28]).astype("float32")
results = inferencer.infer({'img': tensor_img})
print("infer results: ", results[0])
def main(use_cuda, parallel):
params_dirname = "recognize_digits_mlp.inference.model"
# call train() with is_local argument to run distributed train
os.environ['CPU_NUM'] = str(4)
train(
use_cuda=use_cuda,
train_program=train_program,
params_dirname=params_dirname,
parallel=parallel)
    # FIXME(zcd): in the inference stage only one input sample is fed,
    # so it is not appropriate to run inference in parallel.
if parallel and use_cuda:
return
os.environ['CPU_NUM'] = str(1)
infer(
use_cuda=use_cuda,
inference_program=inference_program,
params_dirname=params_dirname,
parallel=parallel)
if __name__ == '__main__':
for use_cuda in (False, True):
for parallel in (False, True):
if use_cuda and not core.is_compiled_with_cuda():
continue
main(use_cuda=use_cuda, parallel=parallel)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import math
import sys
import numpy as np
import paddle
import paddle.fluid as fluid
import sys
try:
from paddle.fluid.contrib.trainer import *
from paddle.fluid.contrib.inferencer import *
except ImportError:
print(
"In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
file=sys.stderr)
from paddle.fluid.trainer import *
from paddle.fluid.inferencer import *
import paddle.fluid.layers as layers
import paddle.fluid.nets as nets
IS_SPARSE = True
USE_GPU = False
BATCH_SIZE = 256
def get_usr_combined_features():
    # FIXME(dzh): the old API integer_value(10) may have had a range check;
    # currently we don't have a user-configured check.
USR_DICT_SIZE = paddle.dataset.movielens.max_user_id() + 1
uid = layers.data(name='user_id', shape=[1], dtype='int64')
usr_emb = layers.embedding(
input=uid,
dtype='float32',
size=[USR_DICT_SIZE, 32],
param_attr='user_table',
is_sparse=IS_SPARSE)
usr_fc = layers.fc(input=usr_emb, size=32)
USR_GENDER_DICT_SIZE = 2
usr_gender_id = layers.data(name='gender_id', shape=[1], dtype='int64')
usr_gender_emb = layers.embedding(
input=usr_gender_id,
size=[USR_GENDER_DICT_SIZE, 16],
param_attr='gender_table',
is_sparse=IS_SPARSE)
usr_gender_fc = layers.fc(input=usr_gender_emb, size=16)
USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table)
usr_age_id = layers.data(name='age_id', shape=[1], dtype="int64")
usr_age_emb = layers.embedding(
input=usr_age_id,
size=[USR_AGE_DICT_SIZE, 16],
is_sparse=IS_SPARSE,
param_attr='age_table')
usr_age_fc = layers.fc(input=usr_age_emb, size=16)
USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1
usr_job_id = layers.data(name='job_id', shape=[1], dtype="int64")
usr_job_emb = layers.embedding(
input=usr_job_id,
size=[USR_JOB_DICT_SIZE, 16],
param_attr='job_table',
is_sparse=IS_SPARSE)
usr_job_fc = layers.fc(input=usr_job_emb, size=16)
concat_embed = layers.concat(
input=[usr_fc, usr_gender_fc, usr_age_fc, usr_job_fc], axis=1)
usr_combined_features = layers.fc(input=concat_embed, size=200, act="tanh")
return usr_combined_features
def get_mov_combined_features():
MOV_DICT_SIZE = paddle.dataset.movielens.max_movie_id() + 1
mov_id = layers.data(name='movie_id', shape=[1], dtype='int64')
mov_emb = layers.embedding(
input=mov_id,
dtype='float32',
size=[MOV_DICT_SIZE, 32],
param_attr='movie_table',
is_sparse=IS_SPARSE)
mov_fc = layers.fc(input=mov_emb, size=32)
CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories())
category_id = layers.data(
name='category_id', shape=[1], dtype='int64', lod_level=1)
mov_categories_emb = layers.embedding(
input=category_id, size=[CATEGORY_DICT_SIZE, 32], is_sparse=IS_SPARSE)
mov_categories_hidden = layers.sequence_pool(
input=mov_categories_emb, pool_type="sum")
MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict())
mov_title_id = layers.data(
name='movie_title', shape=[1], dtype='int64', lod_level=1)
mov_title_emb = layers.embedding(
input=mov_title_id, size=[MOV_TITLE_DICT_SIZE, 32], is_sparse=IS_SPARSE)
mov_title_conv = nets.sequence_conv_pool(
input=mov_title_emb,
num_filters=32,
filter_size=3,
act="tanh",
pool_type="sum")
concat_embed = layers.concat(
input=[mov_fc, mov_categories_hidden, mov_title_conv], axis=1)
# FIXME(dzh) : need tanh operator
mov_combined_features = layers.fc(input=concat_embed, size=200, act="tanh")
return mov_combined_features
def inference_program():
usr_combined_features = get_usr_combined_features()
mov_combined_features = get_mov_combined_features()
inference = layers.cos_sim(X=usr_combined_features, Y=mov_combined_features)
scale_infer = layers.scale(x=inference, scale=5.0)
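    # cos_sim lies in [-1, 1]; scaling by 5.0 brings the prediction toward
    # the 1-5 rating range used by the MovieLens scores.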
return scale_infer
def train_program():
scale_infer = inference_program()
label = layers.data(name='score', shape=[1], dtype='float32')
square_cost = layers.square_error_cost(input=scale_infer, label=label)
avg_cost = layers.mean(square_cost)
return [avg_cost, scale_infer]
def optimizer_func():
return fluid.optimizer.SGD(learning_rate=0.2)
def train(use_cuda, train_program, params_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
trainer = Trainer(
train_func=train_program, place=place, optimizer_func=optimizer_func)
feed_order = [
'user_id', 'gender_id', 'age_id', 'job_id', 'movie_id', 'category_id',
'movie_title', 'score'
]
def event_handler(event):
if isinstance(event, EndStepEvent):
test_reader = paddle.batch(
paddle.dataset.movielens.test(), batch_size=BATCH_SIZE)
avg_cost_set = trainer.test(
reader=test_reader, feed_order=feed_order)
# get avg cost
avg_cost = np.array(avg_cost_set).mean()
print("avg_cost: %s" % avg_cost)
if float(avg_cost) < 4: # Smaller value to increase CI speed
trainer.save_params(params_dirname)
trainer.stop()
else:
print(
('BatchID {0}, Test Loss {1:0.2}'.format(event.epoch + 1,
float(avg_cost))))
if math.isnan(float(avg_cost)):
sys.exit("got NaN loss, training failed.")
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.movielens.train(), buf_size=8192),
batch_size=BATCH_SIZE)
trainer.train(
num_epochs=1,
event_handler=event_handler,
reader=train_reader,
feed_order=feed_order)
def infer(use_cuda, inference_program, params_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = Inferencer(
inference_program, param_path=params_dirname, place=place)
# Use the first data from paddle.dataset.movielens.test() as input.
# Use create_lod_tensor(data, recursive_sequence_lengths, place) API
# to generate LoD Tensor where `data` is a list of sequences of index
# numbers, `recursive_sequence_lengths` is the length-based level of detail
# (lod) info associated with `data`.
# For example, data = [[10, 2, 3], [2, 3]] means that it contains
# two sequences of indexes, of length 3 and 2, respectively.
# Correspondingly, recursive_sequence_lengths = [[3, 2]] contains one
# level of detail info, indicating that `data` consists of two sequences
# of length 3 and 2, respectively.
user_id = fluid.create_lod_tensor([[np.int64(1)]], [[1]], place)
gender_id = fluid.create_lod_tensor([[np.int64(1)]], [[1]], place)
age_id = fluid.create_lod_tensor([[np.int64(0)]], [[1]], place)
job_id = fluid.create_lod_tensor([[np.int64(10)]], [[1]], place)
movie_id = fluid.create_lod_tensor([[np.int64(783)]], [[1]], place)
category_id = fluid.create_lod_tensor(
[np.array(
[10, 8, 9], dtype='int64')], [[3]], place)
movie_title = fluid.create_lod_tensor(
[np.array(
[1069, 4140, 2923, 710, 988], dtype='int64')], [[5]], place)
results = inferencer.infer(
{
'user_id': user_id,
'gender_id': gender_id,
'age_id': age_id,
'job_id': job_id,
'movie_id': movie_id,
'category_id': category_id,
'movie_title': movie_title
},
return_numpy=False)
print("infer results: ", np.array(results[0]))
def main(use_cuda):
if use_cuda and not fluid.core.is_compiled_with_cuda():
return
params_dirname = "recommender_system.inference.model"
train(
use_cuda=use_cuda,
train_program=train_program,
params_dirname=params_dirname)
infer(
use_cuda=use_cuda,
inference_program=inference_program,
params_dirname=params_dirname)
if __name__ == '__main__':
main(USE_GPU)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import paddle
import paddle.fluid as fluid
import sys
try:
from paddle.fluid.contrib.trainer import *
from paddle.fluid.contrib.inferencer import *
except ImportError:
print(
"In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
file=sys.stderr)
from paddle.fluid.trainer import *
from paddle.fluid.inferencer import *
from functools import partial
import numpy as np
import math
CLASS_DIM = 2
EMB_DIM = 128
HID_DIM = 512
BATCH_SIZE = 128
def convolution_net(data, input_dim, class_dim, emb_dim, hid_dim):
emb = fluid.layers.embedding(
input=data, size=[input_dim, emb_dim], is_sparse=True)
conv_3 = fluid.nets.sequence_conv_pool(
input=emb,
num_filters=hid_dim,
filter_size=3,
act="tanh",
pool_type="sqrt")
conv_4 = fluid.nets.sequence_conv_pool(
input=emb,
num_filters=hid_dim,
filter_size=4,
act="tanh",
pool_type="sqrt")
prediction = fluid.layers.fc(input=[conv_3, conv_4],
size=class_dim,
act="softmax")
return prediction
def inference_program(word_dict):
data = fluid.layers.data(
name="words", shape=[1], dtype="int64", lod_level=1)
dict_dim = len(word_dict)
net = convolution_net(data, dict_dim, CLASS_DIM, EMB_DIM, HID_DIM)
return net
def train_program(word_dict):
prediction = inference_program(word_dict)
label = fluid.layers.data(name="label", shape=[1], dtype="int64")
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(cost)
accuracy = fluid.layers.accuracy(input=prediction, label=label)
return [avg_cost, accuracy]
def optimizer_func():
return fluid.optimizer.Adagrad(learning_rate=0.002)
def train(use_cuda, train_program, params_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
word_dict = paddle.dataset.imdb.word_dict()
trainer = Trainer(
train_func=partial(train_program, word_dict),
place=place,
optimizer_func=optimizer_func)
def event_handler(event):
if isinstance(event, EndEpochEvent):
test_reader = paddle.batch(
paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE)
avg_cost, acc = trainer.test(
reader=test_reader, feed_order=['words', 'label'])
print("avg_cost: %s" % avg_cost)
print("acc : %s" % acc)
if acc > 0.2: # Smaller value to increase CI speed
trainer.save_params(params_dirname)
trainer.stop()
else:
print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format(
event.epoch + 1, avg_cost, acc))
if math.isnan(avg_cost):
sys.exit("got NaN loss, training failed.")
elif isinstance(event, EndStepEvent):
print("Step {0}, Epoch {1} Metrics {2}".format(
event.step, event.epoch, list(map(np.array, event.metrics))))
if event.step == 1: # Run 2 iterations to speed CI
trainer.save_params(params_dirname)
trainer.stop()
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.imdb.train(word_dict), buf_size=25000),
batch_size=BATCH_SIZE)
trainer.train(
num_epochs=1,
event_handler=event_handler,
reader=train_reader,
feed_order=['words', 'label'])
def infer(use_cuda, inference_program, params_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
word_dict = paddle.dataset.imdb.word_dict()
inferencer = Inferencer(
infer_func=partial(inference_program, word_dict),
param_path=params_dirname,
place=place)
# Setup input by creating LoDTensor to represent sequence of words.
# Here each word is the basic element of the LoDTensor and the shape of
# each word (base_shape) should be [1] since it is simply an index to
# look up for the corresponding word vector.
# Suppose the recursive_sequence_lengths info is set to [[3, 4, 2]],
# which has only one level of detail. Then the created LoDTensor will have only
# one higher level structure (sequence of words, or sentence) than the basic
# element (word). Hence the LoDTensor will hold data for three sentences of
# length 3, 4 and 2, respectively.
# Note that recursive_sequence_lengths should be a list of lists.
recursive_seq_lens = [[3, 4, 2]]
base_shape = [1]
# The range of random integers is [low, high]
tensor_words = fluid.create_random_int_lodtensor(
recursive_seq_lens, base_shape, place, low=0, high=len(word_dict) - 1)
results = inferencer.infer({'words': tensor_words})
print("infer results: ", results)
def main(use_cuda):
if use_cuda and not fluid.core.is_compiled_with_cuda():
return
params_dirname = "understand_sentiment_conv.inference.model"
train(use_cuda, train_program, params_dirname)
infer(use_cuda, inference_program, params_dirname)
if __name__ == '__main__':
for use_cuda in (False, True):
main(use_cuda=use_cuda)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import paddle
import paddle.fluid as fluid
import sys
try:
from paddle.fluid.contrib.trainer import *
from paddle.fluid.contrib.inferencer import *
except ImportError:
print(
"In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
file=sys.stderr)
from paddle.fluid.trainer import *
from paddle.fluid.inferencer import *
from functools import partial
import numpy as np
import math
CLASS_DIM = 2
EMB_DIM = 128
BATCH_SIZE = 128
LSTM_SIZE = 128
def dynamic_rnn_lstm(data, input_dim, class_dim, emb_dim, lstm_size):
emb = fluid.layers.embedding(
input=data, size=[input_dim, emb_dim], is_sparse=True)
sentence = fluid.layers.fc(input=emb, size=lstm_size, act='tanh')
rnn = fluid.layers.DynamicRNN()
with rnn.block():
word = rnn.step_input(sentence)
prev_hidden = rnn.memory(value=0.0, shape=[lstm_size])
prev_cell = rnn.memory(value=0.0, shape=[lstm_size])
def gate_common(ipt, hidden, size):
gate0 = fluid.layers.fc(input=ipt, size=size, bias_attr=True)
gate1 = fluid.layers.fc(input=hidden, size=size, bias_attr=False)
return gate0 + gate1
forget_gate = fluid.layers.sigmoid(x=gate_common(word, prev_hidden,
lstm_size))
input_gate = fluid.layers.sigmoid(x=gate_common(word, prev_hidden,
lstm_size))
output_gate = fluid.layers.sigmoid(x=gate_common(word, prev_hidden,
lstm_size))
cell_gate = fluid.layers.sigmoid(x=gate_common(word, prev_hidden,
lstm_size))
cell = forget_gate * prev_cell + input_gate * cell_gate
hidden = output_gate * fluid.layers.tanh(x=cell)
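        # Standard LSTM cell update: cell = f * c_prev + i * g and
        # hidden = o * tanh(cell). Note the candidate gate above uses a
        # sigmoid where a conventional LSTM would use tanh.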
rnn.update_memory(prev_cell, cell)
rnn.update_memory(prev_hidden, hidden)
rnn.output(hidden)
last = fluid.layers.sequence_last_step(rnn())
prediction = fluid.layers.fc(input=last, size=class_dim, act="softmax")
return prediction
def inference_program(word_dict):
data = fluid.layers.data(
name="words", shape=[1], dtype="int64", lod_level=1)
dict_dim = len(word_dict)
pred = dynamic_rnn_lstm(data, dict_dim, CLASS_DIM, EMB_DIM, LSTM_SIZE)
return pred
def train_program(word_dict):
prediction = inference_program(word_dict)
label = fluid.layers.data(name="label", shape=[1], dtype="int64")
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(cost)
accuracy = fluid.layers.accuracy(input=prediction, label=label)
return [avg_cost, accuracy]
def optimizer_func():
return fluid.optimizer.Adagrad(learning_rate=0.002)
def train(use_cuda, train_program, params_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
word_dict = paddle.dataset.imdb.word_dict()
trainer = Trainer(
train_func=partial(train_program, word_dict),
place=place,
optimizer_func=optimizer_func)
def event_handler(event):
if isinstance(event, EndEpochEvent):
test_reader = paddle.batch(
paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE)
avg_cost, acc = trainer.test(
reader=test_reader, feed_order=['words', 'label'])
print("avg_cost: %s" % avg_cost)
print("acc : %s" % acc)
if acc > 0.2: # Smaller value to increase CI speed
trainer.save_params(params_dirname)
trainer.stop()
else:
print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format(
event.epoch + 1, avg_cost, acc))
if math.isnan(avg_cost):
sys.exit("got NaN loss, training failed.")
elif isinstance(event, EndStepEvent):
print("Step {0}, Epoch {1} Metrics {2}".format(
event.step, event.epoch, list(map(np.array, event.metrics))))
if event.step == 1: # Run 2 iterations to speed CI
trainer.save_params(params_dirname)
trainer.stop()
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.imdb.train(word_dict), buf_size=25000),
batch_size=BATCH_SIZE)
trainer.train(
num_epochs=1,
event_handler=event_handler,
reader=train_reader,
feed_order=['words', 'label'])
def infer(use_cuda, inference_program, params_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
word_dict = paddle.dataset.imdb.word_dict()
inferencer = Inferencer(
infer_func=partial(inference_program, word_dict),
param_path=params_dirname,
place=place)
# Setup input by creating LoDTensor to represent sequence of words.
# Here each word is the basic element of the LoDTensor and the shape of
# each word (base_shape) should be [1] since it is simply an index to
# look up for the corresponding word vector.
# Suppose the recursive_sequence_lengths info is set to [[3, 4, 2]],
# which has only one level of detail. Then the created LoDTensor will have only
# one higher level structure (sequence of words, or sentence) than the basic
# element (word). Hence the LoDTensor will hold data for three sentences of
# length 3, 4 and 2, respectively.
# Note that recursive_sequence_lengths should be a list of lists.
recursive_seq_lens = [[3, 4, 2]]
base_shape = [1]
# The range of random integers is [low, high]
tensor_words = fluid.create_random_int_lodtensor(
recursive_seq_lens, base_shape, place, low=0, high=len(word_dict) - 1)
results = inferencer.infer({'words': tensor_words})
print("infer results: ", results)
def main(use_cuda):
if use_cuda and not fluid.core.is_compiled_with_cuda():
return
params_dirname = "understand_sentiment_conv.inference.model"
train(use_cuda, train_program, params_dirname)
infer(use_cuda, inference_program, params_dirname)
if __name__ == '__main__':
for use_cuda in (False, True):
main(use_cuda=use_cuda)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import paddle
import paddle.fluid as fluid
import sys
try:
from paddle.fluid.contrib.trainer import *
from paddle.fluid.contrib.inferencer import *
except ImportError:
print(
"In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
file=sys.stderr)
from paddle.fluid.trainer import *
from paddle.fluid.inferencer import *
from functools import partial
import numpy as np
import math
CLASS_DIM = 2
EMB_DIM = 128
HID_DIM = 512
STACKED_NUM = 3
BATCH_SIZE = 128
def stacked_lstm_net(data, input_dim, class_dim, emb_dim, hid_dim, stacked_num):
assert stacked_num % 2 == 1
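    # An odd stacked_num keeps the final layer of the loop below running
    # forward (is_reverse is True only for even i), so directions alternate
    # cleanly through the stack.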
emb = fluid.layers.embedding(
input=data, size=[input_dim, emb_dim], is_sparse=True)
fc1 = fluid.layers.fc(input=emb, size=hid_dim)
lstm1, cell1 = fluid.layers.dynamic_lstm(input=fc1, size=hid_dim)
inputs = [fc1, lstm1]
for i in range(2, stacked_num + 1):
fc = fluid.layers.fc(input=inputs, size=hid_dim)
lstm, cell = fluid.layers.dynamic_lstm(
input=fc, size=hid_dim, is_reverse=(i % 2) == 0)
inputs = [fc, lstm]
fc_last = fluid.layers.sequence_pool(input=inputs[0], pool_type='max')
lstm_last = fluid.layers.sequence_pool(input=inputs[1], pool_type='max')
prediction = fluid.layers.fc(input=[fc_last, lstm_last],
size=class_dim,
act='softmax')
return prediction
def inference_program(word_dict):
data = fluid.layers.data(
name="words", shape=[1], dtype="int64", lod_level=1)
dict_dim = len(word_dict)
net = stacked_lstm_net(data, dict_dim, CLASS_DIM, EMB_DIM, HID_DIM,
STACKED_NUM)
return net
def train_program(word_dict):
prediction = inference_program(word_dict)
label = fluid.layers.data(name="label", shape=[1], dtype="int64")
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(cost)
accuracy = fluid.layers.accuracy(input=prediction, label=label)
return [avg_cost, accuracy]
def optimizer_func():
return fluid.optimizer.Adagrad(learning_rate=0.002)
def train(use_cuda, train_program, params_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
word_dict = paddle.dataset.imdb.word_dict()
trainer = Trainer(
train_func=partial(train_program, word_dict),
place=place,
optimizer_func=optimizer_func)
def event_handler(event):
if isinstance(event, EndEpochEvent):
test_reader = paddle.batch(
paddle.dataset.imdb.test(word_dict),
batch_size=BATCH_SIZE,
drop_last=False)
avg_cost, acc = trainer.test(
reader=test_reader, feed_order=['words', 'label'])
print("avg_cost: %s" % avg_cost)
print("acc : %s" % acc)
if acc > 0.2: # Smaller value to increase CI speed
trainer.save_params(params_dirname)
trainer.stop()
else:
print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format(
event.epoch + 1, avg_cost, acc))
if math.isnan(avg_cost):
sys.exit("got NaN loss, training failed.")
elif isinstance(event, EndStepEvent):
print("Step {0}, Epoch {1} Metrics {2}".format(
event.step, event.epoch, list(map(np.array, event.metrics))))
if event.step == 1: # Run 2 iterations to speed CI
trainer.save_params(params_dirname)
trainer.stop()
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.imdb.train(word_dict), buf_size=25000),
batch_size=BATCH_SIZE,
drop_last=False)
trainer.train(
num_epochs=1,
event_handler=event_handler,
reader=train_reader,
feed_order=['words', 'label'])
def infer(use_cuda, inference_program, params_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
word_dict = paddle.dataset.imdb.word_dict()
inferencer = Inferencer(
infer_func=partial(inference_program, word_dict),
param_path=params_dirname,
place=place)
# Setup input by creating LoDTensor to represent sequence of words.
# Here each word is the basic element of the LoDTensor and the shape of
# each word (base_shape) should be [1] since it is simply an index to
# look up for the corresponding word vector.
# Suppose the recursive_sequence_lengths info is set to [[3, 4, 2]],
# which has only one level of detail. Then the created LoDTensor will have only
# one higher level structure (sequence of words, or sentence) than the basic
# element (word). Hence the LoDTensor will hold data for three sentences of
# length 3, 4 and 2, respectively.
# Note that recursive_sequence_lengths should be a list of lists.
recursive_seq_lens = [[3, 4, 2]]
base_shape = [1]
# The range of random integers is [low, high]
tensor_words = fluid.create_random_int_lodtensor(
recursive_seq_lens, base_shape, place, low=0, high=len(word_dict) - 1)
results = inferencer.infer({'words': tensor_words})
print("infer results: ", results)
def main(use_cuda):
if use_cuda and not fluid.core.is_compiled_with_cuda():
return
params_dirname = "understand_sentiment_stacked_lstm.inference.model"
train(use_cuda, train_program, params_dirname)
infer(use_cuda, inference_program, params_dirname)
if __name__ == '__main__':
for use_cuda in (False, True):
main(use_cuda=use_cuda)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import paddle
import paddle.fluid as fluid
import sys
try:
from paddle.fluid.contrib.trainer import *
from paddle.fluid.contrib.inferencer import *
except ImportError:
print(
"In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
file=sys.stderr)
from paddle.fluid.trainer import *
from paddle.fluid.inferencer import *
import numpy as np
import math
import sys
from functools import partial
PASS_NUM = 100
EMBED_SIZE = 32
HIDDEN_SIZE = 256
N = 5
BATCH_SIZE = 32
word_dict = paddle.dataset.imikolov.build_dict()
dict_size = len(word_dict)
def inference_program(is_sparse):
first_word = fluid.layers.data(name='firstw', shape=[1], dtype='int64')
second_word = fluid.layers.data(name='secondw', shape=[1], dtype='int64')
third_word = fluid.layers.data(name='thirdw', shape=[1], dtype='int64')
forth_word = fluid.layers.data(name='forthw', shape=[1], dtype='int64')
embed_first = fluid.layers.embedding(
input=first_word,
size=[dict_size, EMBED_SIZE],
dtype='float32',
is_sparse=is_sparse,
param_attr='shared_w')
embed_second = fluid.layers.embedding(
input=second_word,
size=[dict_size, EMBED_SIZE],
dtype='float32',
is_sparse=is_sparse,
param_attr='shared_w')
embed_third = fluid.layers.embedding(
input=third_word,
size=[dict_size, EMBED_SIZE],
dtype='float32',
is_sparse=is_sparse,
param_attr='shared_w')
embed_forth = fluid.layers.embedding(
input=forth_word,
size=[dict_size, EMBED_SIZE],
dtype='float32',
is_sparse=is_sparse,
param_attr='shared_w')
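    # All four input words look up the same embedding matrix via
    # param_attr='shared_w', the weight sharing at the heart of this N-gram
    # language model.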
concat_embed = fluid.layers.concat(
input=[embed_first, embed_second, embed_third, embed_forth], axis=1)
hidden1 = fluid.layers.fc(input=concat_embed,
size=HIDDEN_SIZE,
act='sigmoid')
predict_word = fluid.layers.fc(input=hidden1, size=dict_size, act='softmax')
return predict_word
def train_program(is_sparse):
# The declaration of 'next_word' must be after the invoking of inference_program,
# or the data input order of train program would be [next_word, firstw, secondw,
# thirdw, forthw], which is not correct.
predict_word = inference_program(is_sparse)
next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64')
cost = fluid.layers.cross_entropy(input=predict_word, label=next_word)
avg_cost = fluid.layers.mean(cost)
return avg_cost
def optimizer_func():
return fluid.optimizer.SGD(learning_rate=0.001)
def train(use_cuda, train_program, params_dirname):
train_reader = paddle.batch(
paddle.dataset.imikolov.train(word_dict, N), BATCH_SIZE)
test_reader = paddle.batch(
paddle.dataset.imikolov.test(word_dict, N), BATCH_SIZE)
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
def event_handler(event):
if isinstance(event, EndStepEvent):
outs = trainer.test(
reader=test_reader,
feed_order=['firstw', 'secondw', 'thirdw', 'forthw', 'nextw'])
avg_cost = outs[0]
print("loss= ", avg_cost)
if avg_cost < 10.0:
trainer.save_params(params_dirname)
trainer.stop()
if math.isnan(avg_cost):
sys.exit("got NaN loss, training failed.")
trainer = Trainer(
train_func=train_program, optimizer_func=optimizer_func, place=place)
trainer.train(
reader=train_reader,
num_epochs=1,
event_handler=event_handler,
feed_order=['firstw', 'secondw', 'thirdw', 'forthw', 'nextw'])
def infer(use_cuda, inference_program, params_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = Inferencer(
infer_func=inference_program, param_path=params_dirname, place=place)
# Setup inputs by creating 4 LoDTensors representing 4 words. Here each word
# is simply an index to look up for the corresponding word vector and hence
# the shape of word (base_shape) should be [1]. The recursive_sequence_lengths,
# which is length-based level of detail (lod) of each LoDTensor, should be [[1]]
# meaning there is only one level of detail and there is only one sequence of
# one word on this level.
# Note that recursive_sequence_lengths should be a list of lists.
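    # With recursive_seq_lens [[1]] and base_shape [1], each LoDTensor below
    # is a single [1, 1] entry holding one random word index.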
recursive_seq_lens = [[1]]
base_shape = [1]
# The range of random integers is [low, high]
first_word = fluid.create_random_int_lodtensor(
recursive_seq_lens, base_shape, place, low=0, high=dict_size - 1)
second_word = fluid.create_random_int_lodtensor(
recursive_seq_lens, base_shape, place, low=0, high=dict_size - 1)
third_word = fluid.create_random_int_lodtensor(
recursive_seq_lens, base_shape, place, low=0, high=dict_size - 1)
fourth_word = fluid.create_random_int_lodtensor(
recursive_seq_lens, base_shape, place, low=0, high=dict_size - 1)
result = inferencer.infer(
{
'firstw': first_word,
'secondw': second_word,
'thirdw': third_word,
'forthw': fourth_word
},
return_numpy=False)
print(np.array(result[0]))
def main(use_cuda, is_sparse):
if use_cuda and not fluid.core.is_compiled_with_cuda():
return
params_dirname = "word2vec.inference.model"
train(
use_cuda=use_cuda,
train_program=partial(train_program, is_sparse),
params_dirname=params_dirname)
infer(
use_cuda=use_cuda,
inference_program=partial(inference_program, is_sparse),
params_dirname=params_dirname)
if __name__ == '__main__':
for use_cuda in (False, True):
for is_sparse in (False, True):
main(use_cuda=use_cuda, is_sparse=is_sparse)