remove high level api (#23854) (#23991)

b312cb89 · zhangchunle · GitHub · e5985675 · b312cb89 · b312cb89
21 changed files
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -83,7 +83,6 @@ option(WITH_CONTRIB     "Compile the third-party contributation"        OFF)
 option(REPLACE_ENFORCE_GLOG "Replace PADDLE_ENFORCE with glog/CHECK for better debug." OFF)
 option(WITH_GRPC     "Use grpc as the default rpc framework"            ${WITH_DISTRIBUTE})
 option(WITH_INFERENCE_API_TEST   "Test fluid inference C++ high-level api interface"  OFF)
-option(WITH_HIGH_LEVEL_API_TEST   "Test fluid python high-level api interface"  OFF)
 option(PY_VERSION       "Compile PaddlePaddle with python3 support"     ${PY_VERSION})
 option(WITH_DGC   "Use DGC(Deep Gradient Compression) or not" ${WITH_DISTRIBUTE})
 option(SANITIZER_TYPE "Choose the type of sanitizer, options are: Address, Leak, Memory, Thread, Undefined" OFF)

--- a/paddle/scripts/paddle_build.sh
+++ b/paddle/scripts/paddle_build.sh
@@ -211,7 +211,6 @@ function cmake_base() {
        -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
        -DWITH_CONTRIB=${WITH_CONTRIB:-ON}
        -DWITH_INFERENCE_API_TEST=${WITH_INFERENCE_API_TEST:-ON}
-        -DWITH_HIGH_LEVEL_API_TEST=${WITH_HIGH_LEVEL_API_TEST:-OFF}
        -DINFERENCE_DEMO_INSTALL_DIR=${INFERENCE_DEMO_INSTALL_DIR}
        -DPY_VERSION=${PY_VERSION:-2.7}
        -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX:-/paddle/build}
@@ -244,7 +243,6 @@ EOF
        -DWITH_CONTRIB=${WITH_CONTRIB:-ON} \
        -DWITH_INFERENCE_API_TEST=${WITH_INFERENCE_API_TEST:-ON} \
-        -DWITH_HIGH_LEVEL_API_TEST=${WITH_HIGH_LEVEL_API_TEST:-OFF} \
        -DINFERENCE_DEMO_INSTALL_DIR=${INFERENCE_DEMO_INSTALL_DIR} \
        -DPY_VERSION=${PY_VERSION:-2.7} \
        -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX:-/paddle/build} \
        -DWITH_GRPC=${grpc_flag} \

--- a/python/paddle/fluid/__init__.py
+++ b/python/paddle/fluid/__init__.py
@@ -47,7 +47,6 @@ from .dataset import *
 from .data import *
 from . import trainer_desc
-from . import inferencer
 from . import io
 from . import evaluator
@@ -92,7 +91,7 @@ from .dygraph.varbase_patch_methods import monkey_patch_varbase
 Tensor = LoDTensor
 __all__ = framework.__all__ + executor.__all__ + \
-    trainer_desc.__all__ + inferencer.__all__ + transpiler.__all__ + \
+    trainer_desc.__all__ + transpiler.__all__ + \
    parallel_executor.__all__ + lod_tensor.__all__ + \
    data_feed_desc.__all__ + compiler.__all__ + backward.__all__  + [
        'io',

--- a/python/paddle/fluid/contrib/inferencer.py
+++ b/python/paddle/fluid/contrib/inferencer.py
-#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from __future__ import print_function
-from ..wrapped_decorator import signature_safe_contextmanager
-from .. import core
-from .. import executor
-from .. import framework
-from .. import io
-from .. import parallel_executor
-from .. import unique_name
-from .trainer import check_and_get_place
-__all__ = ['Inferencer', ]
-class Inferencer(object):
-    """
-    Inferencer High Level API.
-    Args:
-        infer_func (Python func): Infer function that will return predict Variable
-        param_path (str): The path where the inference model is saved by fluid.io.save_params
-        place (Place): place to do the inference
-        parallel (bool): use parallel_executor to run the inference, it will use multi CPU/GPU.
-    Examples:
-        .. code-block:: python
-            def inference_program():
-                x = fluid.layers.data(name='x', shape=[13], dtype='float32')
-                y_predict = fluid.layers.fc(input=x, size=1, act=None)
-                return y_predict
-            place = fluid.CPUPlace()
-            inferencer = fluid.Inferencer(
-                infer_func=inference_program, param_path="/tmp/model", place=place)
-    """
-    def __init__(self, infer_func, param_path, place=None, parallel=False):
-        self.param_path = param_path
-        self.scope = core.Scope()
-        self.parallel = parallel
-        self.place = check_and_get_place(place)
-        self.inference_program = framework.Program()
-        with framework.program_guard(self.inference_program):
-            with unique_name.guard():
-                self.predict_var = infer_func()
-        with self._prog_and_scope_guard():
-            # load params from param_path into scope
-            io.load_params(executor.Executor(self.place), param_path)
-        if parallel:
-            with self._prog_and_scope_guard():
-                self.exe = parallel_executor.ParallelExecutor(
-                    use_cuda=isinstance(self.place, core.CUDAPlace),
-                    loss_name=self.predict_var.name)
-        else:
-            self.exe = executor.Executor(self.place)
-        self.inference_program = self.inference_program.clone(for_test=True)
-    def infer(self, inputs, return_numpy=True):
-        """
-        Do Inference for Inputs
-        Args:
-            inputs (map): a map of {"input_name": input_var} that will be feed into the inference program
-            return_numpy (bool): transform return value into numpy or not
-        Returns:
-            Tensor or Numpy: the predict value of the inference model for the inputs
-        Examples:
-            .. code-block:: python
-                tensor_x = numpy.random.uniform(0, 10, [batch_size, 13]).astype("float32")
-                results = inferencer.infer({'x': tensor_x})
-        """
-        if not isinstance(inputs, dict):
-            raise ValueError(
-                "inputs should be a map of {'input_name': input_var}")
-        with self._prog_and_scope_guard():
-            results = self.exe.run(feed=inputs,
-                                   fetch_list=[self.predict_var.name],
-                                   return_numpy=return_numpy)
-        return results
-    @signature_safe_contextmanager
-    def _prog_and_scope_guard(self):
-        with framework.program_guard(main_program=self.inference_program):
-            with executor.scope_guard(self.scope):
-                yield
--- a/python/paddle/fluid/contrib/trainer.py
+++ b/python/paddle/fluid/contrib/trainer.py
--- a/python/paddle/fluid/inferencer.py
+++ b/python/paddle/fluid/inferencer.py
-#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# NOTE: inferencer is moved into fluid.contrib.inferencer.
-__all__ = []
--- a/python/paddle/fluid/tests/book/CMakeLists.txt
+++ b/python/paddle/fluid/tests/book/CMakeLists.txt
@@ -5,7 +5,3 @@ string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
 foreach(src ${TEST_OPS})
    py_test(${src} SRCS ${src}.py)
 endforeach()
-if(WITH_HIGH_LEVEL_API_TEST)
-  add_subdirectory(high-level-api)
-endif()
--- a/python/paddle/fluid/tests/book/high-level-api/CMakeLists.txt
+++ b/python/paddle/fluid/tests/book/high-level-api/CMakeLists.txt
-file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*_new_api.py")
-string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
-# This test is buggy
-# py_test(test_understand_sentiment_dynamic_rnn SRCS
-# 	test_understand_sentiment_dynamic_rnn.py SERIAL)
-LIST(REMOVE_ITEM TEST_OPS test_understand_sentiment_dynamic_rnn_new_api)
-if(NOT APPLE)
-    # default test
-    foreach(src ${TEST_OPS})
-        py_test(${src} SRCS ${src}.py)
-    endforeach()
-else()
-    foreach(src ${TEST_OPS})
-        if(${src} STREQUAL "test_image_classification_vgg_new_api")
-            message(WARNING "These tests has been disabled in OSX for random fail: \n" ${src})
-        elseif(${src} STREQUAL "test_image_classification_resnet_new_api")
-            message(WARNING "These tests has been disabled in OSX for random fail: \n" ${src})
-        elseif(${src} STREQUAL "test_recognize_digits_conv_new_api")
-            message(WARNING "These tests has been disabled in OSX for random fail: \n" ${src})
-        elseif(${src} STREQUAL "test_recognize_digits_mlp_new_api")
-            message(WARNING "These tests has been disabled in OSX for random fail: \n" ${src})
-        elseif()
-            py_test(${src} SRCS ${src}.py)
-            set_tests_properties(${src} PROPERTIES LABELS "RUN_TYPE=DIST")
-        endif()
-    endforeach()
-endif()
--- a/python/paddle/fluid/tests/book/high-level-api/cifar10_small_test_set.py
+++ b/python/paddle/fluid/tests/book/high-level-api/cifar10_small_test_set.py
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-CIFAR dataset.
-This module will download dataset from
-https://www.cs.toronto.edu/~kriz/cifar.html and parse train/test set into
-paddle reader creators.
-The CIFAR-10 dataset consists of 60000 32x32 color images in 10 classes,
-with 6000 images per class. There are 50000 training images and 10000 test
-images.
-The CIFAR-100 dataset is just like the CIFAR-10, except it has 100 classes
-containing 600 images each. There are 500 training images and 100 testing
-images per class.
-"""
-from __future__ import print_function
-import itertools
-import numpy
-import paddle.dataset.common
-import tarfile
-import six
-from six.moves import cPickle as pickle
-__all__ = ['train10']
-URL_PREFIX = 'https://www.cs.toronto.edu/~kriz/'
-CIFAR10_URL = URL_PREFIX + 'cifar-10-python.tar.gz'
-CIFAR10_MD5 = 'c58f30108f718f92721af3b95e74349a'
-def reader_creator(filename, sub_name, batch_size=None):
-    def read_batch(batch):
-        data = batch[six.b('data')]
-        labels = batch.get(
-            six.b('labels'), batch.get(six.b('fine_labels'), None))
-        assert labels is not None
-        for sample, label in six.moves.zip(data, labels):
-            yield (sample / 255.0).astype(numpy.float32), int(label)
-    def reader():
-        with tarfile.open(filename, mode='r') as f:
-            names = [
-                each_item.name for each_item in f if sub_name in each_item.name
-            ]
-            batch_count = 0
-            for name in names:
-                if six.PY2:
-                    batch = pickle.load(f.extractfile(name))
-                else:
-                    batch = pickle.load(f.extractfile(name), encoding='bytes')
-                for item in read_batch(batch):
-                    if isinstance(batch_size, int) and batch_count > batch_size:
-                        break
-                    batch_count += 1
-                    yield item
-    return reader
-def train10(batch_size=None):
-    """
-    CIFAR-10 training set creator.
-    It returns a reader creator, each sample in the reader is image pixels in
-    [0, 1] and label in [0, 9].
-    :return: Training reader creator
-    :rtype: callable
-    """
-    return reader_creator(
-        paddle.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5),
-        'data_batch',
-        batch_size=batch_size)
-def test10(batch_size=None):
-    """
-    CIFAR-10 test set creator.
-    It returns a reader creator, each sample in the reader is image pixels in
-    [0, 1] and label in [0, 9].
-    :return: Test reader creator.
-    :rtype: callable
-    """
-    return reader_creator(
-        paddle.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5),
-        'test_batch',
-        batch_size=batch_size)
--- a/python/paddle/fluid/tests/book/high-level-api/test_fit_a_line_new_api.py
+++ b/python/paddle/fluid/tests/book/high-level-api/test_fit_a_line_new_api.py
-#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from __future__ import print_function
-import paddle
-import paddle.fluid as fluid
-import sys
-try:
-    from paddle.fluid.contrib.trainer import *
-    from paddle.fluid.contrib.inferencer import *
-except ImportError:
-    print(
-        "In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
-        file=sys.stderr)
-    from paddle.fluid.trainer import *
-    from paddle.fluid.inferencer import *
-import contextlib
-import numpy
-import unittest
-# train reader
-BATCH_SIZE = 20
-train_reader = paddle.batch(
-    paddle.reader.shuffle(
-        paddle.dataset.uci_housing.train(), buf_size=500),
-    batch_size=BATCH_SIZE)
-test_reader = paddle.batch(
-    paddle.reader.shuffle(
-        paddle.dataset.uci_housing.test(), buf_size=500),
-    batch_size=BATCH_SIZE)
-def inference_program():
-    x = fluid.layers.data(name='x', shape=[13], dtype='float32')
-    y_predict = fluid.layers.fc(input=x, size=1, act=None)
-    return y_predict
-def train_program():
-    y = fluid.layers.data(name='y', shape=[1], dtype='float32')
-    y_predict = inference_program()
-    loss = fluid.layers.square_error_cost(input=y_predict, label=y)
-    avg_loss = fluid.layers.mean(loss)
-    return [avg_loss, y_predict]
-def optimizer_func():
-    return fluid.optimizer.SGD(learning_rate=0.001)
-def train(use_cuda, train_program, params_dirname, inference_model_dirname):
-    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
-    trainer = Trainer(
-        train_func=train_program, place=place, optimizer_func=optimizer_func)
-    def event_handler(event):
-        if isinstance(event, EndStepEvent):
-            if event.step == 10:
-                test_metrics = trainer.test(
-                    reader=test_reader, feed_order=['x', 'y'])
-                print(test_metrics)
-                '''
-                ...
-                ['25.768919467926025']
-                ['15.343549569447836']
-                ...
-                '''
-                if params_dirname is not None:
-                    trainer.save_params(params_dirname)
-                    trainer.save_inference_model(inference_model_dirname,
-                                                 ['x'], [1])
-                trainer.stop()
-    trainer.train(
-        reader=train_reader,
-        num_epochs=100,
-        event_handler=event_handler,
-        feed_order=['x', 'y'])
-# infer
-def infer(use_cuda, inference_program, params_dirname=None):
-    if params_dirname is None:
-        return
-    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
-    inferencer = Inferencer(
-        infer_func=inference_program, param_path=params_dirname, place=place)
-    batch_size = 10
-    tensor_x = numpy.random.uniform(0, 10, [batch_size, 13]).astype("float32")
-    results = inferencer.infer({'x': tensor_x})
-    print("infer results: ", results[0])
-def infer_by_saved_model(use_cuda, save_dirname=None):
-    if save_dirname is None:
-        return
-    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
-    exe = fluid.Executor(place)
-    inference_scope = fluid.core.Scope()
-    with fluid.scope_guard(inference_scope):
-        # Use fluid.io.load_inference_model to obtain the inference program desc,
-        # the feed_target_names (the names of variables that will be fed
-        # data using feed operators), and the fetch_targets (variables that
-        # we want to obtain data from using fetch operators).
-        [inference_program, feed_target_names,
-         fetch_targets] = fluid.io.load_inference_model(save_dirname, exe)
-        # The input's dimension should be 2-D and the second dim is 13
-        # The input data should be >= 0
-        batch_size = 10
-        test_reader = paddle.batch(
-            paddle.dataset.uci_housing.test(), batch_size=batch_size)
-        test_data = next(test_reader())
-        test_feat = numpy.array(
-            [data[0] for data in test_data]).astype("float32")
-        test_label = numpy.array(
-            [data[1] for data in test_data]).astype("float32")
-        assert feed_target_names[0] == 'x'
-        results = exe.run(inference_program,
-                          feed={feed_target_names[0]: numpy.array(test_feat)},
-                          fetch_list=fetch_targets)
-        print("infer shape: ", results[0].shape)
-        print("infer results: ", results[0])
-        print("ground truth: ", test_label)
-def main(use_cuda):
-    if use_cuda and not fluid.core.is_compiled_with_cuda():
-        return
-    # Directory for saving the trained model
-    params_dirname = "fit_a_line.model"
-    inference_model_dirname = "fit_a_line.inference_model"
-    train(use_cuda, train_program, params_dirname, inference_model_dirname)
-    infer(use_cuda, inference_program, params_dirname)
-    infer_by_saved_model(use_cuda, inference_model_dirname)
-class TestFitALine(unittest.TestCase):
-    def test_cpu(self):
-        with self.program_scope_guard():
-            with fluid.unique_name.guard():
-                main(use_cuda=False)
-    def test_cuda(self):
-        with self.program_scope_guard():
-            with fluid.unique_name.guard():
-                main(use_cuda=True)
-    @contextlib.contextmanager
-    def program_scope_guard(self):
-        prog = fluid.Program()
-        startup_prog = fluid.Program()
-        scope = fluid.core.Scope()
-        with fluid.scope_guard(scope):
-            with fluid.program_guard(prog, startup_prog):
-                yield
-if __name__ == '__main__':
-    unittest.main()
--- a/python/paddle/fluid/tests/book/high-level-api/test_image_classification_resnet_new_api.py
+++ b/python/paddle/fluid/tests/book/high-level-api/test_image_classification_resnet_new_api.py
-#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from __future__ import print_function
-import sys
-import paddle
-import paddle.fluid as fluid
-try:
-    from paddle.fluid.contrib.trainer import *
-    from paddle.fluid.contrib.inferencer import *
-except ImportError:
-    print(
-        "In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
-        file=sys.stderr)
-    from paddle.fluid.trainer import *
-    from paddle.fluid.inferencer import *
-import paddle.fluid.core as core
-import numpy
-import os
-import cifar10_small_test_set
-def resnet_cifar10(input, depth=32):
-    def conv_bn_layer(input,
-                      ch_out,
-                      filter_size,
-                      stride,
-                      padding,
-                      act='relu',
-                      bias_attr=False):
-        tmp = fluid.layers.conv2d(
-            input=input,
-            filter_size=filter_size,
-            num_filters=ch_out,
-            stride=stride,
-            padding=padding,
-            act=None,
-            bias_attr=bias_attr)
-        return fluid.layers.batch_norm(input=tmp, act=act)
-    def shortcut(input, ch_in, ch_out, stride):
-        if ch_in != ch_out:
-            return conv_bn_layer(input, ch_out, 1, stride, 0, None)
-        else:
-            return input
-    def basicblock(input, ch_in, ch_out, stride):
-        tmp = conv_bn_layer(input, ch_out, 3, stride, 1)
-        tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, act=None, bias_attr=True)
-        short = shortcut(input, ch_in, ch_out, stride)
-        return fluid.layers.elementwise_add(x=tmp, y=short, act='relu')
-    def layer_warp(block_func, input, ch_in, ch_out, count, stride):
-        tmp = block_func(input, ch_in, ch_out, stride)
-        for i in range(1, count):
-            tmp = block_func(tmp, ch_out, ch_out, 1)
-        return tmp
-    assert (depth - 2) % 6 == 0
-    n = (depth - 2) // 6
-    conv1 = conv_bn_layer(
-        input=input, ch_out=16, filter_size=3, stride=1, padding=1)
-    res1 = layer_warp(basicblock, conv1, 16, 16, n, 1)
-    res2 = layer_warp(basicblock, res1, 16, 32, n, 2)
-    res3 = layer_warp(basicblock, res2, 32, 64, n, 2)
-    pool = fluid.layers.pool2d(
-        input=res3, pool_size=8, pool_type='avg', pool_stride=1)
-    predict = fluid.layers.fc(input=pool, size=10, act='softmax')
-    return predict
-def inference_network():
-    data_shape = [3, 32, 32]
-    images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
-    predict = resnet_cifar10(images, 32)
-    return predict
-def train_network():
-    predict = inference_network()
-    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
-    cost = fluid.layers.cross_entropy(input=predict, label=label)
-    avg_cost = fluid.layers.mean(cost)
-    accuracy = fluid.layers.accuracy(input=predict, label=label)
-    return [avg_cost, accuracy]
-def optimizer_func():
-    return fluid.optimizer.Adam(learning_rate=0.001)
-def train(use_cuda, train_program, parallel, params_dirname):
-    BATCH_SIZE = 128
-    EPOCH_NUM = 1
-    train_reader = paddle.batch(
-        paddle.reader.shuffle(
-            cifar10_small_test_set.train10(batch_size=10), buf_size=128 * 10),
-        batch_size=BATCH_SIZE,
-        drop_last=False)
-    test_reader = paddle.batch(
-        paddle.dataset.cifar.test10(), batch_size=BATCH_SIZE, drop_last=False)
-    def event_handler(event):
-        if isinstance(event, EndStepEvent):
-            avg_cost, accuracy = trainer.test(
-                reader=test_reader, feed_order=['pixel', 'label'])
-            print('Loss {0:2.2}, Acc {1:2.2}'.format(avg_cost, accuracy))
-            if accuracy > 0.01:  # Low threshold for speeding up CI
-                if params_dirname is not None:
-                    trainer.save_params(params_dirname)
-                return
-    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
-    trainer = Trainer(
-        train_func=train_program,
-        optimizer_func=optimizer_func,
-        place=place,
-        parallel=parallel)
-    trainer.train(
-        reader=train_reader,
-        num_epochs=EPOCH_NUM,
-        event_handler=event_handler,
-        feed_order=['pixel', 'label'])
-def infer(use_cuda, inference_program, parallel, params_dirname=None):
-    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
-    inferencer = Inferencer(
-        infer_func=inference_program,
-        param_path=params_dirname,
-        place=place,
-        parallel=parallel)
-    # The input's dimension of conv should be 4-D or 5-D.
-    # Use normilized image pixels as input data, which should be in the range
-    # [0, 1.0].
-    tensor_img = numpy.random.rand(1, 3, 32, 32).astype("float32")
-    results = inferencer.infer({'pixel': tensor_img})
-    print("infer results: ", results)
-def main(use_cuda, parallel):
-    if use_cuda and not fluid.core.is_compiled_with_cuda():
-        return
-    save_path = "image_classification_resnet.inference.model"
-    os.environ['CPU_NUM'] = str(4)
-    train(
-        use_cuda=use_cuda,
-        train_program=train_network,
-        params_dirname=save_path,
-        parallel=parallel)
-    # FIXME(zcd): in the inference stage, the number of
-    # input data is one, it is not appropriate to use parallel.
-    if parallel and use_cuda:
-        return
-    os.environ['CPU_NUM'] = str(1)
-    infer(
-        use_cuda=use_cuda,
-        inference_program=inference_network,
-        params_dirname=save_path,
-        parallel=parallel)
-if __name__ == '__main__':
-    on_ci = bool(int(os.environ.get("SKIP_UNSTABLE_CI", '0')))
-    if not on_ci:
-        for use_cuda in (False, True):
-            for parallel in (False, True):
-                if use_cuda and not core.is_compiled_with_cuda():
-                    continue
-                main(use_cuda=use_cuda, parallel=parallel)
--- a/python/paddle/fluid/tests/book/high-level-api/test_image_classification_vgg_new_api.py
+++ b/python/paddle/fluid/tests/book/high-level-api/test_image_classification_vgg_new_api.py
-#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from __future__ import print_function
-import sys
-import paddle
-import paddle.fluid as fluid
-try:
-    from paddle.fluid.contrib.trainer import *
-    from paddle.fluid.contrib.inferencer import *
-except ImportError:
-    print(
-        "In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
-        file=sys.stderr)
-    from paddle.fluid.trainer import *
-    from paddle.fluid.inferencer import *
-import paddle.fluid.core as core
-import numpy
-import os
-import cifar10_small_test_set
-def vgg16_bn_drop(input):
-    def conv_block(input, num_filter, groups, dropouts):
-        return fluid.nets.img_conv_group(
-            input=input,
-            pool_size=2,
-            pool_stride=2,
-            conv_num_filter=[num_filter] * groups,
-            conv_filter_size=3,
-            conv_act='relu',
-            conv_with_batchnorm=True,
-            conv_batchnorm_drop_rate=dropouts,
-            pool_type='max')
-    conv1 = conv_block(input, 64, 2, [0.3, 0])
-    conv2 = conv_block(conv1, 128, 2, [0.4, 0])
-    conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0])
-    conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0])
-    conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0])
-    drop = fluid.layers.dropout(x=conv5, dropout_prob=0.5)
-    fc1 = fluid.layers.fc(input=drop, size=4096, act=None)
-    bn = fluid.layers.batch_norm(input=fc1, act='relu')
-    drop2 = fluid.layers.dropout(x=bn, dropout_prob=0.5)
-    fc2 = fluid.layers.fc(input=drop2, size=4096, act=None)
-    predict = fluid.layers.fc(input=fc2, size=10, act='softmax')
-    return predict
-def inference_network():
-    data_shape = [3, 32, 32]
-    images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
-    predict = vgg16_bn_drop(images)
-    return predict
-def train_network():
-    predict = inference_network()
-    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
-    cost = fluid.layers.cross_entropy(input=predict, label=label)
-    avg_cost = fluid.layers.mean(cost)
-    accuracy = fluid.layers.accuracy(input=predict, label=label)
-    return [avg_cost, accuracy]
-def optimizer_func():
-    return fluid.optimizer.Adam(learning_rate=0.001)
-def train(use_cuda, train_program, parallel, params_dirname):
-    BATCH_SIZE = 128
-    train_reader = paddle.batch(
-        paddle.reader.shuffle(
-            cifar10_small_test_set.train10(batch_size=10), buf_size=128 * 10),
-        batch_size=BATCH_SIZE,
-        drop_last=False)
-    # Use only part of the test set data validation program
-    test_reader = paddle.batch(
-        cifar10_small_test_set.test10(BATCH_SIZE),
-        batch_size=BATCH_SIZE,
-        drop_last=False)
-    def event_handler(event):
-        if isinstance(event, EndStepEvent):
-            avg_cost, accuracy = trainer.test(
-                reader=test_reader, feed_order=['pixel', 'label'])
-            print('Loss {0:2.2}, Acc {1:2.2}'.format(avg_cost, accuracy))
-            if accuracy > 0.01:  # Low threshold for speeding up CI
-                if params_dirname is not None:
-                    trainer.save_params(params_dirname)
-                return
-    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
-    trainer = Trainer(
-        train_func=train_program,
-        place=place,
-        optimizer_func=optimizer_func,
-        parallel=parallel)
-    trainer.train(
-        reader=train_reader,
-        num_epochs=1,
-        event_handler=event_handler,
-        feed_order=['pixel', 'label'])
-def infer(use_cuda, inference_program, parallel, params_dirname=None):
-    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
-    inferencer = Inferencer(
-        infer_func=inference_program,
-        param_path=params_dirname,
-        place=place,
-        parallel=parallel)
-    # The input's dimension of conv should be 4-D or 5-D.
-    # Use normilized image pixels as input data, which should be in the range
-    # [0, 1.0].
-    tensor_img = numpy.random.rand(1, 3, 32, 32).astype("float32")
-    results = inferencer.infer({'pixel': tensor_img})
-    print("infer results: ", results)
-def main(use_cuda, parallel):
-    save_path = "image_classification_vgg.inference.model"
-    os.environ['CPU_NUM'] = str(4)
-    train(
-        use_cuda=use_cuda,
-        train_program=train_network,
-        params_dirname=save_path,
-        parallel=parallel)
-    # FIXME(zcd): in the inference stage, the number of
-    # input data is one, it is not appropriate to use parallel.
-    if parallel and use_cuda:
-        return
-    os.environ['CPU_NUM'] = str(1)
-    infer(
-        use_cuda=use_cuda,
-        inference_program=inference_network,
-        params_dirname=save_path,
-        parallel=parallel)
-if __name__ == '__main__':
-    for use_cuda in (False, True):
-        for parallel in (False, True):
-            if use_cuda and not core.is_compiled_with_cuda():
-                continue
-            main(use_cuda=use_cuda, parallel=parallel)
--- a/python/paddle/fluid/tests/book/high-level-api/test_label_semantic_roles_new_api.py
+++ b/python/paddle/fluid/tests/book/high-level-api/test_label_semantic_roles_new_api.py
-#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from __future__ import print_function
-import paddle
-import paddle.fluid as fluid
-import sys
-try:
-    from paddle.fluid.contrib.trainer import *
-    from paddle.fluid.contrib.inferencer import *
-except ImportError:
-    print(
-        "In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
-        file=sys.stderr)
-    from paddle.fluid.trainer import *
-    from paddle.fluid.inferencer import *
-import numpy as np
-WORD_DICT, VERB_DICT, LABEL_DICT = paddle.dataset.conll05.get_dict()
-WORD_DICT_LEN = len(WORD_DICT)
-LABEL_DICT_LEN = len(LABEL_DICT)
-PRED_DICT_LEN = len(VERB_DICT)
-MARK_DICT_LEN = 2
-IS_SPARSE = True
-BATCH_SIZE = 10
-EMBEDDING_NAME = 'emb'
-def lstm_net():
-    WORD_DIM = 32
-    MARK_DIM = 5
-    HIDDEN_DIM = 512
-    DEPTH = 8
-    # Data definitions
-    word = fluid.layers.data(
-        name='word_data', shape=[1], dtype='int64', lod_level=1)
-    predicate = fluid.layers.data(
-        name='verb_data', shape=[1], dtype='int64', lod_level=1)
-    ctx_n2 = fluid.layers.data(
-        name='ctx_n2_data', shape=[1], dtype='int64', lod_level=1)
-    ctx_n1 = fluid.layers.data(
-        name='ctx_n1_data', shape=[1], dtype='int64', lod_level=1)
-    ctx_0 = fluid.layers.data(
-        name='ctx_0_data', shape=[1], dtype='int64', lod_level=1)
-    ctx_p1 = fluid.layers.data(
-        name='ctx_p1_data', shape=[1], dtype='int64', lod_level=1)
-    ctx_p2 = fluid.layers.data(
-        name='ctx_p2_data', shape=[1], dtype='int64', lod_level=1)
-    mark = fluid.layers.data(
-        name='mark_data', shape=[1], dtype='int64', lod_level=1)
-    # 8 features
-    predicate_embedding = fluid.layers.embedding(
-        input=predicate,
-        size=[PRED_DICT_LEN, WORD_DIM],
-        dtype='float32',
-        is_sparse=IS_SPARSE,
-        param_attr='vemb')
-    mark_embedding = fluid.layers.embedding(
-        input=mark,
-        size=[MARK_DICT_LEN, MARK_DIM],
-        dtype='float32',
-        is_sparse=IS_SPARSE)
-    word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
-    emb_layers = [
-        fluid.layers.embedding(
-            size=[WORD_DICT_LEN, WORD_DIM],
-            input=x,
-            param_attr=fluid.ParamAttr(name=EMBEDDING_NAME))
-        for x in word_input
-        #name=EMBEDDING_NAME, trainable=False)) for x in word_input
-    ]
-    emb_layers.append(predicate_embedding)
-    emb_layers.append(mark_embedding)
-    hidden_0_layers = [
-        fluid.layers.fc(input=emb, size=HIDDEN_DIM, act='tanh')
-        for emb in emb_layers
-    ]
-    hidden_0 = fluid.layers.sums(input=hidden_0_layers)
-    lstm_0 = fluid.layers.dynamic_lstm(
-        input=hidden_0,
-        size=HIDDEN_DIM,
-        candidate_activation='relu',
-        gate_activation='sigmoid',
-        cell_activation='sigmoid')
-    # stack L-LSTM and R-LSTM with direct edges
-    input_tmp = [hidden_0, lstm_0]
-    for i in range(1, DEPTH):
-        mix_hidden = fluid.layers.sums(input=[
-            fluid.layers.fc(input=input_tmp[0], size=HIDDEN_DIM, act='tanh'),
-            fluid.layers.fc(input=input_tmp[1], size=HIDDEN_DIM, act='tanh')
-        ])
-        lstm = fluid.layers.dynamic_lstm(
-            input=mix_hidden,
-            size=HIDDEN_DIM,
-            candidate_activation='relu',
-            gate_activation='sigmoid',
-            cell_activation='sigmoid',
-            is_reverse=((i % 2) == 1))
-        input_tmp = [mix_hidden, lstm]
-    feature_out = fluid.layers.sums(input=[
-        fluid.layers.fc(input=input_tmp[0], size=LABEL_DICT_LEN, act='tanh'),
-        fluid.layers.fc(input=input_tmp[1], size=LABEL_DICT_LEN, act='tanh')
-    ])
-    return feature_out
-def inference_program():
-    predict = lstm_net()
-    return predict
-def train_program():
-    MIX_HIDDEN_LR = 1e-3
-    predict = lstm_net()
-    target = fluid.layers.data(
-        name='target', shape=[1], dtype='int64', lod_level=1)
-    crf_cost = fluid.layers.linear_chain_crf(
-        input=predict,
-        label=target,
-        param_attr=fluid.ParamAttr(
-            name='crfw', learning_rate=MIX_HIDDEN_LR))
-    avg_cost = fluid.layers.mean(crf_cost)
-    return [avg_cost]
-def optimize_func():
-    return fluid.optimizer.SGD(learning_rate=fluid.layers.exponential_decay(
-        learning_rate=0.01, decay_steps=100000, decay_rate=0.5, staircase=True))
-def train(use_cuda, train_program, params_dirname):
-    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
-    trainer = Trainer(
-        train_func=train_program, place=place, optimizer_func=optimize_func)
-    feed_order = [
-        'word_data', 'ctx_n2_data', 'ctx_n1_data', 'ctx_0_data', 'ctx_p1_data',
-        'ctx_p2_data', 'verb_data', 'mark_data', 'target'
-    ]
-    #embedding_param = fluid.global_scope().find_var(
-    #        EMBEDDING_NAME).get_tensor()
-    #embedding_param.set(
-    #        load_parameter(conll05.get_embedding(), WORD_DICT_LEN, WORD_DIM),
-    #        place)
-    def event_handler(event):
-        if isinstance(event, EndEpochEvent):
-            test_reader = paddle.batch(
-                paddle.dataset.conll05.test(), batch_size=BATCH_SIZE)
-            avg_cost_set = trainer.test(
-                reader=test_reader, feed_order=feed_order)
-            # get avg cost
-            avg_cost = np.array(avg_cost_set).mean()
-            print("avg_cost: %s" % avg_cost)
-            if float(avg_cost) < 100.0:  # Large value to increase CI speed
-                trainer.save_params(params_dirname)
-            else:
-                print(
-                    ('BatchID {0}, Test Loss {1:0.2}'.format(event.epoch + 1,
-                                                             float(avg_cost))))
-                if math.isnan(float(avg_cost)):
-                    sys.exit("got NaN loss, training failed.")
-        elif isinstance(event, EndStepEvent):
-            print("Step {0}, Epoch {1} Metrics {2}".format(
-                event.step, event.epoch, list(map(np.array, event.metrics))))
-            if event.step == 1:  # Run 2 iterations to speed CI
-                trainer.save_params(params_dirname)
-                trainer.stop()
-    train_reader = paddle.batch(
-        paddle.reader.shuffle(
-            paddle.dataset.conll05.test(), buf_size=8192),
-        batch_size=BATCH_SIZE)
-    trainer.train(
-        num_epochs=1,
-        event_handler=event_handler,
-        reader=train_reader,
-        feed_order=feed_order)
-def infer(use_cuda, inference_program, params_dirname):
-    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
-    inferencer = Inferencer(
-        inference_program, param_path=params_dirname, place=place)
-    # Setup input by creating LoDTensor to represent sequence of words.
-    # Here each word is the basic element of the LoDTensor and the shape of
-    # each word (base_shape) should be [1] since it is simply an index to
-    # look up for the corresponding word vector.
-    # Suppose the recursive_sequence_lengths info is set to [[3, 4, 2]],
-    # which has only one level of detail. Then the created LoDTensor will have only
-    # one higher level structure (sequence of words, or sentence) than the basic
-    # element (word). Hence the LoDTensor will hold data for three sentences of
-    # length 3, 4 and 2, respectively.
-    # Note that recursive_sequence_lengths should be a list of lists.
-    recursive_seq_lens = [[3, 4, 2]]
-    base_shape = [1]
-    # The range of random integers is [low, high]
-    word = fluid.create_random_int_lodtensor(
-        recursive_seq_lens, base_shape, place, low=0, high=WORD_DICT_LEN - 1)
-    ctx_n2 = fluid.create_random_int_lodtensor(
-        recursive_seq_lens, base_shape, place, low=0, high=WORD_DICT_LEN - 1)
-    ctx_n1 = fluid.create_random_int_lodtensor(
-        recursive_seq_lens, base_shape, place, low=0, high=WORD_DICT_LEN - 1)
-    ctx_0 = fluid.create_random_int_lodtensor(
-        recursive_seq_lens, base_shape, place, low=0, high=WORD_DICT_LEN - 1)
-    ctx_p1 = fluid.create_random_int_lodtensor(
-        recursive_seq_lens, base_shape, place, low=0, high=WORD_DICT_LEN - 1)
-    ctx_p2 = fluid.create_random_int_lodtensor(
-        recursive_seq_lens, base_shape, place, low=0, high=WORD_DICT_LEN - 1)
-    pred = fluid.create_random_int_lodtensor(
-        recursive_seq_lens, base_shape, place, low=0, high=PRED_DICT_LEN - 1)
-    mark = fluid.create_random_int_lodtensor(
-        recursive_seq_lens, base_shape, place, low=0, high=MARK_DICT_LEN - 1)
-    results = inferencer.infer(
-        {
-            'word_data': word,
-            'ctx_n2_data': ctx_n2,
-            'ctx_n1_data': ctx_n1,
-            'ctx_0_data': ctx_0,
-            'ctx_p1_data': ctx_p1,
-            'ctx_p2_data': ctx_p2,
-            'verb_data': pred,
-            'mark_data': mark
-        },
-        return_numpy=False)
-    print("infer results: ", np.array(results[0]).shape)
-def main(use_cuda):
-    if use_cuda and not fluid.core.is_compiled_with_cuda():
-        return
-    params_dirname = "label_semantic_roles.inference.model"
-    train(use_cuda, train_program, params_dirname)
-    infer(use_cuda, inference_program, params_dirname)
-if __name__ == '__main__':
-    for use_cuda in (False, True):
-        main(use_cuda=use_cuda)
--- a/python/paddle/fluid/tests/book/high-level-api/test_machine_translation_new_api.py
+++ b/python/paddle/fluid/tests/book/high-level-api/test_machine_translation_new_api.py
-#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from __future__ import print_function
-import contextlib
-import sys
-import numpy as np
-import paddle
-import paddle.fluid as fluid
-try:
-    from paddle.fluid.contrib.trainer import *
-    from paddle.fluid.contrib.inferencer import *
-except ImportError:
-    print(
-        "In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
-        file=sys.stderr)
-    from paddle.fluid.trainer import *
-    from paddle.fluid.inferencer import *
-import paddle.fluid.framework as framework
-import paddle.fluid.layers as pd
-from paddle.fluid.executor import Executor
-from functools import partial
-import unittest
-dict_size = 30000
-source_dict_dim = target_dict_dim = dict_size
-hidden_dim = 32
-word_dim = 16
-batch_size = 2
-max_length = 8
-topk_size = 50
-trg_dic_size = 10000
-beam_size = 2
-decoder_size = hidden_dim
-def encoder(is_sparse):
-    # encoder
-    src_word_id = pd.data(
-        name="src_word_id", shape=[1], dtype='int64', lod_level=1)
-    src_embedding = pd.embedding(
-        input=src_word_id,
-        size=[dict_size, word_dim],
-        dtype='float32',
-        is_sparse=is_sparse,
-        param_attr=fluid.ParamAttr(name='vemb'))
-    fc1 = pd.fc(input=src_embedding, size=hidden_dim * 4, act='tanh')
-    lstm_hidden0, lstm_0 = pd.dynamic_lstm(input=fc1, size=hidden_dim * 4)
-    encoder_out = pd.sequence_last_step(input=lstm_hidden0)
-    return encoder_out
-def train_decoder(context, is_sparse):
-    # decoder
-    trg_language_word = pd.data(
-        name="target_language_word", shape=[1], dtype='int64', lod_level=1)
-    trg_embedding = pd.embedding(
-        input=trg_language_word,
-        size=[dict_size, word_dim],
-        dtype='float32',
-        is_sparse=is_sparse,
-        param_attr=fluid.ParamAttr(name='vemb'))
-    rnn = pd.DynamicRNN()
-    with rnn.block():
-        current_word = rnn.step_input(trg_embedding)
-        pre_state = rnn.memory(init=context)
-        current_state = pd.fc(input=[current_word, pre_state],
-                              size=decoder_size,
-                              act='tanh')
-        current_score = pd.fc(input=current_state,
-                              size=target_dict_dim,
-                              act='softmax')
-        rnn.update_memory(pre_state, current_state)
-        rnn.output(current_score)
-    return rnn()
-def decode(context, is_sparse):
-    init_state = context
-    array_len = pd.fill_constant(shape=[1], dtype='int64', value=max_length)
-    counter = pd.zeros(shape=[1], dtype='int64', force_cpu=True)
-    # fill the first element with init_state
-    state_array = pd.create_array('float32')
-    pd.array_write(init_state, array=state_array, i=counter)
-    # ids, scores as memory
-    ids_array = pd.create_array('int64')
-    scores_array = pd.create_array('float32')
-    init_ids = pd.data(name="init_ids", shape=[1], dtype="int64", lod_level=2)
-    init_scores = pd.data(
-        name="init_scores", shape=[1], dtype="float32", lod_level=2)
-    pd.array_write(init_ids, array=ids_array, i=counter)
-    pd.array_write(init_scores, array=scores_array, i=counter)
-    cond = pd.less_than(x=counter, y=array_len)
-    while_op = pd.While(cond=cond)
-    with while_op.block():
-        pre_ids = pd.array_read(array=ids_array, i=counter)
-        pre_state = pd.array_read(array=state_array, i=counter)
-        pre_score = pd.array_read(array=scores_array, i=counter)
-        # expand the lod of pre_state to be the same with pre_score
-        pre_state_expanded = pd.sequence_expand(pre_state, pre_score)
-        pre_ids_emb = pd.embedding(
-            input=pre_ids,
-            size=[dict_size, word_dim],
-            dtype='float32',
-            is_sparse=is_sparse)
-        # use rnn unit to update rnn
-        current_state = pd.fc(input=[pre_state_expanded, pre_ids_emb],
-                              size=decoder_size,
-                              act='tanh')
-        current_state_with_lod = pd.lod_reset(x=current_state, y=pre_score)
-        # use score to do beam search
-        current_score = pd.fc(input=current_state_with_lod,
-                              size=target_dict_dim,
-                              act='softmax')
-        topk_scores, topk_indices = pd.topk(current_score, k=beam_size)
-        # calculate accumulated scores after topk to reduce computation cost
-        accu_scores = pd.elementwise_add(
-            x=pd.log(topk_scores), y=pd.reshape(
-                pre_score, shape=[-1]), axis=0)
-        selected_ids, selected_scores = pd.beam_search(
-            pre_ids,
-            pre_score,
-            topk_indices,
-            accu_scores,
-            beam_size,
-            end_id=10,
-            level=0)
-        pd.increment(x=counter, value=1, in_place=True)
-        # update the memories
-        pd.array_write(current_state, array=state_array, i=counter)
-        pd.array_write(selected_ids, array=ids_array, i=counter)
-        pd.array_write(selected_scores, array=scores_array, i=counter)
-        # update the break condition: up to the max length or all candidates of
-        # source sentences have ended.
-        length_cond = pd.less_than(x=counter, y=array_len)
-        finish_cond = pd.logical_not(pd.is_empty(x=selected_ids))
-        pd.logical_and(x=length_cond, y=finish_cond, out=cond)
-    translation_ids, translation_scores = pd.beam_search_decode(
-        ids=ids_array, scores=scores_array, beam_size=beam_size, end_id=10)
-    # return init_ids, init_scores
-    return translation_ids, translation_scores
-def train_program(is_sparse):
-    context = encoder(is_sparse)
-    rnn_out = train_decoder(context, is_sparse)
-    label = pd.data(
-        name="target_language_next_word", shape=[1], dtype='int64', lod_level=1)
-    cost = pd.cross_entropy(input=rnn_out, label=label)
-    avg_cost = pd.mean(cost)
-    return avg_cost
-def optimizer_func():
-    return fluid.optimizer.Adagrad(
-        learning_rate=1e-4,
-        regularization=fluid.regularizer.L2DecayRegularizer(
-            regularization_coeff=0.1))
-def train(use_cuda, is_sparse, is_local=True):
-    EPOCH_NUM = 1
-    if use_cuda and not fluid.core.is_compiled_with_cuda():
-        return
-    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
-    train_reader = paddle.batch(
-        paddle.reader.shuffle(
-            paddle.dataset.wmt14.train(dict_size), buf_size=1000),
-        batch_size=batch_size)
-    feed_order = [
-        'src_word_id', 'target_language_word', 'target_language_next_word'
-    ]
-    def event_handler(event):
-        if isinstance(event, EndStepEvent):
-            print('pass_id=' + str(event.epoch) + ' batch=' + str(event.step))
-            if event.step == 10:
-                trainer.stop()
-    trainer = Trainer(
-        train_func=partial(train_program, is_sparse),
-        place=place,
-        optimizer_func=optimizer_func)
-    trainer.train(
-        reader=train_reader,
-        num_epochs=EPOCH_NUM,
-        event_handler=event_handler,
-        feed_order=feed_order)
-def decode_main(use_cuda, is_sparse):
-    if use_cuda and not fluid.core.is_compiled_with_cuda():
-        return
-    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
-    context = encoder(is_sparse)
-    translation_ids, translation_scores = decode(context, is_sparse)
-    exe = Executor(place)
-    exe.run(framework.default_startup_program())
-    init_ids_data = np.array([1 for _ in range(batch_size)], dtype='int64')
-    init_scores_data = np.array(
-        [1. for _ in range(batch_size)], dtype='float32')
-    init_ids_data = init_ids_data.reshape((batch_size, 1))
-    init_scores_data = init_scores_data.reshape((batch_size, 1))
-    init_recursive_seq_lens = [1] * batch_size
-    init_recursive_seq_lens = [init_recursive_seq_lens, init_recursive_seq_lens]
-    init_ids = fluid.create_lod_tensor(init_ids_data, init_recursive_seq_lens,
-                                       place)
-    init_scores = fluid.create_lod_tensor(init_scores_data,
-                                          init_recursive_seq_lens, place)
-    train_data = paddle.batch(
-        paddle.reader.shuffle(
-            paddle.dataset.wmt14.train(dict_size), buf_size=1000),
-        batch_size=batch_size)
-    feed_order = ['src_word_id']
-    feed_list = [
-        framework.default_main_program().global_block().var(var_name)
-        for var_name in feed_order
-    ]
-    feeder = fluid.DataFeeder(feed_list, place)
-    for data in train_data():
-        feed_dict = feeder.feed([[x[0]] for x in data])
-        feed_dict['init_ids'] = init_ids
-        feed_dict['init_scores'] = init_scores
-        result_ids, result_scores = exe.run(
-            framework.default_main_program(),
-            feed=feed_dict,
-            fetch_list=[translation_ids, translation_scores],
-            return_numpy=False)
-        print(result_ids.recursive_sequence_lengths())
-        break
-class TestMachineTranslation(unittest.TestCase):
-    pass
-@contextlib.contextmanager
-def scope_prog_guard():
-    prog = fluid.Program()
-    startup_prog = fluid.Program()
-    scope = fluid.core.Scope()
-    with fluid.scope_guard(scope):
-        with fluid.program_guard(prog, startup_prog):
-            yield
-def inject_test_train(use_cuda, is_sparse):
-    f_name = 'test_{0}_{1}_train'.format('cuda' if use_cuda else 'cpu', 'sparse'
-                                         if is_sparse else 'dense')
-    def f(*args):
-        with scope_prog_guard():
-            train(use_cuda, is_sparse)
-    setattr(TestMachineTranslation, f_name, f)
-def inject_test_decode(use_cuda, is_sparse, decorator=None):
-    f_name = 'test_{0}_{1}_decode'.format('cuda'
-                                          if use_cuda else 'cpu', 'sparse'
-                                          if is_sparse else 'dense')
-    def f(*args):
-        with scope_prog_guard():
-            decode_main(use_cuda, is_sparse)
-    if decorator is not None:
-        f = decorator(f)
-    setattr(TestMachineTranslation, f_name, f)
-for _use_cuda_ in (False, True):
-    for _is_sparse_ in (False, True):
-        inject_test_train(_use_cuda_, _is_sparse_)
-for _use_cuda_ in (False, True):
-    for _is_sparse_ in (False, True):
-        _decorator_ = None
-        if _use_cuda_:
-            _decorator_ = unittest.skip(
-                reason='Beam Search does not support CUDA!')
-        inject_test_decode(
-            is_sparse=_is_sparse_, use_cuda=_use_cuda_, decorator=_decorator_)
-if __name__ == '__main__':
-    unittest.main()
--- a/python/paddle/fluid/tests/book/high-level-api/test_recognize_digits_conv_new_api.py
+++ b/python/paddle/fluid/tests/book/high-level-api/test_recognize_digits_conv_new_api.py
-#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from __future__ import print_function
-import sys
-import paddle.fluid as fluid
-try:
-    from paddle.fluid.contrib.trainer import *
-    from paddle.fluid.contrib.inferencer import *
-except ImportError:
-    print(
-        "In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
-        file=sys.stderr)
-    from paddle.fluid.trainer import *
-    from paddle.fluid.inferencer import *
-import paddle.fluid.core as core
-import paddle
-import numpy
-import math
-import sys
-import os
-BATCH_SIZE = 64
-def inference_program():
-    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
-    conv_pool_1 = fluid.nets.simple_img_conv_pool(
-        input=img,
-        filter_size=5,
-        num_filters=20,
-        pool_size=2,
-        pool_stride=2,
-        act="relu")
-    conv_pool_1 = fluid.layers.batch_norm(conv_pool_1)
-    conv_pool_2 = fluid.nets.simple_img_conv_pool(
-        input=conv_pool_1,
-        filter_size=5,
-        num_filters=50,
-        pool_size=2,
-        pool_stride=2,
-        act="relu")
-    prediction = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax')
-    return prediction
-def train_program():
-    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
-    predict = inference_program()
-    cost = fluid.layers.cross_entropy(input=predict, label=label)
-    avg_cost = fluid.layers.mean(cost)
-    acc = fluid.layers.accuracy(input=predict, label=label)
-    return [avg_cost, acc]
-def optimizer_func():
-    return fluid.optimizer.Adam(learning_rate=0.001)
-def train(use_cuda, train_program, parallel, params_dirname):
-    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
-    trainer = Trainer(
-        train_func=train_program,
-        place=place,
-        optimizer_func=optimizer_func,
-        parallel=parallel)
-    def event_handler(event):
-        if isinstance(event, EndEpochEvent):
-            test_reader = paddle.batch(
-                paddle.dataset.mnist.test(), batch_size=BATCH_SIZE)
-            avg_cost, acc = trainer.test(
-                reader=test_reader, feed_order=['img', 'label'])
-            print("avg_cost: %s" % avg_cost)
-            print("acc     : %s" % acc)
-            if acc > 0.2:  # Smaller value to increase CI speed
-                trainer.save_params(params_dirname)
-            else:
-                print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format(
-                    event.epoch + 1, avg_cost, acc))
-                if math.isnan(avg_cost):
-                    sys.exit("got NaN loss, training failed.")
-        elif isinstance(event, EndStepEvent):
-            print(
-                ("Step {0}, Epoch {1} Metrics {2}".format(
-                    event.step, event.epoch,
-                    list(map(numpy.array, event.metrics)))))
-    train_reader = paddle.batch(
-        paddle.reader.shuffle(
-            paddle.dataset.mnist.train(), buf_size=500),
-        batch_size=BATCH_SIZE)
-    trainer.train(
-        num_epochs=1,
-        event_handler=event_handler,
-        reader=train_reader,
-        feed_order=['img', 'label'])
-def infer(use_cuda, inference_program, parallel, params_dirname=None):
-    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
-    inferencer = Inferencer(
-        infer_func=inference_program,
-        param_path=params_dirname,
-        place=place,
-        parallel=parallel)
-    batch_size = 1
-    tensor_img = numpy.random.uniform(-1.0, 1.0,
-                                      [batch_size, 1, 28, 28]).astype("float32")
-    results = inferencer.infer({'img': tensor_img})
-    print("infer results: ", results[0])
-def main(use_cuda, parallel):
-    params_dirname = "recognize_digits_conv.inference.model"
-    # call train() with is_local argument to run distributed train
-    os.environ['CPU_NUM'] = str(4)
-    train(
-        use_cuda=use_cuda,
-        train_program=train_program,
-        params_dirname=params_dirname,
-        parallel=parallel)
-    # FIXME(zcd): in the inference stage, the number of
-    # input data is one, it is not appropriate to use parallel.
-    if parallel and use_cuda:
-        return
-    os.environ['CPU_NUM'] = str(1)
-    infer(
-        use_cuda=use_cuda,
-        inference_program=inference_program,
-        params_dirname=params_dirname,
-        parallel=parallel)
-if __name__ == '__main__':
-    for use_cuda in (False, True):
-        for parallel in (False, True):
-            if use_cuda and not core.is_compiled_with_cuda():
-                continue
-            main(use_cuda=use_cuda, parallel=parallel)
--- a/python/paddle/fluid/tests/book/high-level-api/test_recognize_digits_mlp_new_api.py
+++ b/python/paddle/fluid/tests/book/high-level-api/test_recognize_digits_mlp_new_api.py
-#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from __future__ import print_function
-import sys
-import paddle.fluid as fluid
-try:
-    from paddle.fluid.contrib.trainer import *
-    from paddle.fluid.contrib.inferencer import *
-except ImportError:
-    print(
-        "In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
-        file=sys.stderr)
-    from paddle.fluid.trainer import *
-    from paddle.fluid.inferencer import *
-import paddle.fluid.core as core
-import paddle
-import numpy
-import math
-import sys
-import os
-BATCH_SIZE = 64
-def inference_program():
-    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
-    hidden = fluid.layers.fc(input=img, size=200, act='tanh')
-    hidden = fluid.layers.fc(input=hidden, size=200, act='tanh')
-    prediction = fluid.layers.fc(input=hidden, size=10, act='softmax')
-    return prediction
-def train_program():
-    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
-    predict = inference_program()
-    cost = fluid.layers.cross_entropy(input=predict, label=label)
-    avg_cost = fluid.layers.mean(cost)
-    acc = fluid.layers.accuracy(input=predict, label=label)
-    return [avg_cost, acc]
-def optimizer_func():
-    return fluid.optimizer.Adam(learning_rate=0.001)
-def train(use_cuda, train_program, params_dirname, parallel):
-    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
-    trainer = Trainer(
-        train_func=train_program,
-        place=place,
-        optimizer_func=optimizer_func,
-        parallel=parallel)
-    def event_handler(event):
-        if isinstance(event, EndEpochEvent):
-            test_reader = paddle.batch(
-                paddle.dataset.mnist.test(), batch_size=BATCH_SIZE)
-            avg_cost, acc = trainer.test(
-                reader=test_reader, feed_order=['img', 'label'])
-            print("avg_cost: %s" % avg_cost)
-            print("acc     : %s" % acc)
-            if acc > 0.2:  # Smaller value to increase CI speed
-                trainer.save_params(params_dirname)
-            else:
-                print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format(
-                    event.epoch + 1, avg_cost, acc))
-                if math.isnan(avg_cost):
-                    sys.exit("got NaN loss, training failed.")
-    train_reader = paddle.batch(
-        paddle.reader.shuffle(
-            paddle.dataset.mnist.train(), buf_size=500),
-        batch_size=BATCH_SIZE)
-    trainer.train(
-        num_epochs=1,
-        event_handler=event_handler,
-        reader=train_reader,
-        feed_order=['img', 'label'])
-def infer(use_cuda, inference_program, parallel, params_dirname=None):
-    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
-    inferencer = Inferencer(
-        infer_func=inference_program,
-        param_path=params_dirname,
-        place=place,
-        parallel=parallel)
-    batch_size = 1
-    tensor_img = numpy.random.uniform(-1.0, 1.0,
-                                      [batch_size, 1, 28, 28]).astype("float32")
-    results = inferencer.infer({'img': tensor_img})
-    print("infer results: ", results[0])
-def main(use_cuda, parallel):
-    params_dirname = "recognize_digits_mlp.inference.model"
-    # call train() with is_local argument to run distributed train
-    os.environ['CPU_NUM'] = str(4)
-    train(
-        use_cuda=use_cuda,
-        train_program=train_program,
-        params_dirname=params_dirname,
-        parallel=parallel)
-    # FIXME(zcd): in the inference stage, the number of
-    # input data is one, it is not appropriate to use parallel.
-    if parallel and use_cuda:
-        return
-    os.environ['CPU_NUM'] = str(1)
-    infer(
-        use_cuda=use_cuda,
-        inference_program=inference_program,
-        params_dirname=params_dirname,
-        parallel=parallel)
-if __name__ == '__main__':
-    for use_cuda in (False, True):
-        for parallel in (False, True):
-            if use_cuda and not core.is_compiled_with_cuda():
-                continue
-            main(use_cuda=use_cuda, parallel=parallel)
--- a/python/paddle/fluid/tests/book/high-level-api/test_recommender_system_new_api.py
+++ b/python/paddle/fluid/tests/book/high-level-api/test_recommender_system_new_api.py
-#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from __future__ import print_function
-import math
-import sys
-import numpy as np
-import paddle
-import paddle.fluid as fluid
-import sys
-try:
-    from paddle.fluid.contrib.trainer import *
-    from paddle.fluid.contrib.inferencer import *
-except ImportError:
-    print(
-        "In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
-        file=sys.stderr)
-    from paddle.fluid.trainer import *
-    from paddle.fluid.inferencer import *
-import paddle.fluid.layers as layers
-import paddle.fluid.nets as nets
-IS_SPARSE = True
-USE_GPU = False
-BATCH_SIZE = 256
-def get_usr_combined_features():
-    # FIXME(dzh) : old API integer_value(10) may have range check.
-    # currently we don't have user configurated check.
-    USR_DICT_SIZE = paddle.dataset.movielens.max_user_id() + 1
-    uid = layers.data(name='user_id', shape=[1], dtype='int64')
-    usr_emb = layers.embedding(
-        input=uid,
-        dtype='float32',
-        size=[USR_DICT_SIZE, 32],
-        param_attr='user_table',
-        is_sparse=IS_SPARSE)
-    usr_fc = layers.fc(input=usr_emb, size=32)
-    USR_GENDER_DICT_SIZE = 2
-    usr_gender_id = layers.data(name='gender_id', shape=[1], dtype='int64')
-    usr_gender_emb = layers.embedding(
-        input=usr_gender_id,
-        size=[USR_GENDER_DICT_SIZE, 16],
-        param_attr='gender_table',
-        is_sparse=IS_SPARSE)
-    usr_gender_fc = layers.fc(input=usr_gender_emb, size=16)
-    USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table)
-    usr_age_id = layers.data(name='age_id', shape=[1], dtype="int64")
-    usr_age_emb = layers.embedding(
-        input=usr_age_id,
-        size=[USR_AGE_DICT_SIZE, 16],
-        is_sparse=IS_SPARSE,
-        param_attr='age_table')
-    usr_age_fc = layers.fc(input=usr_age_emb, size=16)
-    USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1
-    usr_job_id = layers.data(name='job_id', shape=[1], dtype="int64")
-    usr_job_emb = layers.embedding(
-        input=usr_job_id,
-        size=[USR_JOB_DICT_SIZE, 16],
-        param_attr='job_table',
-        is_sparse=IS_SPARSE)
-    usr_job_fc = layers.fc(input=usr_job_emb, size=16)
-    concat_embed = layers.concat(
-        input=[usr_fc, usr_gender_fc, usr_age_fc, usr_job_fc], axis=1)
-    usr_combined_features = layers.fc(input=concat_embed, size=200, act="tanh")
-    return usr_combined_features
-def get_mov_combined_features():
-    MOV_DICT_SIZE = paddle.dataset.movielens.max_movie_id() + 1
-    mov_id = layers.data(name='movie_id', shape=[1], dtype='int64')
-    mov_emb = layers.embedding(
-        input=mov_id,
-        dtype='float32',
-        size=[MOV_DICT_SIZE, 32],
-        param_attr='movie_table',
-        is_sparse=IS_SPARSE)
-    mov_fc = layers.fc(input=mov_emb, size=32)
-    CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories())
-    category_id = layers.data(
-        name='category_id', shape=[1], dtype='int64', lod_level=1)
-    mov_categories_emb = layers.embedding(
-        input=category_id, size=[CATEGORY_DICT_SIZE, 32], is_sparse=IS_SPARSE)
-    mov_categories_hidden = layers.sequence_pool(
-        input=mov_categories_emb, pool_type="sum")
-    MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict())
-    mov_title_id = layers.data(
-        name='movie_title', shape=[1], dtype='int64', lod_level=1)
-    mov_title_emb = layers.embedding(
-        input=mov_title_id, size=[MOV_TITLE_DICT_SIZE, 32], is_sparse=IS_SPARSE)
-    mov_title_conv = nets.sequence_conv_pool(
-        input=mov_title_emb,
-        num_filters=32,
-        filter_size=3,
-        act="tanh",
-        pool_type="sum")
-    concat_embed = layers.concat(
-        input=[mov_fc, mov_categories_hidden, mov_title_conv], axis=1)
-    # FIXME(dzh) : need tanh operator
-    mov_combined_features = layers.fc(input=concat_embed, size=200, act="tanh")
-    return mov_combined_features
-def inference_program():
-    usr_combined_features = get_usr_combined_features()
-    mov_combined_features = get_mov_combined_features()
-    inference = layers.cos_sim(X=usr_combined_features, Y=mov_combined_features)
-    scale_infer = layers.scale(x=inference, scale=5.0)
-    return scale_infer
-def train_program():
-    scale_infer = inference_program()
-    label = layers.data(name='score', shape=[1], dtype='float32')
-    square_cost = layers.square_error_cost(input=scale_infer, label=label)
-    avg_cost = layers.mean(square_cost)
-    return [avg_cost, scale_infer]
-def optimizer_func():
-    return fluid.optimizer.SGD(learning_rate=0.2)
-def train(use_cuda, train_program, params_dirname):
-    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
-    trainer = Trainer(
-        train_func=train_program, place=place, optimizer_func=optimizer_func)
-    feed_order = [
-        'user_id', 'gender_id', 'age_id', 'job_id', 'movie_id', 'category_id',
-        'movie_title', 'score'
-    ]
-    def event_handler(event):
-        if isinstance(event, EndStepEvent):
-            test_reader = paddle.batch(
-                paddle.dataset.movielens.test(), batch_size=BATCH_SIZE)
-            avg_cost_set = trainer.test(
-                reader=test_reader, feed_order=feed_order)
-            # get avg cost
-            avg_cost = np.array(avg_cost_set).mean()
-            print("avg_cost: %s" % avg_cost)
-            if float(avg_cost) < 4:  # Smaller value to increase CI speed
-                trainer.save_params(params_dirname)
-                trainer.stop()
-            else:
-                print(
-                    ('BatchID {0}, Test Loss {1:0.2}'.format(event.epoch + 1,
-                                                             float(avg_cost))))
-                if math.isnan(float(avg_cost)):
-                    sys.exit("got NaN loss, training failed.")
-    train_reader = paddle.batch(
-        paddle.reader.shuffle(
-            paddle.dataset.movielens.train(), buf_size=8192),
-        batch_size=BATCH_SIZE)
-    trainer.train(
-        num_epochs=1,
-        event_handler=event_handler,
-        reader=train_reader,
-        feed_order=feed_order)
-def infer(use_cuda, inference_program, params_dirname):
-    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
-    inferencer = Inferencer(
-        inference_program, param_path=params_dirname, place=place)
-    # Use the first data from paddle.dataset.movielens.test() as input.
-    # Use create_lod_tensor(data, recursive_sequence_lengths, place) API 
-    # to generate LoD Tensor where `data` is a list of sequences of index 
-    # numbers, `recursive_sequence_lengths` is the length-based level of detail 
-    # (lod) info associated with `data`.
-    # For example, data = [[10, 2, 3], [2, 3]] means that it contains
-    # two sequences of indexes, of length 3 and 2, respectively.
-    # Correspondingly, recursive_sequence_lengths = [[3, 2]] contains one 
-    # level of detail info, indicating that `data` consists of two sequences 
-    # of length 3 and 2, respectively. 
-    user_id = fluid.create_lod_tensor([[np.int64(1)]], [[1]], place)
-    gender_id = fluid.create_lod_tensor([[np.int64(1)]], [[1]], place)
-    age_id = fluid.create_lod_tensor([[np.int64(0)]], [[1]], place)
-    job_id = fluid.create_lod_tensor([[np.int64(10)]], [[1]], place)
-    movie_id = fluid.create_lod_tensor([[np.int64(783)]], [[1]], place)
-    category_id = fluid.create_lod_tensor(
-        [np.array(
-            [10, 8, 9], dtype='int64')], [[3]], place)
-    movie_title = fluid.create_lod_tensor(
-        [np.array(
-            [1069, 4140, 2923, 710, 988], dtype='int64')], [[5]], place)
-    results = inferencer.infer(
-        {
-            'user_id': user_id,
-            'gender_id': gender_id,
-            'age_id': age_id,
-            'job_id': job_id,
-            'movie_id': movie_id,
-            'category_id': category_id,
-            'movie_title': movie_title
-        },
-        return_numpy=False)
-    print("infer results: ", np.array(results[0]))
-def main(use_cuda):
-    if use_cuda and not fluid.core.is_compiled_with_cuda():
-        return
-    params_dirname = "recommender_system.inference.model"
-    train(
-        use_cuda=use_cuda,
-        train_program=train_program,
-        params_dirname=params_dirname)
-    infer(
-        use_cuda=use_cuda,
-        inference_program=inference_program,
-        params_dirname=params_dirname)
-if __name__ == '__main__':
-    main(USE_GPU)
--- a/python/paddle/fluid/tests/book/high-level-api/test_understand_sentiment_conv_new_api.py
+++ b/python/paddle/fluid/tests/book/high-level-api/test_understand_sentiment_conv_new_api.py
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from __future__ import print_function
-import paddle
-import paddle.fluid as fluid
-import sys
-try:
-    from paddle.fluid.contrib.trainer import *
-    from paddle.fluid.contrib.inferencer import *
-except ImportError:
-    print(
-        "In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
-        file=sys.stderr)
-    from paddle.fluid.trainer import *
-    from paddle.fluid.inferencer import *
-from functools import partial
-import numpy as np
-CLASS_DIM = 2
-EMB_DIM = 128
-HID_DIM = 512
-BATCH_SIZE = 128
-def convolution_net(data, input_dim, class_dim, emb_dim, hid_dim):
-    emb = fluid.layers.embedding(
-        input=data, size=[input_dim, emb_dim], is_sparse=True)
-    conv_3 = fluid.nets.sequence_conv_pool(
-        input=emb,
-        num_filters=hid_dim,
-        filter_size=3,
-        act="tanh",
-        pool_type="sqrt")
-    conv_4 = fluid.nets.sequence_conv_pool(
-        input=emb,
-        num_filters=hid_dim,
-        filter_size=4,
-        act="tanh",
-        pool_type="sqrt")
-    prediction = fluid.layers.fc(input=[conv_3, conv_4],
-                                 size=class_dim,
-                                 act="softmax")
-    return prediction
-def inference_program(word_dict):
-    data = fluid.layers.data(
-        name="words", shape=[1], dtype="int64", lod_level=1)
-    dict_dim = len(word_dict)
-    net = convolution_net(data, dict_dim, CLASS_DIM, EMB_DIM, HID_DIM)
-    return net
-def train_program(word_dict):
-    prediction = inference_program(word_dict)
-    label = fluid.layers.data(name="label", shape=[1], dtype="int64")
-    cost = fluid.layers.cross_entropy(input=prediction, label=label)
-    avg_cost = fluid.layers.mean(cost)
-    accuracy = fluid.layers.accuracy(input=prediction, label=label)
-    return [avg_cost, accuracy]
-def optimizer_func():
-    return fluid.optimizer.Adagrad(learning_rate=0.002)
-def train(use_cuda, train_program, params_dirname):
-    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
-    word_dict = paddle.dataset.imdb.word_dict()
-    trainer = Trainer(
-        train_func=partial(train_program, word_dict),
-        place=place,
-        optimizer_func=optimizer_func)
-    def event_handler(event):
-        if isinstance(event, EndEpochEvent):
-            test_reader = paddle.batch(
-                paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE)
-            avg_cost, acc = trainer.test(
-                reader=test_reader, feed_order=['words', 'label'])
-            print("avg_cost: %s" % avg_cost)
-            print("acc     : %s" % acc)
-            if acc > 0.2:  # Smaller value to increase CI speed
-                trainer.save_params(params_dirname)
-                trainer.stop()
-            else:
-                print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format(
-                    event.epoch + 1, avg_cost, acc))
-                if math.isnan(avg_cost):
-                    sys.exit("got NaN loss, training failed.")
-        elif isinstance(event, EndStepEvent):
-            print("Step {0}, Epoch {1} Metrics {2}".format(
-                event.step, event.epoch, list(map(np.array, event.metrics))))
-            if event.step == 1:  # Run 2 iterations to speed CI
-                trainer.save_params(params_dirname)
-                trainer.stop()
-    train_reader = paddle.batch(
-        paddle.reader.shuffle(
-            paddle.dataset.imdb.train(word_dict), buf_size=25000),
-        batch_size=BATCH_SIZE)
-    trainer.train(
-        num_epochs=1,
-        event_handler=event_handler,
-        reader=train_reader,
-        feed_order=['words', 'label'])
-def infer(use_cuda, inference_program, params_dirname=None):
-    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
-    word_dict = paddle.dataset.imdb.word_dict()
-    inferencer = Inferencer(
-        infer_func=partial(inference_program, word_dict),
-        param_path=params_dirname,
-        place=place)
-    # Setup input by creating LoDTensor to represent sequence of words.
-    # Here each word is the basic element of the LoDTensor and the shape of
-    # each word (base_shape) should be [1] since it is simply an index to
-    # look up for the corresponding word vector.
-    # Suppose the recursive_sequence_lengths info is set to [[3, 4, 2]],
-    # which has only one level of detail. Then the created LoDTensor will have only
-    # one higher level structure (sequence of words, or sentence) than the basic
-    # element (word). Hence the LoDTensor will hold data for three sentences of
-    # length 3, 4 and 2, respectively.
-    # Note that recursive_sequence_lengths should be a list of lists.
-    recursive_seq_lens = [[3, 4, 2]]
-    base_shape = [1]
-    # The range of random integers is [low, high]
-    tensor_words = fluid.create_random_int_lodtensor(
-        recursive_seq_lens, base_shape, place, low=0, high=len(word_dict) - 1)
-    results = inferencer.infer({'words': tensor_words})
-    print("infer results: ", results)
-def main(use_cuda):
-    if use_cuda and not fluid.core.is_compiled_with_cuda():
-        return
-    params_dirname = "understand_sentiment_conv.inference.model"
-    train(use_cuda, train_program, params_dirname)
-    infer(use_cuda, inference_program, params_dirname)
-if __name__ == '__main__':
-    for use_cuda in (False, True):
-        main(use_cuda=use_cuda)
--- a/python/paddle/fluid/tests/book/high-level-api/test_understand_sentiment_dynamic_rnn_new_api.py
+++ b/python/paddle/fluid/tests/book/high-level-api/test_understand_sentiment_dynamic_rnn_new_api.py
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from __future__ import print_function
-import paddle
-import paddle.fluid as fluid
-import sys
-try:
-    from paddle.fluid.contrib.trainer import *
-    from paddle.fluid.contrib.inferencer import *
-except ImportError:
-    print(
-        "In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
-        file=sys.stderr)
-    from paddle.fluid.trainer import *
-    from paddle.fluid.inferencer import *
-from functools import partial
-import numpy as np
-CLASS_DIM = 2
-EMB_DIM = 128
-BATCH_SIZE = 128
-LSTM_SIZE = 128
-def dynamic_rnn_lstm(data, input_dim, class_dim, emb_dim, lstm_size):
-    emb = fluid.layers.embedding(
-        input=data, size=[input_dim, emb_dim], is_sparse=True)
-    sentence = fluid.layers.fc(input=emb, size=lstm_size, act='tanh')
-    rnn = fluid.layers.DynamicRNN()
-    with rnn.block():
-        word = rnn.step_input(sentence)
-        prev_hidden = rnn.memory(value=0.0, shape=[lstm_size])
-        prev_cell = rnn.memory(value=0.0, shape=[lstm_size])
-        def gate_common(ipt, hidden, size):
-            gate0 = fluid.layers.fc(input=ipt, size=size, bias_attr=True)
-            gate1 = fluid.layers.fc(input=hidden, size=size, bias_attr=False)
-            return gate0 + gate1
-        forget_gate = fluid.layers.sigmoid(x=gate_common(word, prev_hidden,
-                                                         lstm_size))
-        input_gate = fluid.layers.sigmoid(x=gate_common(word, prev_hidden,
-                                                        lstm_size))
-        output_gate = fluid.layers.sigmoid(x=gate_common(word, prev_hidden,
-                                                         lstm_size))
-        cell_gate = fluid.layers.sigmoid(x=gate_common(word, prev_hidden,
-                                                       lstm_size))
-        cell = forget_gate * prev_cell + input_gate * cell_gate
-        hidden = output_gate * fluid.layers.tanh(x=cell)
-        rnn.update_memory(prev_cell, cell)
-        rnn.update_memory(prev_hidden, hidden)
-        rnn.output(hidden)
-    last = fluid.layers.sequence_last_step(rnn())
-    prediction = fluid.layers.fc(input=last, size=class_dim, act="softmax")
-    return prediction
-def inference_program(word_dict):
-    data = fluid.layers.data(
-        name="words", shape=[1], dtype="int64", lod_level=1)
-    dict_dim = len(word_dict)
-    pred = dynamic_rnn_lstm(data, dict_dim, CLASS_DIM, EMB_DIM, LSTM_SIZE)
-    return pred
-def train_program(word_dict):
-    prediction = inference_program(word_dict)
-    label = fluid.layers.data(name="label", shape=[1], dtype="int64")
-    cost = fluid.layers.cross_entropy(input=prediction, label=label)
-    avg_cost = fluid.layers.mean(cost)
-    accuracy = fluid.layers.accuracy(input=prediction, label=label)
-    return [avg_cost, accuracy]
-def optimizer_func():
-    return fluid.optimizer.Adagrad(learning_rate=0.002)
-def train(use_cuda, train_program, params_dirname):
-    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
-    word_dict = paddle.dataset.imdb.word_dict()
-    trainer = Trainer(
-        train_func=partial(train_program, word_dict),
-        place=place,
-        optimizer_func=optimizer_func)
-    def event_handler(event):
-        if isinstance(event, EndEpochEvent):
-            test_reader = paddle.batch(
-                paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE)
-            avg_cost, acc = trainer.test(
-                reader=test_reader, feed_order=['words', 'label'])
-            print("avg_cost: %s" % avg_cost)
-            print("acc     : %s" % acc)
-            if acc > 0.2:  # Smaller value to increase CI speed
-                trainer.save_params(params_dirname)
-                trainer.stop()
-            else:
-                print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format(
-                    event.epoch + 1, avg_cost, acc))
-                if math.isnan(avg_cost):
-                    sys.exit("got NaN loss, training failed.")
-        elif isinstance(event, EndStepEvent):
-            print("Step {0}, Epoch {1} Metrics {2}".format(
-                event.step, event.epoch, list(map(np.array, event.metrics))))
-            if event.step == 1:  # Run 2 iterations to speed CI
-                trainer.save_params(params_dirname)
-                trainer.stop()
-    train_reader = paddle.batch(
-        paddle.reader.shuffle(
-            paddle.dataset.imdb.train(word_dict), buf_size=25000),
-        batch_size=BATCH_SIZE)
-    trainer.train(
-        num_epochs=1,
-        event_handler=event_handler,
-        reader=train_reader,
-        feed_order=['words', 'label'])
-def infer(use_cuda, inference_program, params_dirname=None):
-    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
-    word_dict = paddle.dataset.imdb.word_dict()
-    inferencer = Inferencer(
-        infer_func=partial(inference_program, word_dict),
-        param_path=params_dirname,
-        place=place)
-    # Setup input by creating LoDTensor to represent sequence of words.
-    # Here each word is the basic element of the LoDTensor and the shape of
-    # each word (base_shape) should be [1] since it is simply an index to
-    # look up for the corresponding word vector.
-    # Suppose the recursive_sequence_lengths info is set to [[3, 4, 2]],
-    # which has only one level of detail. Then the created LoDTensor will have only
-    # one higher level structure (sequence of words, or sentence) than the basic
-    # element (word). Hence the LoDTensor will hold data for three sentences of
-    # length 3, 4 and 2, respectively.
-    # Note that recursive_sequence_lengths should be a list of lists.
-    recursive_seq_lens = [[3, 4, 2]]
-    base_shape = [1]
-    # The range of random integers is [low, high]
-    tensor_words = fluid.create_random_int_lodtensor(
-        recursive_seq_lens, base_shape, place, low=0, high=len(word_dict) - 1)
-    results = inferencer.infer({'words': tensor_words})
-    print("infer results: ", results)
-def main(use_cuda):
-    if use_cuda and not fluid.core.is_compiled_with_cuda():
-        return
-    params_dirname = "understand_sentiment_conv.inference.model"
-    train(use_cuda, train_program, params_dirname)
-    infer(use_cuda, inference_program, params_dirname)
-if __name__ == '__main__':
-    for use_cuda in (False, True):
-        main(use_cuda=use_cuda)
--- a/python/paddle/fluid/tests/book/high-level-api/test_understand_sentiment_stacked_lstm_new_api.py
+++ b/python/paddle/fluid/tests/book/high-level-api/test_understand_sentiment_stacked_lstm_new_api.py
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from __future__ import print_function
-import paddle
-import paddle.fluid as fluid
-import sys
-try:
-    from paddle.fluid.contrib.trainer import *
-    from paddle.fluid.contrib.inferencer import *
-except ImportError:
-    print(
-        "In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
-        file=sys.stderr)
-    from paddle.fluid.trainer import *
-    from paddle.fluid.inferencer import *
-from functools import partial
-import numpy as np
-CLASS_DIM = 2
-EMB_DIM = 128
-HID_DIM = 512
-STACKED_NUM = 3
-BATCH_SIZE = 128
-def stacked_lstm_net(data, input_dim, class_dim, emb_dim, hid_dim, stacked_num):
-    assert stacked_num % 2 == 1
-    emb = fluid.layers.embedding(
-        input=data, size=[input_dim, emb_dim], is_sparse=True)
-    fc1 = fluid.layers.fc(input=emb, size=hid_dim)
-    lstm1, cell1 = fluid.layers.dynamic_lstm(input=fc1, size=hid_dim)
-    inputs = [fc1, lstm1]
-    for i in range(2, stacked_num + 1):
-        fc = fluid.layers.fc(input=inputs, size=hid_dim)
-        lstm, cell = fluid.layers.dynamic_lstm(
-            input=fc, size=hid_dim, is_reverse=(i % 2) == 0)
-        inputs = [fc, lstm]
-    fc_last = fluid.layers.sequence_pool(input=inputs[0], pool_type='max')
-    lstm_last = fluid.layers.sequence_pool(input=inputs[1], pool_type='max')
-    prediction = fluid.layers.fc(input=[fc_last, lstm_last],
-                                 size=class_dim,
-                                 act='softmax')
-    return prediction
-def inference_program(word_dict):
-    data = fluid.layers.data(
-        name="words", shape=[1], dtype="int64", lod_level=1)
-    dict_dim = len(word_dict)
-    net = stacked_lstm_net(data, dict_dim, CLASS_DIM, EMB_DIM, HID_DIM,
-                           STACKED_NUM)
-    return net
-def train_program(word_dict):
-    prediction = inference_program(word_dict)
-    label = fluid.layers.data(name="label", shape=[1], dtype="int64")
-    cost = fluid.layers.cross_entropy(input=prediction, label=label)
-    avg_cost = fluid.layers.mean(cost)
-    accuracy = fluid.layers.accuracy(input=prediction, label=label)
-    return [avg_cost, accuracy]
-def optimizer_func():
-    return fluid.optimizer.Adagrad(learning_rate=0.002)
-def train(use_cuda, train_program, params_dirname):
-    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
-    word_dict = paddle.dataset.imdb.word_dict()
-    trainer = Trainer(
-        train_func=partial(train_program, word_dict),
-        place=place,
-        optimizer_func=optimizer_func)
-    def event_handler(event):
-        if isinstance(event, EndEpochEvent):
-            test_reader = paddle.batch(
-                paddle.dataset.imdb.test(word_dict),
-                batch_size=BATCH_SIZE,
-                drop_last=False)
-            avg_cost, acc = trainer.test(
-                reader=test_reader, feed_order=['words', 'label'])
-            print("avg_cost: %s" % avg_cost)
-            print("acc     : %s" % acc)
-            if acc > 0.2:  # Smaller value to increase CI speed
-                trainer.save_params(params_dirname)
-                trainer.stop()
-            else:
-                print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format(
-                    event.epoch + 1, avg_cost, acc))
-                if math.isnan(avg_cost):
-                    sys.exit("got NaN loss, training failed.")
-        elif isinstance(event, EndStepEvent):
-            print("Step {0}, Epoch {1} Metrics {2}".format(
-                event.step, event.epoch, list(map(np.array, event.metrics))))
-            if event.step == 1:  # Run 2 iterations to speed CI
-                trainer.save_params(params_dirname)
-                trainer.stop()
-    train_reader = paddle.batch(
-        paddle.reader.shuffle(
-            paddle.dataset.imdb.train(word_dict), buf_size=25000),
-        batch_size=BATCH_SIZE,
-        drop_last=False)
-    trainer.train(
-        num_epochs=1,
-        event_handler=event_handler,
-        reader=train_reader,
-        feed_order=['words', 'label'])
-def infer(use_cuda, inference_program, params_dirname=None):
-    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
-    word_dict = paddle.dataset.imdb.word_dict()
-    inferencer = Inferencer(
-        infer_func=partial(inference_program, word_dict),
-        param_path=params_dirname,
-        place=place)
-    # Setup input by creating LoDTensor to represent sequence of words.
-    # Here each word is the basic element of the LoDTensor and the shape of
-    # each word (base_shape) should be [1] since it is simply an index to
-    # look up for the corresponding word vector.
-    # Suppose the recursive_sequence_lengths info is set to [[3, 4, 2]],
-    # which has only one level of detail. Then the created LoDTensor will have only
-    # one higher level structure (sequence of words, or sentence) than the basic
-    # element (word). Hence the LoDTensor will hold data for three sentences of
-    # length 3, 4 and 2, respectively.
-    # Note that recursive_sequence_lengths should be a list of lists.
-    recursive_seq_lens = [[3, 4, 2]]
-    base_shape = [1]
-    # The range of random integers is [low, high]
-    tensor_words = fluid.create_random_int_lodtensor(
-        recursive_seq_lens, base_shape, place, low=0, high=len(word_dict) - 1)
-    results = inferencer.infer({'words': tensor_words})
-    print("infer results: ", results)
-def main(use_cuda):
-    if use_cuda and not fluid.core.is_compiled_with_cuda():
-        return
-    params_dirname = "understand_sentiment_stacked_lstm.inference.model"
-    train(use_cuda, train_program, params_dirname)
-    infer(use_cuda, inference_program, params_dirname)
-if __name__ == '__main__':
-    for use_cuda in (False, True):
-        main(use_cuda=use_cuda)
--- a/python/paddle/fluid/tests/book/high-level-api/test_word2vec_new_api.py
+++ b/python/paddle/fluid/tests/book/high-level-api/test_word2vec_new_api.py
-#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from __future__ import print_function
-import paddle
-import paddle.fluid as fluid
-import sys
-try:
-    from paddle.fluid.contrib.trainer import *
-    from paddle.fluid.contrib.inferencer import *
-except ImportError:
-    print(
-        "In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
-        file=sys.stderr)
-    from paddle.fluid.trainer import *
-    from paddle.fluid.inferencer import *
-import numpy as np
-import math
-import sys
-from functools import partial
-PASS_NUM = 100
-EMBED_SIZE = 32
-HIDDEN_SIZE = 256
-N = 5
-BATCH_SIZE = 32
-word_dict = paddle.dataset.imikolov.build_dict()
-dict_size = len(word_dict)
-def inference_program(is_sparse):
-    first_word = fluid.layers.data(name='firstw', shape=[1], dtype='int64')
-    second_word = fluid.layers.data(name='secondw', shape=[1], dtype='int64')
-    third_word = fluid.layers.data(name='thirdw', shape=[1], dtype='int64')
-    forth_word = fluid.layers.data(name='forthw', shape=[1], dtype='int64')
-    embed_first = fluid.layers.embedding(
-        input=first_word,
-        size=[dict_size, EMBED_SIZE],
-        dtype='float32',
-        is_sparse=is_sparse,
-        param_attr='shared_w')
-    embed_second = fluid.layers.embedding(
-        input=second_word,
-        size=[dict_size, EMBED_SIZE],
-        dtype='float32',
-        is_sparse=is_sparse,
-        param_attr='shared_w')
-    embed_third = fluid.layers.embedding(
-        input=third_word,
-        size=[dict_size, EMBED_SIZE],
-        dtype='float32',
-        is_sparse=is_sparse,
-        param_attr='shared_w')
-    embed_forth = fluid.layers.embedding(
-        input=forth_word,
-        size=[dict_size, EMBED_SIZE],
-        dtype='float32',
-        is_sparse=is_sparse,
-        param_attr='shared_w')
-    concat_embed = fluid.layers.concat(
-        input=[embed_first, embed_second, embed_third, embed_forth], axis=1)
-    hidden1 = fluid.layers.fc(input=concat_embed,
-                              size=HIDDEN_SIZE,
-                              act='sigmoid')
-    predict_word = fluid.layers.fc(input=hidden1, size=dict_size, act='softmax')
-    return predict_word
-def train_program(is_sparse):
-    # The declaration of 'next_word' must be after the invoking of inference_program,
-    # or the data input order of train program would be [next_word, firstw, secondw,
-    # thirdw, forthw], which is not correct.
-    predict_word = inference_program(is_sparse)
-    next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64')
-    cost = fluid.layers.cross_entropy(input=predict_word, label=next_word)
-    avg_cost = fluid.layers.mean(cost)
-    return avg_cost
-def optimizer_func():
-    return fluid.optimizer.SGD(learning_rate=0.001)
-def train(use_cuda, train_program, params_dirname):
-    train_reader = paddle.batch(
-        paddle.dataset.imikolov.train(word_dict, N), BATCH_SIZE)
-    test_reader = paddle.batch(
-        paddle.dataset.imikolov.test(word_dict, N), BATCH_SIZE)
-    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
-    def event_handler(event):
-        if isinstance(event, EndStepEvent):
-            outs = trainer.test(
-                reader=test_reader,
-                feed_order=['firstw', 'secondw', 'thirdw', 'forthw', 'nextw'])
-            avg_cost = outs[0]
-            print("loss= ", avg_cost)
-            if avg_cost < 10.0:
-                trainer.save_params(params_dirname)
-                trainer.stop()
-            if math.isnan(avg_cost):
-                sys.exit("got NaN loss, training failed.")
-    trainer = Trainer(
-        train_func=train_program, optimizer_func=optimizer_func, place=place)
-    trainer.train(
-        reader=train_reader,
-        num_epochs=1,
-        event_handler=event_handler,
-        feed_order=['firstw', 'secondw', 'thirdw', 'forthw', 'nextw'])
-def infer(use_cuda, inference_program, params_dirname=None):
-    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
-    inferencer = Inferencer(
-        infer_func=inference_program, param_path=params_dirname, place=place)
-    # Setup inputs by creating 4 LoDTensors representing 4 words. Here each word 
-    # is simply an index to look up for the corresponding word vector and hence 
-    # the shape of word (base_shape) should be [1]. The recursive_sequence_lengths, 
-    # which is length-based level of detail (lod) of each LoDTensor, should be [[1]] 
-    # meaning there is only one level of detail and there is only one sequence of 
-    # one word on this level.
-    # Note that recursive_sequence_lengths should be a list of lists.
-    recursive_seq_lens = [[1]]
-    base_shape = [1]
-    # The range of random integers is [low, high]
-    first_word = fluid.create_random_int_lodtensor(
-        recursive_seq_lens, base_shape, place, low=0, high=dict_size - 1)
-    second_word = fluid.create_random_int_lodtensor(
-        recursive_seq_lens, base_shape, place, low=0, high=dict_size - 1)
-    third_word = fluid.create_random_int_lodtensor(
-        recursive_seq_lens, base_shape, place, low=0, high=dict_size - 1)
-    fourth_word = fluid.create_random_int_lodtensor(
-        recursive_seq_lens, base_shape, place, low=0, high=dict_size - 1)
-    result = inferencer.infer(
-        {
-            'firstw': first_word,
-            'secondw': second_word,
-            'thirdw': third_word,
-            'forthw': fourth_word
-        },
-        return_numpy=False)
-    print(np.array(result[0]))
-def main(use_cuda, is_sparse):
-    if use_cuda and not fluid.core.is_compiled_with_cuda():
-        return
-    params_dirname = "word2vec.inference.model"
-    train(
-        use_cuda=use_cuda,
-        train_program=partial(train_program, is_sparse),
-        params_dirname=params_dirname)
-    infer(
-        use_cuda=use_cuda,
-        inference_program=partial(inference_program, is_sparse),
-        params_dirname=params_dirname)
-if __name__ == '__main__':
-    for use_cuda in (False, True):
-        for is_sparse in (False, True):
-            main(use_cuda=use_cuda, is_sparse=is_sparse)