Unverified commit dce0383f, authored by Tao Luo, committed by GitHub

Merge pull request #8404 from Xreki/core_refine_inference

Refine the inference API and unittests
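The user-visible change: fluid.io.save_inference_model and fluid.io.load_inference_model now take model_filename and params_filename instead of a single save_file_name/load_file_name, and the other save/load helpers take filename. A minimal usage sketch under assumed names (the x layer, the fc output, and the directory below are placeholders, not code from this PR):

import paddle.fluid as fluid

# Placeholder network so that there is something to save.
x = fluid.layers.data(name="x", shape=[13], dtype="float32")
y_predict = fluid.layers.fc(input=x, size=1)

place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())

# Default mode: the program is written to "__model__" and every parameter is
# saved to its own file inside the directory.
fluid.io.save_inference_model("fit_a_line.inference.model", ["x"],
                              [y_predict], exe)

# Loading mirrors the saving convention; the two optional filename arguments
# are only needed when the program/parameters were saved into combined files.
[inference_program, feed_target_names,
 fetch_targets] = fluid.io.load_inference_model("fit_a_line.inference.model",
                                                exe)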
@@ -31,8 +31,14 @@ std::ostream &operator<<(std::ostream &os, const LoD &lod) {
   os << "{";
   for (auto &v : lod) {
     os << "{";
+    bool is_first = true;
     for (auto &i : v) {
-      os << i << ",";
+      if (is_first) {
+        os << i;
+        is_first = false;
+      } else {
+        os << ", " << i;
+      }
     }
     os << "}";
   }
...
@@ -32,23 +32,11 @@ void ReadBinaryFile(const std::string& filename, std::string& contents) {
   inputfs.close();
 }
 
-bool IsParameter(const framework::VarDesc* var,
-                 const framework::ProgramDesc& main_program) {
-  if (var->Persistable()) {
-    // There are many unreachable variables in the program
-    for (size_t i = 0; i < main_program.Size(); ++i) {
-      const framework::BlockDesc& block = main_program.Block(i);
-      for (auto* op : block.AllOps()) {
-        if (op->Type() == framework::kFeedOpType) {
-          continue;
-        }
-        for (auto input_argument_name : op->InputArgumentNames()) {
-          if (input_argument_name == var->Name()) {
-            return true;
-          }
-        }
-      }
-    }
+bool IsPersistable(const framework::VarDesc* var) {
+  if (var->Persistable() &&
+      var->GetType() != framework::proto::VarType::FEED_MINIBATCH &&
+      var->GetType() != framework::proto::VarType::FETCH_LIST) {
+    return true;
   }
   return false;
 }
@@ -65,8 +53,8 @@ void LoadPersistables(framework::Executor& executor,
   std::vector<std::string> paramlist;
   for (auto* var : global_block.AllVars()) {
-    if (IsParameter(var, main_program)) {
-      VLOG(3) << "parameter's name: " << var->Name();
+    if (IsPersistable(var)) {
+      VLOG(3) << "persistable variable's name: " << var->Name();
       framework::VarDesc* new_var = load_block->Var(var->Name());
       new_var->SetShape(var->GetShape());
@@ -101,7 +89,6 @@ void LoadPersistables(framework::Executor& executor,
   executor.Run(*load_program, &scope, 0, true, true);
-  VLOG(3) << "Ran loading successfully";
 
   delete load_program;
 }
...
@@ -30,5 +30,5 @@ inference_test(label_semantic_roles)
 inference_test(recognize_digits ARGS mlp conv)
 inference_test(recommender_system)
 #inference_test(rnn_encoder_decoder)
-inference_test(understand_sentiment)
+inference_test(understand_sentiment ARGS conv)
 inference_test(word2vec)
@@ -32,16 +32,42 @@ TEST(inference, label_semantic_roles) {
   paddle::framework::LoDTensor word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1,
       ctx_p2, mark;
   paddle::framework::LoD lod{{0, 4, 10}};
+  int64_t word_dict_len = 44068;
+  int64_t predicate_dict_len = 3162;
+  int64_t mark_dict_len = 2;
 
-  SetupLoDTensor(word, lod, static_cast<int64_t>(0), static_cast<int64_t>(1));
-  SetupLoDTensor(
-      predicate, lod, static_cast<int64_t>(0), static_cast<int64_t>(1));
-  SetupLoDTensor(ctx_n2, lod, static_cast<int64_t>(0), static_cast<int64_t>(1));
-  SetupLoDTensor(ctx_n1, lod, static_cast<int64_t>(0), static_cast<int64_t>(1));
-  SetupLoDTensor(ctx_0, lod, static_cast<int64_t>(0), static_cast<int64_t>(1));
-  SetupLoDTensor(ctx_p1, lod, static_cast<int64_t>(0), static_cast<int64_t>(1));
-  SetupLoDTensor(ctx_p2, lod, static_cast<int64_t>(0), static_cast<int64_t>(1));
-  SetupLoDTensor(mark, lod, static_cast<int64_t>(0), static_cast<int64_t>(1));
+  SetupLoDTensor(word,
+                 lod,
+                 static_cast<int64_t>(0),
+                 static_cast<int64_t>(word_dict_len - 1));
+  SetupLoDTensor(predicate,
+                 lod,
+                 static_cast<int64_t>(0),
+                 static_cast<int64_t>(predicate_dict_len - 1));
+  SetupLoDTensor(ctx_n2,
+                 lod,
+                 static_cast<int64_t>(0),
+                 static_cast<int64_t>(word_dict_len - 1));
+  SetupLoDTensor(ctx_n1,
+                 lod,
+                 static_cast<int64_t>(0),
+                 static_cast<int64_t>(word_dict_len - 1));
+  SetupLoDTensor(ctx_0,
+                 lod,
+                 static_cast<int64_t>(0),
+                 static_cast<int64_t>(word_dict_len - 1));
+  SetupLoDTensor(ctx_p1,
+                 lod,
+                 static_cast<int64_t>(0),
+                 static_cast<int64_t>(word_dict_len - 1));
+  SetupLoDTensor(ctx_p2,
+                 lod,
+                 static_cast<int64_t>(0),
+                 static_cast<int64_t>(word_dict_len - 1));
+  SetupLoDTensor(mark,
+                 lod,
+                 static_cast<int64_t>(0),
+                 static_cast<int64_t>(mark_dict_len - 1));
 
   std::vector<paddle::framework::LoDTensor*> cpu_feeds;
   cpu_feeds.push_back(&word);
...
@@ -31,7 +31,12 @@ TEST(inference, understand_sentiment) {
   paddle::framework::LoDTensor words;
   paddle::framework::LoD lod{{0, 4, 10}};
-  SetupLoDTensor(words, lod, static_cast<int64_t>(0), static_cast<int64_t>(10));
+  int64_t word_dict_len = 5147;
+  SetupLoDTensor(words,
+                 lod,
+                 static_cast<int64_t>(0),
+                 static_cast<int64_t>(word_dict_len - 1));
 
   std::vector<paddle::framework::LoDTensor*> cpu_feeds;
   cpu_feeds.push_back(&words);
...
@@ -31,12 +31,12 @@ TEST(inference, word2vec) {
   paddle::framework::LoDTensor first_word, second_word, third_word, fourth_word;
   paddle::framework::LoD lod{{0, 1}};
-  int64_t dict_size = 2072;  // Hard-coding the size of dictionary
+  int64_t dict_size = 2073;  // The size of dictionary
 
-  SetupLoDTensor(first_word, lod, static_cast<int64_t>(0), dict_size);
-  SetupLoDTensor(second_word, lod, static_cast<int64_t>(0), dict_size);
-  SetupLoDTensor(third_word, lod, static_cast<int64_t>(0), dict_size);
-  SetupLoDTensor(fourth_word, lod, static_cast<int64_t>(0), dict_size);
+  SetupLoDTensor(first_word, lod, static_cast<int64_t>(0), dict_size - 1);
+  SetupLoDTensor(second_word, lod, static_cast<int64_t>(0), dict_size - 1);
+  SetupLoDTensor(third_word, lod, static_cast<int64_t>(0), dict_size - 1);
+  SetupLoDTensor(fourth_word, lod, static_cast<int64_t>(0), dict_size - 1);
 
   std::vector<paddle::framework::LoDTensor*> cpu_feeds;
   cpu_feeds.push_back(&first_word);
...
@@ -101,8 +101,8 @@ void TestInference(const std::string& dirname,
   if (IsCombined) {
     // All parameters are saved in a single file.
    // Hard-coding the file names of program and parameters in unittest.
-    // Users are free to specify different filename
-    // (provided: the filenames are changed in the python api as well: io.py)
+    // The file names should be consistent with that used in Python API
+    // `fluid.io.save_inference_model`.
     std::string prog_filename = "__model_combined__";
     std::string param_filename = "__params_combined__";
     inference_program = paddle::inference::Load(executor,
...
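As a hedged cross-reference for the comment above, a Python sketch of the call that produces exactly the files the unittest hard-codes (the directory name and the small stand-in network are placeholders, not code from this PR):

import paddle.fluid as fluid

# Placeholder network and executor, just to have parameters to write out.
img = fluid.layers.data(name="img", shape=[1, 28, 28], dtype="float32")
prediction = fluid.layers.fc(input=img, size=10, act="softmax")

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())

# Writes "__model_combined__" and "__params_combined__" under the directory,
# matching the names hard-coded in the C++ unittest above.
fluid.io.save_inference_model("recognize_digits_mlp.inference.model",
                              ["img"], [prediction], exe,
                              model_filename="__model_combined__",
                              params_filename="__params_combined__")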
@@ -68,7 +68,7 @@ def save_vars(executor,
               main_program=None,
               vars=None,
               predicate=None,
-              save_file_name=None):
+              filename=None):
     """
     Save variables to directory by executor.
@@ -80,8 +80,8 @@ def save_vars(executor,
                       as a bool. If it returns true, the corresponding input variable will be saved.
     :param vars: variables need to be saved. If vars is specified, program & predicate
                  will be ignored
-    :param save_file_name: The name of a single file that all vars are saved to.
+    :param filename: The name of a single file that all vars are saved to.
         If it is None, save variables to separate files.
 
     :return: None
     """
@@ -95,7 +95,7 @@ def save_vars(executor,
             executor,
             dirname=dirname,
             vars=filter(predicate, main_program.list_vars()),
-            save_file_name=save_file_name)
+            filename=filename)
     else:
         save_program = Program()
         save_block = save_program.global_block()
@@ -103,7 +103,7 @@ def save_vars(executor,
         save_var_map = {}
         for each_var in vars:
             new_var = _clone_var_in_block_(save_block, each_var)
-            if save_file_name is None:
+            if filename is None:
                 save_block.append_op(
                     type='save',
                     inputs={'X': [new_var]},
@@ -112,7 +112,7 @@ def save_vars(executor,
             else:
                 save_var_map[new_var.name] = new_var
 
-        if save_file_name is not None:
+        if filename is not None:
             save_var_list = []
             for name in sorted(save_var_map.keys()):
                 save_var_list.append(save_var_map[name])
@@ -121,12 +121,12 @@ def save_vars(executor,
                 type='save_combine',
                 inputs={'X': save_var_list},
                 outputs={},
-                attrs={'file_path': os.path.join(dirname, save_file_name)})
+                attrs={'file_path': os.path.join(dirname, filename)})
 
         executor.run(save_program)
 
 
-def save_params(executor, dirname, main_program=None, save_file_name=None):
+def save_params(executor, dirname, main_program=None, filename=None):
     """
     Save all parameters to directory with executor.
     """
@@ -136,11 +136,10 @@ def save_params(executor, dirname, main_program=None, save_file_name=None):
         main_program=main_program,
         vars=None,
         predicate=is_parameter,
-        save_file_name=save_file_name)
+        filename=filename)
 
 
-def save_persistables(executor, dirname, main_program=None,
-                      save_file_name=None):
+def save_persistables(executor, dirname, main_program=None, filename=None):
     """
     Save all persistables to directory with executor.
     """
@@ -150,7 +149,7 @@ def save_persistables(executor, dirname, main_program=None,
         main_program=main_program,
         vars=None,
         predicate=is_persistable,
-        save_file_name=save_file_name)
+        filename=filename)
 
 
 def load_vars(executor,
@@ -158,7 +157,7 @@ def load_vars(executor,
               main_program=None,
               vars=None,
               predicate=None,
-              load_file_name=None):
+              filename=None):
     """
     Load variables from directory by executor.
@@ -170,8 +169,8 @@ def load_vars(executor,
                       as a bool. If it returns true, the corresponding input variable will be loaded.
     :param vars: variables need to be loaded. If vars is specified, program &
                  predicate will be ignored
-    :param load_file_name: The name of the single file that all vars are loaded from.
+    :param filename: The name of the single file that all vars are loaded from.
         If it is None, load variables from separate files.
 
     :return: None
     """
@@ -185,7 +184,7 @@ def load_vars(executor,
             executor,
             dirname=dirname,
             vars=filter(predicate, main_program.list_vars()),
-            load_file_name=load_file_name)
+            filename=filename)
     else:
         load_prog = Program()
         load_block = load_prog.global_block()
@@ -194,7 +193,7 @@ def load_vars(executor,
         for each_var in vars:
             assert isinstance(each_var, Variable)
             new_var = _clone_var_in_block_(load_block, each_var)
-            if load_file_name is None:
+            if filename is None:
                 load_block.append_op(
                     type='load',
                     inputs={},
@@ -203,7 +202,7 @@ def load_vars(executor,
             else:
                 load_var_map[new_var.name] = new_var
 
-        if load_file_name is not None:
+        if filename is not None:
             load_var_list = []
             for name in sorted(load_var_map.keys()):
                 load_var_list.append(load_var_map[name])
@@ -212,12 +211,12 @@ def load_vars(executor,
                 type='load_combine',
                 inputs={},
                 outputs={"Out": load_var_list},
-                attrs={'file_path': os.path.join(dirname, load_file_name)})
+                attrs={'file_path': os.path.join(dirname, filename)})
 
         executor.run(load_prog)
 
 
-def load_params(executor, dirname, main_program=None, load_file_name=None):
+def load_params(executor, dirname, main_program=None, filename=None):
     """
     load all parameters from directory by executor.
     """
@@ -226,11 +225,10 @@ def load_params(executor, dirname, main_program=None, load_file_name=None):
         dirname=dirname,
         main_program=main_program,
         predicate=is_parameter,
-        load_file_name=load_file_name)
+        filename=filename)
 
 
-def load_persistables(executor, dirname, main_program=None,
-                      load_file_name=None):
+def load_persistables(executor, dirname, main_program=None, filename=None):
     """
     load all persistables from directory by executor.
     """
@@ -239,7 +237,7 @@ def load_persistables(executor, dirname, main_program=None,
         dirname=dirname,
         main_program=main_program,
         predicate=is_persistable,
-        load_file_name=load_file_name)
+        filename=filename)
 
 
 def get_inference_program(target_vars, main_program=None):
@@ -299,7 +297,8 @@ def save_inference_model(dirname,
                          target_vars,
                          executor,
                          main_program=None,
-                         save_file_name=None):
+                         model_filename=None,
+                         params_filename=None):
     """
     Build a model especially for inference,
     and save it to directory by the executor.
@@ -310,8 +309,11 @@ def save_inference_model(dirname,
     :param executor: executor that save inference model
     :param main_program: original program, which will be pruned to build the inference model.
                          Default default_main_program().
-    :param save_file_name: The name of a single file that all parameters are saved to.
-        If it is None, save parameters to separate files.
+    :param model_filename: The name of file to save inference program.
+        If not specified, default filename `__model__` will be used.
+    :param params_filename: The name of file to save parameters.
+        It is used for the case that all parameters are saved in a single binary file.
+        If not specified, parameters are considered saved in separate files.
 
     :return: None
     """
@@ -342,15 +344,19 @@ def save_inference_model(dirname,
     prepend_feed_ops(inference_program, feeded_var_names)
     append_fetch_ops(inference_program, fetch_var_names)
 
-    if save_file_name == None:
-        model_file_name = dirname + "/__model__"
+    if model_filename is not None:
+        model_filename = os.path.basename(model_filename)
     else:
-        model_file_name = dirname + "/__model_combined__"
+        model_filename = "__model__"
+    model_filename = os.path.join(dirname, model_filename)
+
+    if params_filename is not None:
+        params_filename = os.path.basename(params_filename)
 
-    with open(model_file_name, "wb") as f:
+    with open(model_filename, "wb") as f:
         f.write(inference_program.desc.serialize_to_string())
 
-    save_persistables(executor, dirname, inference_program, save_file_name)
+    save_persistables(executor, dirname, inference_program, params_filename)
 
 
 def get_feed_targets_names(program):
@@ -371,15 +377,21 @@ def get_fetch_targets_names(program):
     return fetch_targets_names
 
 
-def load_inference_model(dirname, executor, load_file_name=None):
+def load_inference_model(dirname,
+                         executor,
+                         model_filename=None,
+                         params_filename=None):
     """
     Load inference model from a directory
 
     :param dirname: directory path
     :param executor: executor that load inference model
-    :param load_file_name: The name of the single file that all parameters are loaded from.
-        If it is None, load parameters from separate files.
+    :param model_filename: The name of file to load inference program.
+        If not specified, default filename `__model__` will be used.
+    :param params_filename: The name of file to load parameters.
+        It is used for the case that all parameters are saved in a single binary file.
+        If not specified, parameters are considered saved in separate files.
 
     :return: [program, feed_target_names, fetch_targets]
              program: program especially for inference.
             feed_target_names: Names of variables that need to feed data
@@ -388,16 +400,20 @@ def load_inference_model(dirname, executor, load_file_name=None):
     if not os.path.isdir(dirname):
         raise ValueError("There is no directory named '%s'", dirname)
 
-    if load_file_name == None:
-        model_file_name = dirname + "/__model__"
+    if model_filename is not None:
+        model_filename = os.path.basename(model_filename)
     else:
-        model_file_name = dirname + "/__model_combined__"
+        model_filename = "__model__"
+    model_filename = os.path.join(dirname, model_filename)
+
+    if params_filename is not None:
+        params_filename = os.path.basename(params_filename)
 
-    with open(model_file_name, "rb") as f:
+    with open(model_filename, "rb") as f:
         program_desc_str = f.read()
     program = Program.parse_from_string(program_desc_str)
-    load_persistables(executor, dirname, program, load_file_name)
+    load_persistables(executor, dirname, program, params_filename)
 
     feed_target_names = get_feed_targets_names(program)
     fetch_target_names = get_fetch_targets_names(program)
...
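Worth noting from the loading code above: only the base name of model_filename/params_filename is kept, and files are always resolved relative to dirname. A small illustration of that resolution (the paths are placeholders):

import os

# Mirrors the resolution logic in save_inference_model / load_inference_model.
dirname = "my_model_dir"                      # placeholder directory
model_filename = "/tmp/__model_combined__"    # placeholder path from a caller

model_filename = os.path.basename(model_filename)       # "__model_combined__"
model_filename = os.path.join(dirname, model_filename)
print(model_filename)                         # my_model_dir/__model_combined__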
@@ -228,32 +228,34 @@ def infer(use_cuda, save_dirname=None):
     place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
     exe = fluid.Executor(place)
 
-    # Use fluid.io.load_inference_model to obtain the inference program desc,
-    # the feed_target_names (the names of variables that will be feeded
-    # data using feed operators), and the fetch_targets (variables that
-    # we want to obtain data from using fetch operators).
-    [inference_program, feed_target_names,
-     fetch_targets] = fluid.io.load_inference_model(save_dirname, exe)
-
-    lod = [0, 4, 10]
-    word_data = create_random_lodtensor(lod, place, low=0, high=1)
-    trg_word = create_random_lodtensor(lod, place, low=0, high=1)
-
-    # Construct feed as a dictionary of {feed_target_name: feed_target_data}
-    # and results will contain a list of data corresponding to fetch_targets.
-    assert feed_target_names[0] == 'source_sequence'
-    assert feed_target_names[1] == 'target_sequence'
-    results = exe.run(inference_program,
-                      feed={
-                          feed_target_names[0]: word_data,
-                          feed_target_names[1]: trg_word,
-                      },
-                      fetch_list=fetch_targets,
-                      return_numpy=False)
-    print(results[0].lod())
-    np_data = np.array(results[0])
-    print("Inference shape: ", np_data.shape)
-    print("Inference results: ", np_data)
+    inference_scope = fluid.core.Scope()
+    with fluid.scope_guard(inference_scope):
+        # Use fluid.io.load_inference_model to obtain the inference program desc,
+        # the feed_target_names (the names of variables that will be feeded
+        # data using feed operators), and the fetch_targets (variables that
+        # we want to obtain data from using fetch operators).
+        [inference_program, feed_target_names,
+         fetch_targets] = fluid.io.load_inference_model(save_dirname, exe)
+
+        lod = [0, 4, 10]
+        word_data = create_random_lodtensor(lod, place, low=0, high=1)
+        trg_word = create_random_lodtensor(lod, place, low=0, high=1)
+
+        # Construct feed as a dictionary of {feed_target_name: feed_target_data}
+        # and results will contain a list of data corresponding to fetch_targets.
+        assert feed_target_names[0] == 'source_sequence'
+        assert feed_target_names[1] == 'target_sequence'
+        results = exe.run(inference_program,
+                          feed={
+                              feed_target_names[0]: word_data,
+                              feed_target_names[1]: trg_word,
+                          },
+                          fetch_list=fetch_targets,
+                          return_numpy=False)
+        print(results[0].lod())
+        np_data = np.array(results[0])
+        print("Inference shape: ", np_data.shape)
+        print("Inference results: ", np_data)
 
 
 def main(use_cuda):
...
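The recurring change in these book tests is running inference inside its own scope. A minimal sketch of the pattern (the model directory is a placeholder and must contain a previously saved inference model): variables created by load_inference_model and exe.run live in inference_scope rather than in the global scope used for training.

import paddle.fluid as fluid

place = fluid.CPUPlace()
exe = fluid.Executor(place)

inference_scope = fluid.core.Scope()
with fluid.scope_guard(inference_scope):
    # Everything created here (parameters loaded from disk, feed/fetch vars)
    # is held by inference_scope and does not leak into the training scope.
    [inference_program, feed_target_names,
     fetch_targets] = fluid.io.load_inference_model("some_model_dir", exe)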
@@ -72,23 +72,26 @@ def infer(use_cuda, save_dirname=None):
     place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
     exe = fluid.Executor(place)
 
-    # Use fluid.io.load_inference_model to obtain the inference program desc,
-    # the feed_target_names (the names of variables that will be feeded
-    # data using feed operators), and the fetch_targets (variables that
-    # we want to obtain data from using fetch operators).
-    [inference_program, feed_target_names,
-     fetch_targets] = fluid.io.load_inference_model(save_dirname, exe)
-
-    # The input's dimension should be 2-D and the second dim is 13
-    # The input data should be >= 0
-    batch_size = 10
-    tensor_x = numpy.random.uniform(0, 10, [batch_size, 13]).astype("float32")
-    assert feed_target_names[0] == 'x'
-    results = exe.run(inference_program,
-                      feed={feed_target_names[0]: tensor_x},
-                      fetch_list=fetch_targets)
-    print("infer shape: ", results[0].shape)
-    print("infer results: ", results[0])
+    inference_scope = fluid.core.Scope()
+    with fluid.scope_guard(inference_scope):
+        # Use fluid.io.load_inference_model to obtain the inference program desc,
+        # the feed_target_names (the names of variables that will be feeded
+        # data using feed operators), and the fetch_targets (variables that
+        # we want to obtain data from using fetch operators).
+        [inference_program, feed_target_names,
+         fetch_targets] = fluid.io.load_inference_model(save_dirname, exe)
+
+        # The input's dimension should be 2-D and the second dim is 13
+        # The input data should be >= 0
+        batch_size = 10
+        tensor_x = numpy.random.uniform(0, 10,
+                                        [batch_size, 13]).astype("float32")
+        assert feed_target_names[0] == 'x'
+        results = exe.run(inference_program,
+                          feed={feed_target_names[0]: tensor_x},
+                          fetch_list=fetch_targets)
+        print("infer shape: ", results[0].shape)
+        print("infer results: ", results[0])
 
 
 def main(use_cuda):
...
@@ -174,22 +174,26 @@ def infer(use_cuda, save_dirname=None):
     place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
     exe = fluid.Executor(place)
 
-    # Use fluid.io.load_inference_model to obtain the inference program desc,
-    # the feed_target_names (the names of variables that will be feeded
-    # data using feed operators), and the fetch_targets (variables that
-    # we want to obtain data from using fetch operators).
-    [inference_program, feed_target_names,
-     fetch_targets] = fluid.io.load_inference_model(save_dirname, exe)
-
-    # The input's dimension of conv should be 4-D or 5-D.
-    tensor_img = numpy.random.rand(1, 3, 32, 32).astype("float32")
-
-    # Construct feed as a dictionary of {feed_target_name: feed_target_data}
-    # and results will contain a list of data corresponding to fetch_targets.
-    results = exe.run(inference_program,
-                      feed={feed_target_names[0]: tensor_img},
-                      fetch_list=fetch_targets)
-    print("infer results: ", results[0])
+    inference_scope = fluid.core.Scope()
+    with fluid.scope_guard(inference_scope):
+        # Use fluid.io.load_inference_model to obtain the inference program desc,
+        # the feed_target_names (the names of variables that will be feeded
+        # data using feed operators), and the fetch_targets (variables that
+        # we want to obtain data from using fetch operators).
+        [inference_program, feed_target_names,
+         fetch_targets] = fluid.io.load_inference_model(save_dirname, exe)
+
+        # The input's dimension of conv should be 4-D or 5-D.
+        # Use normilized image pixels as input data, which should be in the range [0, 1.0].
+        batch_size = 1
+        tensor_img = numpy.random.rand(batch_size, 3, 32, 32).astype("float32")
+
+        # Construct feed as a dictionary of {feed_target_name: feed_target_data}
+        # and results will contain a list of data corresponding to fetch_targets.
+        results = exe.run(inference_program,
+                          feed={feed_target_names[0]: tensor_img},
+                          fetch_list=fetch_targets)
+        print("infer results: ", results[0])
 
 
 def main(net_type, use_cuda):
...
@@ -26,7 +26,7 @@ import unittest
 word_dict, verb_dict, label_dict = conll05.get_dict()
 word_dict_len = len(word_dict)
 label_dict_len = len(label_dict)
-pred_len = len(verb_dict)
+pred_dict_len = len(verb_dict)
 mark_dict_len = 2
 word_dim = 32
@@ -53,7 +53,7 @@ def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark,
     # 8 features
     predicate_embedding = fluid.layers.embedding(
         input=predicate,
-        size=[pred_len, word_dim],
+        size=[pred_dict_len, word_dim],
         dtype='float32',
         is_sparse=IS_SPARSE,
         param_attr='vemb')
@@ -234,6 +234,7 @@ def train(use_cuda, save_dirname=None):
             # Set the threshold low to speed up the CI test
             if float(pass_precision) > 0.05:
                 if save_dirname is not None:
+                    # TODO(liuyiqun): Change the target to crf_decode
                     fluid.io.save_inference_model(save_dirname, [
                         'word_data', 'verb_data', 'ctx_n2_data',
                         'ctx_n1_data', 'ctx_0_data', 'ctx_p1_data',
@@ -251,51 +252,60 @@ def infer(use_cuda, save_dirname=None):
     place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
     exe = fluid.Executor(place)
 
-    # Use fluid.io.load_inference_model to obtain the inference program desc,
-    # the feed_target_names (the names of variables that will be feeded
-    # data using feed operators), and the fetch_targets (variables that
-    # we want to obtain data from using fetch operators).
-    [inference_program, feed_target_names,
-     fetch_targets] = fluid.io.load_inference_model(save_dirname, exe)
-
-    lod = [0, 4, 10]
-    ts_word = create_random_lodtensor(lod, place, low=0, high=1)
-    ts_pred = create_random_lodtensor(lod, place, low=0, high=1)
-    ts_ctx_n2 = create_random_lodtensor(lod, place, low=0, high=1)
-    ts_ctx_n1 = create_random_lodtensor(lod, place, low=0, high=1)
-    ts_ctx_0 = create_random_lodtensor(lod, place, low=0, high=1)
-    ts_ctx_p1 = create_random_lodtensor(lod, place, low=0, high=1)
-    ts_ctx_p2 = create_random_lodtensor(lod, place, low=0, high=1)
-    ts_mark = create_random_lodtensor(lod, place, low=0, high=1)
-
-    # Construct feed as a dictionary of {feed_target_name: feed_target_data}
-    # and results will contain a list of data corresponding to fetch_targets.
-    assert feed_target_names[0] == 'word_data'
-    assert feed_target_names[1] == 'verb_data'
-    assert feed_target_names[2] == 'ctx_n2_data'
-    assert feed_target_names[3] == 'ctx_n1_data'
-    assert feed_target_names[4] == 'ctx_0_data'
-    assert feed_target_names[5] == 'ctx_p1_data'
-    assert feed_target_names[6] == 'ctx_p2_data'
-    assert feed_target_names[7] == 'mark_data'
-
-    results = exe.run(inference_program,
-                      feed={
-                          feed_target_names[0]: ts_word,
-                          feed_target_names[1]: ts_pred,
-                          feed_target_names[2]: ts_ctx_n2,
-                          feed_target_names[3]: ts_ctx_n1,
-                          feed_target_names[4]: ts_ctx_0,
-                          feed_target_names[5]: ts_ctx_p1,
-                          feed_target_names[6]: ts_ctx_p2,
-                          feed_target_names[7]: ts_mark
-                      },
-                      fetch_list=fetch_targets,
-                      return_numpy=False)
-    print(results[0].lod())
-    np_data = np.array(results[0])
-    print("Inference Shape: ", np_data.shape)
-    print("Inference results: ", np_data)
+    inference_scope = fluid.core.Scope()
+    with fluid.scope_guard(inference_scope):
+        # Use fluid.io.load_inference_model to obtain the inference program desc,
+        # the feed_target_names (the names of variables that will be feeded
+        # data using feed operators), and the fetch_targets (variables that
+        # we want to obtain data from using fetch operators).
+        [inference_program, feed_target_names,
+         fetch_targets] = fluid.io.load_inference_model(save_dirname, exe)
+
+        lod = [0, 4, 10]
+        word = create_random_lodtensor(
+            lod, place, low=0, high=word_dict_len - 1)
+        pred = create_random_lodtensor(
+            lod, place, low=0, high=pred_dict_len - 1)
+        ctx_n2 = create_random_lodtensor(
+            lod, place, low=0, high=word_dict_len - 1)
+        ctx_n1 = create_random_lodtensor(
+            lod, place, low=0, high=word_dict_len - 1)
+        ctx_0 = create_random_lodtensor(
+            lod, place, low=0, high=word_dict_len - 1)
+        ctx_p1 = create_random_lodtensor(
+            lod, place, low=0, high=word_dict_len - 1)
+        ctx_p2 = create_random_lodtensor(
+            lod, place, low=0, high=word_dict_len - 1)
+        mark = create_random_lodtensor(
+            lod, place, low=0, high=mark_dict_len - 1)
+
+        # Construct feed as a dictionary of {feed_target_name: feed_target_data}
+        # and results will contain a list of data corresponding to fetch_targets.
+        assert feed_target_names[0] == 'word_data'
+        assert feed_target_names[1] == 'verb_data'
+        assert feed_target_names[2] == 'ctx_n2_data'
+        assert feed_target_names[3] == 'ctx_n1_data'
+        assert feed_target_names[4] == 'ctx_0_data'
+        assert feed_target_names[5] == 'ctx_p1_data'
+        assert feed_target_names[6] == 'ctx_p2_data'
+        assert feed_target_names[7] == 'mark_data'
+
+        results = exe.run(inference_program,
+                          feed={
+                              feed_target_names[0]: word,
+                              feed_target_names[1]: pred,
+                              feed_target_names[2]: ctx_n2,
+                              feed_target_names[3]: ctx_n1,
+                              feed_target_names[4]: ctx_0,
+                              feed_target_names[5]: ctx_p1,
+                              feed_target_names[6]: ctx_p2,
+                              feed_target_names[7]: mark
+                          },
+                          fetch_list=fetch_targets,
+                          return_numpy=False)
+        print(results[0].lod())
+        np_data = np.array(results[0])
+        print("Inference Shape: ", np_data.shape)
 
 
 def main(use_cuda):
...
@@ -78,7 +78,12 @@ def conv_net(img, label):
     return loss_net(conv_pool_2, label)
 
 
-def train(nn_type, use_cuda, parallel, save_dirname, save_param_filename):
+def train(nn_type,
+          use_cuda,
+          parallel,
+          save_dirname=None,
+          model_filename=None,
+          params_filename=None):
     if use_cuda and not fluid.core.is_compiled_with_cuda():
         return
     img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
@@ -146,7 +151,8 @@ def train(nn_type, use_cuda, parallel, save_dirname, save_param_filename):
                 fluid.io.save_inference_model(
                     save_dirname, ["img"], [prediction],
                     exe,
-                    save_file_name=save_param_filename)
+                    model_filename=model_filename,
+                    params_filename=params_filename)
                 return
         else:
             print(
@@ -158,54 +164,62 @@ def train(nn_type, use_cuda, parallel, save_dirname, save_param_filename):
         raise AssertionError("Loss of recognize digits is too large")
 
 
-def infer(use_cuda, save_dirname=None, param_filename=None):
+def infer(use_cuda,
+          save_dirname=None,
+          model_filename=None,
+          params_filename=None):
     if save_dirname is None:
         return
 
     place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
     exe = fluid.Executor(place)
 
-    # Use fluid.io.load_inference_model to obtain the inference program desc,
-    # the feed_target_names (the names of variables that will be feeded
-    # data using feed operators), and the fetch_targets (variables that
-    # we want to obtain data from using fetch operators).
-    [inference_program, feed_target_names, fetch_targets
-     ] = fluid.io.load_inference_model(save_dirname, exe, param_filename)
-
-    # The input's dimension of conv should be 4-D or 5-D.
-    # Use normilized image pixels as input data, which should be in the range [-1.0, 1.0].
-    batch_size = 1
-    tensor_img = numpy.random.uniform(-1.0, 1.0,
-                                      [batch_size, 1, 28, 28]).astype("float32")
-
-    # Construct feed as a dictionary of {feed_target_name: feed_target_data}
-    # and results will contain a list of data corresponding to fetch_targets.
-    results = exe.run(inference_program,
-                      feed={feed_target_names[0]: tensor_img},
-                      fetch_list=fetch_targets)
-    print("infer results: ", results[0])
+    inference_scope = fluid.core.Scope()
+    with fluid.scope_guard(inference_scope):
+        # Use fluid.io.load_inference_model to obtain the inference program desc,
+        # the feed_target_names (the names of variables that will be feeded
+        # data using feed operators), and the fetch_targets (variables that
+        # we want to obtain data from using fetch operators).
+        [inference_program, feed_target_names,
+         fetch_targets] = fluid.io.load_inference_model(
+             save_dirname, exe, model_filename, params_filename)
+
+        # The input's dimension of conv should be 4-D or 5-D.
+        # Use normilized image pixels as input data, which should be in the range [-1.0, 1.0].
+        batch_size = 1
+        tensor_img = numpy.random.uniform(
+            -1.0, 1.0, [batch_size, 1, 28, 28]).astype("float32")
+
+        # Construct feed as a dictionary of {feed_target_name: feed_target_data}
+        # and results will contain a list of data corresponding to fetch_targets.
+        results = exe.run(inference_program,
+                          feed={feed_target_names[0]: tensor_img},
+                          fetch_list=fetch_targets)
+        print("infer results: ", results[0])
 
 
 def main(use_cuda, parallel, nn_type, combine):
+    save_dirname = None
+    model_filename = None
+    params_filename = None
     if not use_cuda and not parallel:
         save_dirname = "recognize_digits_" + nn_type + ".inference.model"
-        save_filename = None
         if combine == True:
-            save_filename = "__params_combined__"
-    else:
-        save_dirname = None
-        save_filename = None
+            model_filename = "__model_combined__"
+            params_filename = "__params_combined__"
 
     train(
         nn_type=nn_type,
         use_cuda=use_cuda,
         parallel=parallel,
         save_dirname=save_dirname,
-        save_param_filename=save_filename)
+        model_filename=model_filename,
+        params_filename=params_filename)
     infer(
         use_cuda=use_cuda,
         save_dirname=save_dirname,
-        param_filename=save_filename)
+        model_filename=model_filename,
+        params_filename=params_filename)
 
 
 class TestRecognizeDigits(unittest.TestCase):
...
@@ -251,13 +251,6 @@ def infer(use_cuda, save_dirname=None):
     place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
     exe = fluid.Executor(place)
 
-    # Use fluid.io.load_inference_model to obtain the inference program desc,
-    # the feed_target_names (the names of variables that will be feeded
-    # data using feed operators), and the fetch_targets (variables that
-    # we want to obtain data from using fetch operators).
-    [inference_program, feed_target_names,
-     fetch_targets] = fluid.io.load_inference_model(save_dirname, exe)
-
     def create_lod_tensor(data, lod=None):
         tensor = fluid.LoDTensor()
         if lod is None:
@@ -275,44 +268,53 @@ def infer(use_cuda, save_dirname=None):
         tensor.set(flattened_data, place)
         return tensor
 
-    # Use the first data from paddle.dataset.movielens.test() as input
-    assert feed_target_names[0] == "user_id"
-    user_id = create_lod_tensor([[1]])
-
-    assert feed_target_names[1] == "gender_id"
-    gender_id = create_lod_tensor([[1]])
-
-    assert feed_target_names[2] == "age_id"
-    age_id = create_lod_tensor([[0]])
-
-    assert feed_target_names[3] == "job_id"
-    job_id = create_lod_tensor([[10]])
-
-    assert feed_target_names[4] == "movie_id"
-    movie_id = create_lod_tensor([[783]])
-
-    assert feed_target_names[5] == "category_id"
-    category_id = create_lod_tensor([[10], [8], [9]], [[0, 3]])
-
-    assert feed_target_names[6] == "movie_title"
-    movie_title = create_lod_tensor([[1069], [4140], [2923], [710], [988]],
-                                    [[0, 5]])
-
-    # Construct feed as a dictionary of {feed_target_name: feed_target_data}
-    # and results will contain a list of data corresponding to fetch_targets.
-    results = exe.run(inference_program,
-                      feed={
-                          feed_target_names[0]: user_id,
-                          feed_target_names[1]: gender_id,
-                          feed_target_names[2]: age_id,
-                          feed_target_names[3]: job_id,
-                          feed_target_names[4]: movie_id,
-                          feed_target_names[5]: category_id,
-                          feed_target_names[6]: movie_title
-                      },
-                      fetch_list=fetch_targets,
-                      return_numpy=False)
-    print("inferred score: ", np.array(results[0]))
+    inference_scope = fluid.core.Scope()
+    with fluid.scope_guard(inference_scope):
+        # Use fluid.io.load_inference_model to obtain the inference program desc,
+        # the feed_target_names (the names of variables that will be feeded
+        # data using feed operators), and the fetch_targets (variables that
+        # we want to obtain data from using fetch operators).
+        [inference_program, feed_target_names,
+         fetch_targets] = fluid.io.load_inference_model(save_dirname, exe)
+
+        # Use the first data from paddle.dataset.movielens.test() as input
+        assert feed_target_names[0] == "user_id"
+        user_id = create_lod_tensor([[1]])
+
+        assert feed_target_names[1] == "gender_id"
+        gender_id = create_lod_tensor([[1]])
+
+        assert feed_target_names[2] == "age_id"
+        age_id = create_lod_tensor([[0]])
+
+        assert feed_target_names[3] == "job_id"
+        job_id = create_lod_tensor([[10]])
+
+        assert feed_target_names[4] == "movie_id"
+        movie_id = create_lod_tensor([[783]])
+
+        assert feed_target_names[5] == "category_id"
+        category_id = create_lod_tensor([[10], [8], [9]], [[0, 3]])
+
+        assert feed_target_names[6] == "movie_title"
+        movie_title = create_lod_tensor([[1069], [4140], [2923], [710], [988]],
+                                        [[0, 5]])
+
+        # Construct feed as a dictionary of {feed_target_name: feed_target_data}
+        # and results will contain a list of data corresponding to fetch_targets.
+        results = exe.run(inference_program,
+                          feed={
+                              feed_target_names[0]: user_id,
+                              feed_target_names[1]: gender_id,
+                              feed_target_names[2]: age_id,
+                              feed_target_names[3]: job_id,
+                              feed_target_names[4]: movie_id,
+                              feed_target_names[5]: category_id,
+                              feed_target_names[6]: movie_title
+                          },
+                          fetch_list=fetch_targets,
+                          return_numpy=False)
+        print("inferred score: ", np.array(results[0]))
 
 
 def main(use_cuda):
...
 # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -193,36 +193,39 @@ def train(word_dict, net_method, use_cuda, parallel=False, save_dirname=None):
             net_method.__name__))
 
 
-def infer(use_cuda, save_dirname=None):
+def infer(word_dict, use_cuda, save_dirname=None):
     if save_dirname is None:
         return
 
     place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
     exe = fluid.Executor(place)
 
-    # Use fluid.io.load_inference_model to obtain the inference program desc,
-    # the feed_target_names (the names of variables that will be feeded
-    # data using feed operators), and the fetch_targets (variables that
-    # we want to obtain data from using fetch operators).
-    [inference_program, feed_target_names,
-     fetch_targets] = fluid.io.load_inference_model(save_dirname, exe)
-
-    lod = [0, 4, 10]
-    word_dict = paddle.dataset.imdb.word_dict()
-    tensor_words = create_random_lodtensor(
-        lod, place, low=0, high=len(word_dict) - 1)
-
-    # Construct feed as a dictionary of {feed_target_name: feed_target_data}
-    # and results will contain a list of data corresponding to fetch_targets.
-    assert feed_target_names[0] == "words"
-    results = exe.run(inference_program,
-                      feed={feed_target_names[0]: tensor_words},
-                      fetch_list=fetch_targets,
-                      return_numpy=False)
-    print(results[0].lod())
-    np_data = np.array(results[0])
-    print("Inference Shape: ", np_data.shape)
-    print("Inference results: ", np_data)
+    inference_scope = fluid.core.Scope()
+    with fluid.scope_guard(inference_scope):
+        # Use fluid.io.load_inference_model to obtain the inference program desc,
+        # the feed_target_names (the names of variables that will be feeded
+        # data using feed operators), and the fetch_targets (variables that
+        # we want to obtain data from using fetch operators).
+        [inference_program, feed_target_names,
+         fetch_targets] = fluid.io.load_inference_model(save_dirname, exe)
+
+        word_dict_len = len(word_dict)
+
+        lod = [0, 4, 10]
+        tensor_words = create_random_lodtensor(
+            lod, place, low=0, high=word_dict_len - 1)
+
+        # Construct feed as a dictionary of {feed_target_name: feed_target_data}
+        # and results will contain a list of data corresponding to fetch_targets.
+        assert feed_target_names[0] == "words"
+        results = exe.run(inference_program,
+                          feed={feed_target_names[0]: tensor_words},
+                          fetch_list=fetch_targets,
+                          return_numpy=False)
+        print(results[0].lod())
+        np_data = np.array(results[0])
+        print("Inference Shape: ", np_data.shape)
+        print("Inference results: ", np_data)
 
 
 def main(word_dict, net_method, use_cuda, parallel=False, save_dirname=None):
@@ -258,7 +261,7 @@ class TestUnderstandSentiment(unittest.TestCase):
                 self.word_dict,
                 net_method=convolution_net,
                 use_cuda=False,
-                save_dirname="understand_sentiment.inference.model")
+                save_dirname="understand_sentiment_conv.inference.model")
 
     def test_conv_cpu_parallel(self):
         with self.new_program_scope():
@@ -271,7 +274,11 @@ class TestUnderstandSentiment(unittest.TestCase):
     @unittest.skip(reason="make CI faster")
     def test_stacked_lstm_cpu(self):
         with self.new_program_scope():
-            main(self.word_dict, net_method=stacked_lstm_net, use_cuda=False)
+            main(
+                self.word_dict,
+                net_method=stacked_lstm_net,
+                use_cuda=False,
+                save_dirname="understand_sentiment_stacked_lstm.inference.model")
 
     def test_stacked_lstm_cpu_parallel(self):
         with self.new_program_scope():
@@ -287,7 +294,7 @@ class TestUnderstandSentiment(unittest.TestCase):
                 self.word_dict,
                 net_method=convolution_net,
                 use_cuda=True,
-                save_dirname="understand_sentiment.inference.model")
+                save_dirname="understand_sentiment_conv.inference.model")
 
     def test_conv_gpu_parallel(self):
         with self.new_program_scope():
@@ -300,7 +307,11 @@ class TestUnderstandSentiment(unittest.TestCase):
     @unittest.skip(reason="make CI faster")
     def test_stacked_lstm_gpu(self):
         with self.new_program_scope():
-            main(self.word_dict, net_method=stacked_lstm_net, use_cuda=True)
+            main(
+                self.word_dict,
+                net_method=stacked_lstm_net,
+                use_cuda=True,
+                save_dirname="understand_sentiment_stacked_lstm.inference.model")
 
     def test_stacked_lstm_gpu_parallel(self):
         with self.new_program_scope():
...
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
@@ -21,6 +22,7 @@ import sys
 
 
 def create_random_lodtensor(lod, place, low, high):
+    # The range of data elements is [low, high]
     data = np.random.random_integers(low, high, [lod[-1], 1]).astype("int64")
     res = fluid.LoDTensor()
     res.set(data, place)
@@ -28,54 +30,7 @@ def create_random_lodtensor(lod, place, low, high):
     return res
 
 
-def infer(use_cuda, save_dirname=None):
-    if save_dirname is None:
-        return
-
-    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
-    exe = fluid.Executor(place)
-
-    # Use fluid.io.load_inference_model to obtain the inference program desc,
-    # the feed_target_names (the names of variables that will be feeded
-    # data using feed operators), and the fetch_targets (variables that
-    # we want to obtain data from using fetch operators).
-    [inference_program, feed_target_names,
-     fetch_targets] = fluid.io.load_inference_model(save_dirname, exe)
-
-    word_dict = paddle.dataset.imikolov.build_dict()
-    dict_size = len(word_dict) - 1
-
-    # Setup input, by creating 4 words, and setting up lod required for
-    # lookup_table_op
-    lod = [0, 1]
-    first_word = create_random_lodtensor(lod, place, low=0, high=dict_size)
-    second_word = create_random_lodtensor(lod, place, low=0, high=dict_size)
-    third_word = create_random_lodtensor(lod, place, low=0, high=dict_size)
-    fourth_word = create_random_lodtensor(lod, place, low=0, high=dict_size)
-
-    assert feed_target_names[0] == 'firstw'
-    assert feed_target_names[1] == 'secondw'
-    assert feed_target_names[2] == 'thirdw'
-    assert feed_target_names[3] == 'forthw'
-
-    # Construct feed as a dictionary of {feed_target_name: feed_target_data}
-    # and results will contain a list of data corresponding to fetch_targets.
-    results = exe.run(inference_program,
-                      feed={
-                          feed_target_names[0]: first_word,
-                          feed_target_names[1]: second_word,
-                          feed_target_names[2]: third_word,
-                          feed_target_names[3]: fourth_word
-                      },
-                      fetch_list=fetch_targets,
-                      return_numpy=False)
-    print(results[0].lod())
-    np_data = np.array(results[0])
-    print("Inference Shape: ", np_data.shape)
-    print("Inference results: ", np_data)
-
-
-def train(use_cuda, is_sparse, parallel, save_dirname):
+def train(use_cuda, is_sparse, is_parallel, save_dirname):
     PASS_NUM = 100
     EMBED_SIZE = 32
     HIDDEN_SIZE = 256
@@ -130,7 +85,7 @@ def train(use_cuda, is_sparse, parallel, save_dirname):
     forth_word = fluid.layers.data(name='forthw', shape=[1], dtype='int64')
     next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64')
 
-    if not parallel:
+    if not is_parallel:
         avg_cost, predict_word = __network__(
             [first_word, second_word, third_word, forth_word, next_word])
     else:
@@ -176,11 +131,67 @@ def train(use_cuda, is_sparse, parallel, save_dirname):
     raise AssertionError("Cost is too large {0:2.2}".format(avg_cost_np[0]))
 
 
-def main(use_cuda, is_sparse, parallel):
+def infer(use_cuda, save_dirname=None):
+    if save_dirname is None:
+        return
+
+    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
+    exe = fluid.Executor(place)
+
+    inference_scope = fluid.core.Scope()
+    with fluid.scope_guard(inference_scope):
+        # Use fluid.io.load_inference_model to obtain the inference program desc,
+        # the feed_target_names (the names of variables that will be feeded
+        # data using feed operators), and the fetch_targets (variables that
+        # we want to obtain data from using fetch operators).
+        [inference_program, feed_target_names,
+         fetch_targets] = fluid.io.load_inference_model(save_dirname, exe)
+
+        word_dict = paddle.dataset.imikolov.build_dict()
+        dict_size = len(word_dict)
+
+        # Setup inputs, by creating 4 words, the lod of which should be [0, 1]
+        lod = [0, 1]
+        first_word = create_random_lodtensor(
+            lod, place, low=0, high=dict_size - 1)
+        second_word = create_random_lodtensor(
+            lod, place, low=0, high=dict_size - 1)
+        third_word = create_random_lodtensor(
+            lod, place, low=0, high=dict_size - 1)
+        fourth_word = create_random_lodtensor(
+            lod, place, low=0, high=dict_size - 1)
+
+        assert feed_target_names[0] == 'firstw'
+        assert feed_target_names[1] == 'secondw'
+        assert feed_target_names[2] == 'thirdw'
+        assert feed_target_names[3] == 'forthw'
+
+        # Construct feed as a dictionary of {feed_target_name: feed_target_data}
+        # and results will contain a list of data corresponding to fetch_targets.
+        results = exe.run(inference_program,
+                          feed={
+                              feed_target_names[0]: first_word,
+                              feed_target_names[1]: second_word,
+                              feed_target_names[2]: third_word,
+                              feed_target_names[3]: fourth_word
+                          },
+                          fetch_list=fetch_targets,
+                          return_numpy=False)
+        print(results[0].lod())
+        np_data = np.array(results[0])
+        print("Inference Shape: ", np_data.shape)
+
+
+def main(use_cuda, is_sparse, is_parallel):
     if use_cuda and not fluid.core.is_compiled_with_cuda():
         return
-    save_dirname = "word2vec.inference.model"
-    train(use_cuda, is_sparse, parallel, save_dirname)
+
+    if not is_parallel:
+        save_dirname = "word2vec.inference.model"
+    else:
+        save_dirname = None
+
+    train(use_cuda, is_sparse, is_parallel, save_dirname)
     infer(use_cuda, save_dirname)
@@ -193,10 +204,10 @@ class W2VTest(unittest.TestCase):
     pass
 
 
-def inject_test_method(use_cuda, is_sparse, parallel):
+def inject_test_method(use_cuda, is_sparse, is_parallel):
     fn_name = "test_{0}_{1}_{2}".format("cuda" if use_cuda else "cpu", "sparse"
                                         if is_sparse else "dense", "parallel"
-                                        if parallel else "normal")
+                                        if is_parallel else "normal")
 
     def __impl__(*args, **kwargs):
         prog = fluid.Program()
@@ -204,10 +215,12 @@ def inject_test_method(use_cuda, is_sparse, parallel):
         scope = fluid.core.Scope()
         with fluid.scope_guard(scope):
             with fluid.program_guard(prog, startup_prog):
-                main(use_cuda=use_cuda, is_sparse=is_sparse, parallel=parallel)
+                main(
+                    use_cuda=use_cuda,
+                    is_sparse=is_sparse,
+                    is_parallel=is_parallel)
 
-    # run only 2 cases: use_cuda is either True or False
-    if is_sparse == False and parallel == False:
+    if use_cuda and is_sparse:
         fn = __impl__
     else:
         # skip the other test when on CI server
@@ -219,8 +232,8 @@ def inject_test_method(use_cuda, is_sparse, parallel):
 for use_cuda in (False, True):
     for is_sparse in (False, True):
-        for parallel in (False, True):
-            inject_test_method(use_cuda, is_sparse, parallel)
+        for is_parallel in (False, True):
+            inject_test_method(use_cuda, is_sparse, is_parallel)
 
 if __name__ == '__main__':
     unittest.main()