From c796e013c6c4505306f8397d03a370f388924886 Mon Sep 17 00:00:00 2001
From: Liu Yiqun <liuyiqun01@baidu.com>
Date: Sun, 11 Feb 2018 09:51:02 +0000
Subject: [PATCH] Refine the inference unittests.

---
 paddle/fluid/framework/lod_tensor.cc          |   8 +-
 .../tests/book/test_inference_word2vec.cc     |  10 +-
 .../tests/book/test_image_classification.py   |   4 +-
 .../tests/book/test_label_semantic_roles.py   |  37 ++---
 .../v2/fluid/tests/book/test_word2vec.py      | 127 +++++++++---------
 5 files changed, 101 insertions(+), 85 deletions(-)

diff --git a/paddle/fluid/framework/lod_tensor.cc b/paddle/fluid/framework/lod_tensor.cc
index 05c67e453d0..70a2a652664 100644
--- a/paddle/fluid/framework/lod_tensor.cc
+++ b/paddle/fluid/framework/lod_tensor.cc
@@ -31,8 +31,14 @@ std::ostream &operator<<(std::ostream &os, const LoD &lod) {
   os << "{";
   for (auto &v : lod) {
     os << "{";
+    bool is_first = true;
     for (auto &i : v) {
-      os << i << ",";
+      if (is_first) {
+        os << i;
+        is_first = false;
+      } else {
+        os << ", " << i;
+      }
     }
     os << "}";
   }
diff --git a/paddle/fluid/inference/tests/book/test_inference_word2vec.cc b/paddle/fluid/inference/tests/book/test_inference_word2vec.cc
index 93376b6824d..a62b0a37c69 100644
--- a/paddle/fluid/inference/tests/book/test_inference_word2vec.cc
+++ b/paddle/fluid/inference/tests/book/test_inference_word2vec.cc
@@ -31,12 +31,12 @@ TEST(inference, word2vec) {
 
   paddle::framework::LoDTensor first_word, second_word, third_word, fourth_word;
   paddle::framework::LoD lod{{0, 1}};
-  int64_t dict_size = 2072;  // Hard-coding the size of dictionary
+  int64_t dict_size = 2073;  // The size of the dictionary
 
-  SetupLoDTensor(first_word, lod, static_cast<int64_t>(0), dict_size);
-  SetupLoDTensor(second_word, lod, static_cast<int64_t>(0), dict_size);
-  SetupLoDTensor(third_word, lod, static_cast<int64_t>(0), dict_size);
-  SetupLoDTensor(fourth_word, lod, static_cast<int64_t>(0), dict_size);
+  SetupLoDTensor(first_word, lod, static_cast<int64_t>(0), dict_size - 1);
+  SetupLoDTensor(second_word, lod, static_cast<int64_t>(0), dict_size - 1);
+  SetupLoDTensor(third_word, lod, static_cast<int64_t>(0), dict_size - 1);
+  SetupLoDTensor(fourth_word, lod, static_cast<int64_t>(0), dict_size - 1);
 
   std::vector<paddle::framework::LoDTensor*> cpu_feeds;
   cpu_feeds.push_back(&first_word);
diff --git a/python/paddle/v2/fluid/tests/book/test_image_classification.py b/python/paddle/v2/fluid/tests/book/test_image_classification.py
index ffbe5bdbd64..4b764ee3b3a 100644
--- a/python/paddle/v2/fluid/tests/book/test_image_classification.py
+++ b/python/paddle/v2/fluid/tests/book/test_image_classification.py
@@ -182,7 +182,9 @@ def infer(use_cuda, save_dirname=None):
      fetch_targets] = fluid.io.load_inference_model(save_dirname, exe)
 
     # The input's dimension of conv should be 4-D or 5-D.
-    tensor_img = numpy.random.rand(1, 3, 32, 32).astype("float32")
+    # Use normalized image pixels as input data, which should be in the range [0, 1.0].
+    batch_size = 1
+    tensor_img = numpy.random.rand(batch_size, 3, 32, 32).astype("float32")
 
     # Construct feed as a dictionary of {feed_target_name: feed_target_data}
     # and results will contain a list of data corresponding to fetch_targets.
diff --git a/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py b/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py
index f33e81186bd..f5fb3ed36d5 100644
--- a/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py
+++ b/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py
@@ -26,7 +26,7 @@ import unittest
 word_dict, verb_dict, label_dict = conll05.get_dict()
 word_dict_len = len(word_dict)
 label_dict_len = len(label_dict)
-pred_len = len(verb_dict)
+pred_dict_len = len(verb_dict)
 
 mark_dict_len = 2
 word_dim = 32
@@ -53,7 +53,7 @@ def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark,
     # 8 features
     predicate_embedding = fluid.layers.embedding(
         input=predicate,
-        size=[pred_len, word_dim],
+        size=[pred_dict_len, word_dim],
         dtype='float32',
         is_sparse=IS_SPARSE,
         param_attr='vemb')
@@ -234,6 +234,7 @@ def train(use_cuda, save_dirname=None):
                 # Set the threshold low to speed up the CI test
                 if float(pass_precision) > 0.05:
                     if save_dirname is not None:
+                        # TODO(liuyiqun): Change the target to crf_decode
                         fluid.io.save_inference_model(save_dirname, [
                             'word_data', 'verb_data', 'ctx_n2_data',
                             'ctx_n1_data', 'ctx_0_data', 'ctx_p1_data',
@@ -259,14 +260,14 @@ def infer(use_cuda, save_dirname=None):
      fetch_targets] = fluid.io.load_inference_model(save_dirname, exe)
 
     lod = [0, 4, 10]
-    ts_word = create_random_lodtensor(lod, place, low=0, high=1)
-    ts_pred = create_random_lodtensor(lod, place, low=0, high=1)
-    ts_ctx_n2 = create_random_lodtensor(lod, place, low=0, high=1)
-    ts_ctx_n1 = create_random_lodtensor(lod, place, low=0, high=1)
-    ts_ctx_0 = create_random_lodtensor(lod, place, low=0, high=1)
-    ts_ctx_p1 = create_random_lodtensor(lod, place, low=0, high=1)
-    ts_ctx_p2 = create_random_lodtensor(lod, place, low=0, high=1)
-    ts_mark = create_random_lodtensor(lod, place, low=0, high=1)
+    word = create_random_lodtensor(lod, place, low=0, high=word_dict_len - 1)
+    pred = create_random_lodtensor(lod, place, low=0, high=pred_dict_len - 1)
+    ctx_n2 = create_random_lodtensor(lod, place, low=0, high=word_dict_len - 1)
+    ctx_n1 = create_random_lodtensor(lod, place, low=0, high=word_dict_len - 1)
+    ctx_0 = create_random_lodtensor(lod, place, low=0, high=word_dict_len - 1)
+    ctx_p1 = create_random_lodtensor(lod, place, low=0, high=word_dict_len - 1)
+    ctx_p2 = create_random_lodtensor(lod, place, low=0, high=word_dict_len - 1)
+    mark = create_random_lodtensor(lod, place, low=0, high=mark_dict_len - 1)
 
     # Construct feed as a dictionary of {feed_target_name: feed_target_data}
     # and results will contain a list of data corresponding to fetch_targets.
@@ -281,14 +282,14 @@ def infer(use_cuda, save_dirname=None):
 
     results = exe.run(inference_program,
                       feed={
-                          feed_target_names[0]: ts_word,
-                          feed_target_names[1]: ts_pred,
-                          feed_target_names[2]: ts_ctx_n2,
-                          feed_target_names[3]: ts_ctx_n1,
-                          feed_target_names[4]: ts_ctx_0,
-                          feed_target_names[5]: ts_ctx_p1,
-                          feed_target_names[6]: ts_ctx_p2,
-                          feed_target_names[7]: ts_mark
+                          feed_target_names[0]: word,
+                          feed_target_names[1]: pred,
+                          feed_target_names[2]: ctx_n2,
+                          feed_target_names[3]: ctx_n1,
+                          feed_target_names[4]: ctx_0,
+                          feed_target_names[5]: ctx_p1,
+                          feed_target_names[6]: ctx_p2,
+                          feed_target_names[7]: mark
                       },
                       fetch_list=fetch_targets,
                       return_numpy=False)
diff --git a/python/paddle/v2/fluid/tests/book/test_word2vec.py b/python/paddle/v2/fluid/tests/book/test_word2vec.py
index 69bfbcee69a..d30f6230851 100644
--- a/python/paddle/v2/fluid/tests/book/test_word2vec.py
+++ b/python/paddle/v2/fluid/tests/book/test_word2vec.py
@@ -1,5 +1,6 @@
 #   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
-# # Licensed under the Apache License, Version 2.0 (the "License");
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
@@ -21,6 +22,7 @@ import sys
 
 
 def create_random_lodtensor(lod, place, low, high):
+    # The range of data elements is [low, high]
     data = np.random.random_integers(low, high, [lod[-1], 1]).astype("int64")
     res = fluid.LoDTensor()
     res.set(data, place)
@@ -28,54 +30,7 @@ def create_random_lodtensor(lod, place, low, high):
     return res
 
 
-def infer(use_cuda, save_dirname=None):
-    if save_dirname is None:
-        return
-
-    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
-    exe = fluid.Executor(place)
-
-    # Use fluid.io.load_inference_model to obtain the inference program desc,
-    # the feed_target_names (the names of variables that will be feeded 
-    # data using feed operators), and the fetch_targets (variables that 
-    # we want to obtain data from using fetch operators).
-    [inference_program, feed_target_names,
-     fetch_targets] = fluid.io.load_inference_model(save_dirname, exe)
-
-    word_dict = paddle.dataset.imikolov.build_dict()
-    dict_size = len(word_dict) - 1
-
-    # Setup input, by creating 4 words, and setting up lod required for 
-    # lookup_table_op
-    lod = [0, 1]
-    first_word = create_random_lodtensor(lod, place, low=0, high=dict_size)
-    second_word = create_random_lodtensor(lod, place, low=0, high=dict_size)
-    third_word = create_random_lodtensor(lod, place, low=0, high=dict_size)
-    fourth_word = create_random_lodtensor(lod, place, low=0, high=dict_size)
-
-    assert feed_target_names[0] == 'firstw'
-    assert feed_target_names[1] == 'secondw'
-    assert feed_target_names[2] == 'thirdw'
-    assert feed_target_names[3] == 'forthw'
-
-    # Construct feed as a dictionary of {feed_target_name: feed_target_data}
-    # and results will contain a list of data corresponding to fetch_targets.
-    results = exe.run(inference_program,
-                      feed={
-                          feed_target_names[0]: first_word,
-                          feed_target_names[1]: second_word,
-                          feed_target_names[2]: third_word,
-                          feed_target_names[3]: fourth_word
-                      },
-                      fetch_list=fetch_targets,
-                      return_numpy=False)
-    print(results[0].lod())
-    np_data = np.array(results[0])
-    print("Inference Shape: ", np_data.shape)
-    print("Inference results: ", np_data)
-
-
-def train(use_cuda, is_sparse, parallel, save_dirname):
+def train(use_cuda, is_sparse, is_parallel, save_dirname):
     PASS_NUM = 100
     EMBED_SIZE = 32
     HIDDEN_SIZE = 256
@@ -130,7 +85,7 @@ def train(use_cuda, is_sparse, parallel, save_dirname):
     forth_word = fluid.layers.data(name='forthw', shape=[1], dtype='int64')
     next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64')
 
-    if not parallel:
+    if not is_parallel:
         avg_cost, predict_word = __network__(
             [first_word, second_word, third_word, forth_word, next_word])
     else:
@@ -176,11 +131,61 @@ def train(use_cuda, is_sparse, parallel, save_dirname):
     raise AssertionError("Cost is too large {0:2.2}".format(avg_cost_np[0]))
 
 
-def main(use_cuda, is_sparse, parallel):
+def infer(use_cuda, save_dirname=None):
+    if save_dirname is None:
+        return
+
+    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
+    exe = fluid.Executor(place)
+
+    # Use fluid.io.load_inference_model to obtain the inference program desc,
+    # the feed_target_names (the names of variables that will be fed
+    # data using feed operators), and the fetch_targets (variables that
+    # we want to obtain data from using fetch operators).
+    [inference_program, feed_target_names,
+     fetch_targets] = fluid.io.load_inference_model(save_dirname, exe)
+
+    word_dict = paddle.dataset.imikolov.build_dict()
+    dict_size = len(word_dict)
+
+    # Set up the inputs by creating 4 words, each of which has lod [0, 1]
+    lod = [0, 1]
+    first_word = create_random_lodtensor(lod, place, low=0, high=dict_size - 1)
+    second_word = create_random_lodtensor(lod, place, low=0, high=dict_size - 1)
+    third_word = create_random_lodtensor(lod, place, low=0, high=dict_size - 1)
+    fourth_word = create_random_lodtensor(lod, place, low=0, high=dict_size - 1)
+
+    assert feed_target_names[0] == 'firstw'
+    assert feed_target_names[1] == 'secondw'
+    assert feed_target_names[2] == 'thirdw'
+    assert feed_target_names[3] == 'forthw'
+
+    # Construct feed as a dictionary of {feed_target_name: feed_target_data}
+    # and results will contain a list of data corresponding to fetch_targets.
+    results = exe.run(inference_program,
+                      feed={
+                          feed_target_names[0]: first_word,
+                          feed_target_names[1]: second_word,
+                          feed_target_names[2]: third_word,
+                          feed_target_names[3]: fourth_word
+                      },
+                      fetch_list=fetch_targets,
+                      return_numpy=False)
+    print(results[0].lod())
+    np_data = np.array(results[0])
+    print("Inference Shape: ", np_data.shape)
+
+
+def main(use_cuda, is_sparse, is_parallel):
     if use_cuda and not fluid.core.is_compiled_with_cuda():
         return
-    save_dirname = "word2vec.inference.model"
-    train(use_cuda, is_sparse, parallel, save_dirname)
+
+    if not is_parallel:
+        save_dirname = "word2vec.inference.model"
+    else:
+        save_dirname = None
+
+    train(use_cuda, is_sparse, is_parallel, save_dirname)
     infer(use_cuda, save_dirname)
 
 
@@ -193,10 +198,10 @@ class W2VTest(unittest.TestCase):
     pass
 
 
-def inject_test_method(use_cuda, is_sparse, parallel):
+def inject_test_method(use_cuda, is_sparse, is_parallel):
     fn_name = "test_{0}_{1}_{2}".format("cuda" if use_cuda else "cpu", "sparse"
                                         if is_sparse else "dense", "parallel"
-                                        if parallel else "normal")
+                                        if is_parallel else "normal")
 
     def __impl__(*args, **kwargs):
         prog = fluid.Program()
@@ -204,10 +209,12 @@ def inject_test_method(use_cuda, is_sparse, parallel):
         scope = fluid.core.Scope()
         with fluid.scope_guard(scope):
             with fluid.program_guard(prog, startup_prog):
-                main(use_cuda=use_cuda, is_sparse=is_sparse, parallel=parallel)
+                main(
+                    use_cuda=use_cuda,
+                    is_sparse=is_sparse,
+                    is_parallel=is_parallel)
 
-    # run only 2 cases: use_cuda is either True or False
-    if is_sparse == False and parallel == False:
+    if use_cuda and is_sparse:
         fn = __impl__
     else:
         # skip the other test when on CI server
@@ -219,8 +226,8 @@ def inject_test_method(use_cuda, is_sparse, parallel):
 
 for use_cuda in (False, True):
     for is_sparse in (False, True):
-        for parallel in (False, True):
-            inject_test_method(use_cuda, is_sparse, parallel)
+        for is_parallel in (False, True):
+            inject_test_method(use_cuda, is_sparse, is_parallel)
 
 if __name__ == '__main__':
     unittest.main()
-- 
GitLab