From c796e013c6c4505306f8397d03a370f388924886 Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Sun, 11 Feb 2018 09:51:02 +0000 Subject: [PATCH] Refine the inference unittests. --- paddle/fluid/framework/lod_tensor.cc | 8 +- .../tests/book/test_inference_word2vec.cc | 10 +- .../tests/book/test_image_classification.py | 4 +- .../tests/book/test_label_semantic_roles.py | 37 ++--- .../v2/fluid/tests/book/test_word2vec.py | 127 +++++++++--------- 5 files changed, 101 insertions(+), 85 deletions(-) diff --git a/paddle/fluid/framework/lod_tensor.cc b/paddle/fluid/framework/lod_tensor.cc index 05c67e453d0..70a2a652664 100644 --- a/paddle/fluid/framework/lod_tensor.cc +++ b/paddle/fluid/framework/lod_tensor.cc @@ -31,8 +31,14 @@ std::ostream &operator<<(std::ostream &os, const LoD &lod) { os << "{"; for (auto &v : lod) { os << "{"; + bool is_first = true; for (auto &i : v) { - os << i << ","; + if (is_first) { + os << i; + is_first = false; + } else { + os << ", " << i; + } } os << "}"; } diff --git a/paddle/fluid/inference/tests/book/test_inference_word2vec.cc b/paddle/fluid/inference/tests/book/test_inference_word2vec.cc index 93376b6824d..a62b0a37c69 100644 --- a/paddle/fluid/inference/tests/book/test_inference_word2vec.cc +++ b/paddle/fluid/inference/tests/book/test_inference_word2vec.cc @@ -31,12 +31,12 @@ TEST(inference, word2vec) { paddle::framework::LoDTensor first_word, second_word, third_word, fourth_word; paddle::framework::LoD lod{{0, 1}}; - int64_t dict_size = 2072; // Hard-coding the size of dictionary + int64_t dict_size = 2073; // The size of dictionary - SetupLoDTensor(first_word, lod, static_cast(0), dict_size); - SetupLoDTensor(second_word, lod, static_cast(0), dict_size); - SetupLoDTensor(third_word, lod, static_cast(0), dict_size); - SetupLoDTensor(fourth_word, lod, static_cast(0), dict_size); + SetupLoDTensor(first_word, lod, static_cast(0), dict_size - 1); + SetupLoDTensor(second_word, lod, static_cast(0), dict_size - 1); + SetupLoDTensor(third_word, lod, static_cast(0), dict_size - 1); + SetupLoDTensor(fourth_word, lod, static_cast(0), dict_size - 1); std::vector cpu_feeds; cpu_feeds.push_back(&first_word); diff --git a/python/paddle/v2/fluid/tests/book/test_image_classification.py b/python/paddle/v2/fluid/tests/book/test_image_classification.py index ffbe5bdbd64..4b764ee3b3a 100644 --- a/python/paddle/v2/fluid/tests/book/test_image_classification.py +++ b/python/paddle/v2/fluid/tests/book/test_image_classification.py @@ -182,7 +182,9 @@ def infer(use_cuda, save_dirname=None): fetch_targets] = fluid.io.load_inference_model(save_dirname, exe) # The input's dimension of conv should be 4-D or 5-D. - tensor_img = numpy.random.rand(1, 3, 32, 32).astype("float32") + # Use normilized image pixels as input data, which should be in the range [0, 1.0]. + batch_size = 1 + tensor_img = numpy.random.rand(batch_size, 3, 32, 32).astype("float32") # Construct feed as a dictionary of {feed_target_name: feed_target_data} # and results will contain a list of data corresponding to fetch_targets. diff --git a/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py b/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py index f33e81186bd..f5fb3ed36d5 100644 --- a/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py +++ b/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py @@ -26,7 +26,7 @@ import unittest word_dict, verb_dict, label_dict = conll05.get_dict() word_dict_len = len(word_dict) label_dict_len = len(label_dict) -pred_len = len(verb_dict) +pred_dict_len = len(verb_dict) mark_dict_len = 2 word_dim = 32 @@ -53,7 +53,7 @@ def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, # 8 features predicate_embedding = fluid.layers.embedding( input=predicate, - size=[pred_len, word_dim], + size=[pred_dict_len, word_dim], dtype='float32', is_sparse=IS_SPARSE, param_attr='vemb') @@ -234,6 +234,7 @@ def train(use_cuda, save_dirname=None): # Set the threshold low to speed up the CI test if float(pass_precision) > 0.05: if save_dirname is not None: + # TODO(liuyiqun): Change the target to crf_decode fluid.io.save_inference_model(save_dirname, [ 'word_data', 'verb_data', 'ctx_n2_data', 'ctx_n1_data', 'ctx_0_data', 'ctx_p1_data', @@ -259,14 +260,14 @@ def infer(use_cuda, save_dirname=None): fetch_targets] = fluid.io.load_inference_model(save_dirname, exe) lod = [0, 4, 10] - ts_word = create_random_lodtensor(lod, place, low=0, high=1) - ts_pred = create_random_lodtensor(lod, place, low=0, high=1) - ts_ctx_n2 = create_random_lodtensor(lod, place, low=0, high=1) - ts_ctx_n1 = create_random_lodtensor(lod, place, low=0, high=1) - ts_ctx_0 = create_random_lodtensor(lod, place, low=0, high=1) - ts_ctx_p1 = create_random_lodtensor(lod, place, low=0, high=1) - ts_ctx_p2 = create_random_lodtensor(lod, place, low=0, high=1) - ts_mark = create_random_lodtensor(lod, place, low=0, high=1) + word = create_random_lodtensor(lod, place, low=0, high=word_dict_len - 1) + pred = create_random_lodtensor(lod, place, low=0, high=pred_dict_len - 1) + ctx_n2 = create_random_lodtensor(lod, place, low=0, high=word_dict_len - 1) + ctx_n1 = create_random_lodtensor(lod, place, low=0, high=word_dict_len - 1) + ctx_0 = create_random_lodtensor(lod, place, low=0, high=word_dict_len - 1) + ctx_p1 = create_random_lodtensor(lod, place, low=0, high=word_dict_len - 1) + ctx_p2 = create_random_lodtensor(lod, place, low=0, high=word_dict_len - 1) + mark = create_random_lodtensor(lod, place, low=0, high=mark_dict_len - 1) # Construct feed as a dictionary of {feed_target_name: feed_target_data} # and results will contain a list of data corresponding to fetch_targets. @@ -281,14 +282,14 @@ def infer(use_cuda, save_dirname=None): results = exe.run(inference_program, feed={ - feed_target_names[0]: ts_word, - feed_target_names[1]: ts_pred, - feed_target_names[2]: ts_ctx_n2, - feed_target_names[3]: ts_ctx_n1, - feed_target_names[4]: ts_ctx_0, - feed_target_names[5]: ts_ctx_p1, - feed_target_names[6]: ts_ctx_p2, - feed_target_names[7]: ts_mark + feed_target_names[0]: word, + feed_target_names[1]: pred, + feed_target_names[2]: ctx_n2, + feed_target_names[3]: ctx_n1, + feed_target_names[4]: ctx_0, + feed_target_names[5]: ctx_p1, + feed_target_names[6]: ctx_p2, + feed_target_names[7]: mark }, fetch_list=fetch_targets, return_numpy=False) diff --git a/python/paddle/v2/fluid/tests/book/test_word2vec.py b/python/paddle/v2/fluid/tests/book/test_word2vec.py index 69bfbcee69a..d30f6230851 100644 --- a/python/paddle/v2/fluid/tests/book/test_word2vec.py +++ b/python/paddle/v2/fluid/tests/book/test_word2vec.py @@ -1,5 +1,6 @@ # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. -# # Licensed under the Apache License, Version 2.0 (the "License"); +# +# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # @@ -21,6 +22,7 @@ import sys def create_random_lodtensor(lod, place, low, high): + # The range of data elements is [low, high] data = np.random.random_integers(low, high, [lod[-1], 1]).astype("int64") res = fluid.LoDTensor() res.set(data, place) @@ -28,54 +30,7 @@ def create_random_lodtensor(lod, place, low, high): return res -def infer(use_cuda, save_dirname=None): - if save_dirname is None: - return - - place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - exe = fluid.Executor(place) - - # Use fluid.io.load_inference_model to obtain the inference program desc, - # the feed_target_names (the names of variables that will be feeded - # data using feed operators), and the fetch_targets (variables that - # we want to obtain data from using fetch operators). - [inference_program, feed_target_names, - fetch_targets] = fluid.io.load_inference_model(save_dirname, exe) - - word_dict = paddle.dataset.imikolov.build_dict() - dict_size = len(word_dict) - 1 - - # Setup input, by creating 4 words, and setting up lod required for - # lookup_table_op - lod = [0, 1] - first_word = create_random_lodtensor(lod, place, low=0, high=dict_size) - second_word = create_random_lodtensor(lod, place, low=0, high=dict_size) - third_word = create_random_lodtensor(lod, place, low=0, high=dict_size) - fourth_word = create_random_lodtensor(lod, place, low=0, high=dict_size) - - assert feed_target_names[0] == 'firstw' - assert feed_target_names[1] == 'secondw' - assert feed_target_names[2] == 'thirdw' - assert feed_target_names[3] == 'forthw' - - # Construct feed as a dictionary of {feed_target_name: feed_target_data} - # and results will contain a list of data corresponding to fetch_targets. - results = exe.run(inference_program, - feed={ - feed_target_names[0]: first_word, - feed_target_names[1]: second_word, - feed_target_names[2]: third_word, - feed_target_names[3]: fourth_word - }, - fetch_list=fetch_targets, - return_numpy=False) - print(results[0].lod()) - np_data = np.array(results[0]) - print("Inference Shape: ", np_data.shape) - print("Inference results: ", np_data) - - -def train(use_cuda, is_sparse, parallel, save_dirname): +def train(use_cuda, is_sparse, is_parallel, save_dirname): PASS_NUM = 100 EMBED_SIZE = 32 HIDDEN_SIZE = 256 @@ -130,7 +85,7 @@ def train(use_cuda, is_sparse, parallel, save_dirname): forth_word = fluid.layers.data(name='forthw', shape=[1], dtype='int64') next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64') - if not parallel: + if not is_parallel: avg_cost, predict_word = __network__( [first_word, second_word, third_word, forth_word, next_word]) else: @@ -176,11 +131,61 @@ def train(use_cuda, is_sparse, parallel, save_dirname): raise AssertionError("Cost is too large {0:2.2}".format(avg_cost_np[0])) -def main(use_cuda, is_sparse, parallel): +def infer(use_cuda, save_dirname=None): + if save_dirname is None: + return + + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + exe = fluid.Executor(place) + + # Use fluid.io.load_inference_model to obtain the inference program desc, + # the feed_target_names (the names of variables that will be feeded + # data using feed operators), and the fetch_targets (variables that + # we want to obtain data from using fetch operators). + [inference_program, feed_target_names, + fetch_targets] = fluid.io.load_inference_model(save_dirname, exe) + + word_dict = paddle.dataset.imikolov.build_dict() + dict_size = len(word_dict) + + # Setup inputs, by creating 4 words, the lod of which should be [0, 1] + lod = [0, 1] + first_word = create_random_lodtensor(lod, place, low=0, high=dict_size - 1) + second_word = create_random_lodtensor(lod, place, low=0, high=dict_size - 1) + third_word = create_random_lodtensor(lod, place, low=0, high=dict_size - 1) + fourth_word = create_random_lodtensor(lod, place, low=0, high=dict_size - 1) + + assert feed_target_names[0] == 'firstw' + assert feed_target_names[1] == 'secondw' + assert feed_target_names[2] == 'thirdw' + assert feed_target_names[3] == 'forthw' + + # Construct feed as a dictionary of {feed_target_name: feed_target_data} + # and results will contain a list of data corresponding to fetch_targets. + results = exe.run(inference_program, + feed={ + feed_target_names[0]: first_word, + feed_target_names[1]: second_word, + feed_target_names[2]: third_word, + feed_target_names[3]: fourth_word + }, + fetch_list=fetch_targets, + return_numpy=False) + print(results[0].lod()) + np_data = np.array(results[0]) + print("Inference Shape: ", np_data.shape) + + +def main(use_cuda, is_sparse, is_parallel): if use_cuda and not fluid.core.is_compiled_with_cuda(): return - save_dirname = "word2vec.inference.model" - train(use_cuda, is_sparse, parallel, save_dirname) + + if not is_parallel: + save_dirname = "word2vec.inference.model" + else: + save_dirname = None + + train(use_cuda, is_sparse, is_parallel, save_dirname) infer(use_cuda, save_dirname) @@ -193,10 +198,10 @@ class W2VTest(unittest.TestCase): pass -def inject_test_method(use_cuda, is_sparse, parallel): +def inject_test_method(use_cuda, is_sparse, is_parallel): fn_name = "test_{0}_{1}_{2}".format("cuda" if use_cuda else "cpu", "sparse" if is_sparse else "dense", "parallel" - if parallel else "normal") + if is_parallel else "normal") def __impl__(*args, **kwargs): prog = fluid.Program() @@ -204,10 +209,12 @@ def inject_test_method(use_cuda, is_sparse, parallel): scope = fluid.core.Scope() with fluid.scope_guard(scope): with fluid.program_guard(prog, startup_prog): - main(use_cuda=use_cuda, is_sparse=is_sparse, parallel=parallel) + main( + use_cuda=use_cuda, + is_sparse=is_sparse, + is_parallel=is_parallel) - # run only 2 cases: use_cuda is either True or False - if is_sparse == False and parallel == False: + if use_cuda and is_sparse: fn = __impl__ else: # skip the other test when on CI server @@ -219,8 +226,8 @@ def inject_test_method(use_cuda, is_sparse, parallel): for use_cuda in (False, True): for is_sparse in (False, True): - for parallel in (False, True): - inject_test_method(use_cuda, is_sparse, parallel) + for is_parallel in (False, True): + inject_test_method(use_cuda, is_sparse, is_parallel) if __name__ == '__main__': unittest.main() -- GitLab