Unverified commit 63da846d authored by Tao Luo, committed by GitHub

remove old inference C++ tests (#24368)

Parent aa0f254f
@@ -55,12 +55,8 @@ endif()
 # C inference API
 add_subdirectory(capi)
-if(WITH_TESTING)
-  # tests/book depends on the models generated by python/paddle/fluid/tests/book
-  add_subdirectory(tests/book)
-  if(WITH_INFERENCE_API_TEST)
-    add_subdirectory(tests/api)
-  endif()
+if(WITH_TESTING AND WITH_INFERENCE_API_TEST)
+  add_subdirectory(tests/api)
 endif()
 if(NOT ON_INFER)
...
function(inference_test TARGET_NAME)
  set(options "")
  set(oneValueArgs "")
  set(multiValueArgs ARGS)
  cmake_parse_arguments(inference_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
  set(arg_list "")
  if(inference_test_ARGS)
    foreach(arg ${inference_test_ARGS})
      list(APPEND arg_list "_${arg}")
    endforeach()
  else()
    list(APPEND arg_list "_")
  endif()
  foreach(arg ${arg_list})
    string(REGEX REPLACE "^_$" "" arg "${arg}")
    cc_test(test_inference_${TARGET_NAME}${arg}
            SRCS test_inference_${TARGET_NAME}.cc
            DEPS paddle_fluid_api
            ARGS --dirname=${PYTHON_TESTS_DIR}/book/${TARGET_NAME}${arg}.inference.model)
    set_tests_properties(test_inference_${TARGET_NAME}${arg}
                         PROPERTIES DEPENDS test_${TARGET_NAME})
    set_tests_properties(test_inference_${TARGET_NAME}${arg}
                         PROPERTIES LABELS "RUN_TYPE=DIST")
  endforeach()
endfunction(inference_test)
####################
# Inference tests here depend on fluid/tests/book. To run an individual test
# with ctest, the corresponding tests in fluid/tests/book must be run first
# to generate the saved models.
####################
# This unittest is buggy!
#inference_test(fit_a_line)
inference_test(image_classification ARGS vgg resnet)
inference_test(label_semantic_roles)
inference_test(recognize_digits ARGS mlp conv)
inference_test(recommender_system)
#inference_test(rnn_encoder_decoder)
#inference_test(understand_sentiment ARGS conv)
inference_test(word2vec)
# This is an ugly workaround to make this test run
# TODO(TJ): clean me up
cc_test(test_inference_nlp
        SRCS test_inference_nlp.cc
        DEPS paddle_fluid_api
        ARGS
        --model_path=${PADDLE_BINARY_DIR}/python/paddle/fluid/tests/book/recognize_digits_mlp.inference.model)
set_tests_properties(test_inference_nlp PROPERTIES LABELS "RUN_TYPE=DIST")
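For reference only (not part of this commit's diff): given the inference_test() helper defined above, a call such as inference_test(recognize_digits ARGS mlp conv) expands into one cc_test target per argument, each bound to a saved book model. A minimal sketch of the first generated target, following the function definition above:

cc_test(test_inference_recognize_digits_mlp
        SRCS test_inference_recognize_digits.cc
        DEPS paddle_fluid_api
        ARGS --dirname=${PYTHON_TESTS_DIR}/book/recognize_digits_mlp.inference.model)
set_tests_properties(test_inference_recognize_digits_mlp
                     PROPERTIES DEPENDS test_recognize_digits)
set_tests_properties(test_inference_recognize_digits_mlp
                     PROPERTIES LABELS "RUN_TYPE=DIST")
# A second target, test_inference_recognize_digits_conv, would be generated likewise for the "conv" argument.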
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "gflags/gflags.h"
#include "gtest/gtest.h"
#include "paddle/fluid/inference/tests/test_helper.h"
#include "paddle/fluid/inference/tests/test_multi_thread_helper.h"
DEFINE_string(dirname, "", "Directory of the inference model.");
TEST(inference, fit_a_line) {
if (FLAGS_dirname.empty()) {
LOG(FATAL) << "Usage: ./example --dirname=path/to/your/model";
}
LOG(INFO) << "FLAGS_dirname: " << FLAGS_dirname << std::endl;
std::string dirname = FLAGS_dirname;
// 0. Call `paddle::framework::InitDevices()` to initialize all the devices
// In unittests, this is done in paddle/testing/paddle_gtest_main.cc
for (int num_threads : {1, 2}) {
std::vector<std::vector<paddle::framework::LoDTensor*>> cpu_feeds;
cpu_feeds.resize(num_threads);
for (int i = 0; i < num_threads; ++i) {
auto* input = new paddle::framework::LoDTensor();
// The second dim of the input tensor should be 13
// The input data should be >= 0
int64_t batch_size = 10;
SetupTensor<float>(input, {batch_size, 13}, static_cast<float>(0),
static_cast<float>(10));
cpu_feeds[i].push_back(input);
}
std::vector<std::vector<paddle::framework::FetchType*>> cpu_fetchs1;
cpu_fetchs1.resize(num_threads);
for (int i = 0; i < num_threads; ++i) {
auto* output = new paddle::framework::FetchType();
cpu_fetchs1[i].push_back(output);
}
// Run inference on CPU
LOG(INFO) << "--- CPU Runs (num_threads: " << num_threads << "): ---";
if (num_threads == 1) {
TestInference<paddle::platform::CPUPlace>(dirname, cpu_feeds[0],
cpu_fetchs1[0]);
} else {
TestMultiThreadInference<paddle::platform::CPUPlace>(
dirname, cpu_feeds, cpu_fetchs1, num_threads);
}
#ifdef PADDLE_WITH_CUDA
std::vector<std::vector<paddle::framework::FetchType*>> cpu_fetchs2;
cpu_fetchs2.resize(num_threads);
for (int i = 0; i < num_threads; ++i) {
auto* output = new paddle::framework::FetchType();
cpu_fetchs2[i].push_back(output);
}
// Run inference on CUDA GPU
LOG(INFO) << "--- GPU Runs (num_threads: " << num_threads << "): ---";
if (num_threads == 1) {
TestInference<paddle::platform::CUDAPlace>(dirname, cpu_feeds[0],
cpu_fetchs2[0]);
} else {
TestMultiThreadInference<paddle::platform::CUDAPlace>(
dirname, cpu_feeds, cpu_fetchs2, num_threads);
}
for (int i = 0; i < num_threads; ++i) {
CheckError<float>(
BOOST_GET(paddle::framework::LoDTensor, *cpu_fetchs1[i][0]),
BOOST_GET(paddle::framework::LoDTensor, *cpu_fetchs2[i][0]));
delete cpu_fetchs2[i][0];
}
#endif
for (int i = 0; i < num_threads; ++i) {
delete cpu_feeds[i][0];
delete cpu_fetchs1[i][0];
}
} // num_threads-loop
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "gflags/gflags.h"
#include "gtest/gtest.h"
#include "paddle/fluid/inference/tests/test_helper.h"
DEFINE_string(dirname, "", "Directory of the inference model.");
DEFINE_string(fp16_dirname, "", "Directory of the float16 inference model.");
DEFINE_int32(batch_size, 1, "Batch size of input data");
DEFINE_int32(repeat, 1, "Number of times to repeat running the inference program");
DEFINE_bool(skip_cpu, false, "Skip the cpu test");
TEST(inference, image_classification) {
if (FLAGS_dirname.empty() || FLAGS_batch_size < 1 || FLAGS_repeat < 1) {
LOG(FATAL) << "Usage: ./example --dirname=path/to/your/model "
"--batch_size=1 --repeat=1";
}
LOG(INFO) << "FLAGS_dirname: " << FLAGS_dirname << std::endl;
std::string dirname = FLAGS_dirname;
// 0. Call `paddle::framework::InitDevices()` to initialize all the devices
// In unittests, this is done in paddle/testing/paddle_gtest_main.cc
const bool is_combined = false;
std::vector<std::vector<int64_t>> feed_target_shapes =
GetFeedTargetShapes(dirname, is_combined);
paddle::framework::LoDTensor input;
// Use normalized image pixels as input data,
// which should be in the range [0.0, 1.0].
feed_target_shapes[0][0] = FLAGS_batch_size;
paddle::framework::DDim input_dims =
paddle::framework::make_ddim(feed_target_shapes[0]);
LOG(INFO) << input_dims;
SetupTensor<float>(&input, input_dims, static_cast<float>(0),
static_cast<float>(1));
std::vector<paddle::framework::LoDTensor*> cpu_feeds;
cpu_feeds.push_back(&input);
paddle::framework::FetchType output1;
if (!FLAGS_skip_cpu) {
std::vector<paddle::framework::FetchType*> cpu_fetchs1;
cpu_fetchs1.push_back(&output1);
// Run inference on CPU
LOG(INFO) << "--- CPU Runs: ---";
LOG(INFO) << "Batch size is " << FLAGS_batch_size;
TestInference<paddle::platform::CPUPlace, false, true>(
dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, is_combined);
LOG(INFO) << BOOST_GET(paddle::framework::LoDTensor, output1).dims();
}
#ifdef PADDLE_WITH_CUDA
paddle::framework::FetchType output2;
std::vector<paddle::framework::FetchType*> cpu_fetchs2;
cpu_fetchs2.push_back(&output2);
// Run inference on CUDA GPU
LOG(INFO) << "--- GPU Runs: ---";
LOG(INFO) << "Batch size is " << FLAGS_batch_size;
TestInference<paddle::platform::CUDAPlace, false, true>(
dirname, cpu_feeds, cpu_fetchs2, FLAGS_repeat, is_combined);
LOG(INFO) << BOOST_GET(paddle::framework::LoDTensor, output2).dims();
if (!FLAGS_skip_cpu) {
CheckError<float>(BOOST_GET(paddle::framework::LoDTensor, output1),
BOOST_GET(paddle::framework::LoDTensor, output2));
}
// float16 inference requires cuda GPUs with >= 5.3 compute capability
if (!FLAGS_fp16_dirname.empty() &&
paddle::platform::GetCUDAComputeCapability(0) >= 53) {
paddle::framework::FetchType output3;
std::vector<paddle::framework::FetchType*> cpu_fetchs3;
cpu_fetchs3.push_back(&output3);
LOG(INFO) << "--- GPU Runs in float16 mode: ---";
LOG(INFO) << "Batch size is " << FLAGS_batch_size;
TestInference<paddle::platform::CUDAPlace, false, true>(
FLAGS_fp16_dirname, cpu_feeds, cpu_fetchs3, FLAGS_repeat);
CheckError<float>(BOOST_GET(paddle::framework::LoDTensor, output2),
BOOST_GET(paddle::framework::LoDTensor, output3));
}
#endif
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "gflags/gflags.h"
#include "gtest/gtest.h"
#include "paddle/fluid/inference/tests/test_helper.h"
DEFINE_string(dirname, "", "Directory of the inference model.");
TEST(inference, label_semantic_roles) {
if (FLAGS_dirname.empty()) {
LOG(FATAL) << "Usage: ./example --dirname=path/to/your/model";
}
LOG(INFO) << "FLAGS_dirname: " << FLAGS_dirname << std::endl;
std::string dirname = FLAGS_dirname;
// 0. Call `paddle::framework::InitDevices()` to initialize all the devices
// In unittests, this is done in paddle/testing/paddle_gtest_main.cc
paddle::framework::LoDTensor word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1,
ctx_p2, mark;
paddle::framework::LoD lod{{0, 4, 10}};
int64_t word_dict_len = 44068;
int64_t predicate_dict_len = 3162;
int64_t mark_dict_len = 2;
SetupLoDTensor(&word, lod, static_cast<int64_t>(0),
static_cast<int64_t>(word_dict_len - 1));
SetupLoDTensor(&predicate, lod, static_cast<int64_t>(0),
static_cast<int64_t>(predicate_dict_len - 1));
SetupLoDTensor(&ctx_n2, lod, static_cast<int64_t>(0),
static_cast<int64_t>(word_dict_len - 1));
SetupLoDTensor(&ctx_n1, lod, static_cast<int64_t>(0),
static_cast<int64_t>(word_dict_len - 1));
SetupLoDTensor(&ctx_0, lod, static_cast<int64_t>(0),
static_cast<int64_t>(word_dict_len - 1));
SetupLoDTensor(&ctx_p1, lod, static_cast<int64_t>(0),
static_cast<int64_t>(word_dict_len - 1));
SetupLoDTensor(&ctx_p2, lod, static_cast<int64_t>(0),
static_cast<int64_t>(word_dict_len - 1));
SetupLoDTensor(&mark, lod, static_cast<int64_t>(0),
static_cast<int64_t>(mark_dict_len - 1));
std::vector<paddle::framework::LoDTensor*> cpu_feeds;
cpu_feeds.push_back(&word);
cpu_feeds.push_back(&predicate);
cpu_feeds.push_back(&ctx_n2);
cpu_feeds.push_back(&ctx_n1);
cpu_feeds.push_back(&ctx_0);
cpu_feeds.push_back(&ctx_p1);
cpu_feeds.push_back(&ctx_p2);
cpu_feeds.push_back(&mark);
paddle::framework::FetchType output1;
std::vector<paddle::framework::FetchType*> cpu_fetchs1;
cpu_fetchs1.push_back(&output1);
// Run inference on CPU
TestInference<paddle::platform::CPUPlace>(dirname, cpu_feeds, cpu_fetchs1);
auto output1_tensor = BOOST_GET(paddle::framework::LoDTensor, output1);
LOG(INFO) << output1_tensor.lod();
LOG(INFO) << output1_tensor.dims();
#ifdef PADDLE_WITH_CUDA
paddle::framework::FetchType output2;
std::vector<paddle::framework::FetchType*> cpu_fetchs2;
cpu_fetchs2.push_back(&output2);
// Run inference on CUDA GPU
TestInference<paddle::platform::CUDAPlace>(dirname, cpu_feeds, cpu_fetchs2);
auto output2_tensor = BOOST_GET(paddle::framework::LoDTensor, output2);
LOG(INFO) << output2_tensor.lod();
LOG(INFO) << output2_tensor.dims();
CheckError<float>(output1_tensor, output2_tensor);
#endif
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <time.h>
#include <fstream>
#include <thread> // NOLINT
#include "gflags/gflags.h"
#include "gtest/gtest.h"
#include "paddle/fluid/inference/tests/test_helper.h"
#include "paddle/fluid/platform/cpu_helper.h"
#include "paddle/fluid/framework/feed_fetch_method.h"
DEFINE_string(model_path, "", "Directory of the inference model.");
DEFINE_string(data_file, "", "File of input index data.");
DEFINE_int32(repeat, 100, "Number of times to repeat running the inference program");
DEFINE_bool(prepare_vars, true, "Prepare variables before executor");
DEFINE_int32(num_threads, 1, "Number of threads to be used");
DECLARE_bool(use_mkldnn);
DECLARE_int32(paddle_num_threads);
inline double GetCurrentMs() {
struct timeval time;
gettimeofday(&time, NULL);
return 1e+3 * time.tv_sec + 1e-3 * time.tv_usec;
}
// This function just gives dummy data for the recognize_digits model.
size_t DummyData(std::vector<paddle::framework::LoDTensor>* out) {
paddle::framework::LoDTensor input;
SetupTensor<float>(&input, {1, 1, 28, 28}, -1.f, 1.f);
out->emplace_back(input);
return 1;
}
// Load the input word index data from file and save it into LoDTensors.
// Return the total number of words.
size_t LoadData(std::vector<paddle::framework::LoDTensor>* out,
const std::string& filename) {
if (filename.empty()) {
return DummyData(out);
}
size_t sz = 0;
std::fstream fin(filename);
std::string line;
out->clear();
while (getline(fin, line)) {
std::istringstream iss(line);
std::vector<int64_t> ids;
std::string field;
while (getline(iss, field, ' ')) {
ids.push_back(stoi(field));
}
if (ids.size() >= 1024) {
// Synced with the NLP team; they will ignore inputs larger than 1024
continue;
}
paddle::framework::LoDTensor words;
paddle::framework::LoD lod{{0, ids.size()}};
words.set_lod(lod);
int64_t* pdata = words.mutable_data<int64_t>(
{static_cast<int64_t>(ids.size()), 1}, paddle::platform::CPUPlace());
memcpy(pdata, ids.data(), words.numel() * sizeof(int64_t));
out->emplace_back(words);
sz += ids.size();
}
return sz;
}
// Split the input data samples into per-thread jobs as evenly as possible,
// according to the number of threads.
void SplitData(
const std::vector<paddle::framework::LoDTensor>& datasets,
std::vector<std::vector<const paddle::framework::LoDTensor*>>* jobs,
const int num_threads) {
size_t s = 0;
jobs->resize(num_threads);
while (s < datasets.size()) {
for (auto it = jobs->begin(); it != jobs->end(); it++) {
it->emplace_back(&datasets[s]);
s++;
if (s >= datasets.size()) {
break;
}
}
}
}
void ThreadRunInfer(
const int tid, paddle::framework::Scope* scope,
const std::vector<std::vector<const paddle::framework::LoDTensor*>>& jobs) {
// maybe framework::ProgramDesc is not thread-safe
paddle::platform::CPUPlace place;
paddle::framework::Executor executor(place);
auto& sub_scope = scope->NewScope();
auto inference_program =
paddle::inference::Load(&executor, scope, FLAGS_model_path);
auto ctx = executor.Prepare(*inference_program, /*block_id*/ 0);
executor.CreateVariables(*inference_program, &sub_scope, /*block_id*/ 0);
const std::vector<std::string>& feed_target_names =
inference_program->GetFeedTargetNames();
const std::vector<std::string>& fetch_target_names =
inference_program->GetFetchTargetNames();
PADDLE_ENFORCE_EQ(fetch_target_names.size(), 1UL);
std::map<std::string, paddle::framework::FetchType*> fetch_targets;
paddle::framework::FetchType outtensor;
fetch_targets[fetch_target_names[0]] = &outtensor;
std::map<std::string, const paddle::framework::LoDTensor*> feed_targets;
PADDLE_ENFORCE_EQ(feed_target_names.size(), 1UL);
// map the data of feed_targets to feed_holder
for (auto* op : inference_program->Block(0).AllOps()) {
if (op->Type() == "feed") {
std::string feed_target_name = op->Output("Out")[0];
int idx = BOOST_GET_CONST(int, op->GetAttr("col"));
paddle::framework::SetFeedVariable(scope, *feed_targets[feed_target_name],
"feed", idx);
}
}
auto& inputs = jobs[tid];
auto start_ms = GetCurrentMs();
for (size_t i = 0; i < inputs.size(); ++i) {
feed_targets[feed_target_names[0]] = inputs[i];
executor.RunPreparedContext(ctx.get(), &sub_scope,
false /*create_local_scope*/);
}
auto stop_ms = GetCurrentMs();
// obtain the data of fetch_targets from fetch_holder
for (auto* op : inference_program->Block(0).AllOps()) {
if (op->Type() == "fetch") {
std::string fetch_target_name = op->Input("X")[0];
int idx = BOOST_GET_CONST(int, op->GetAttr("col"));
*fetch_targets[fetch_target_name] =
BOOST_GET(paddle::framework::LoDTensor,
paddle::framework::GetFetchVariable(*scope, "fetch", idx));
}
}
scope->DeleteScope(&sub_scope);
LOG(INFO) << "Tid: " << tid << ", process " << inputs.size()
<< " samples, avg time per sample: "
<< (stop_ms - start_ms) / inputs.size() << " ms";
}
TEST(inference, nlp) {
if (FLAGS_model_path.empty()) {
LOG(FATAL) << "Usage: ./example --model_path=path/to/your/model";
}
if (FLAGS_data_file.empty()) {
LOG(WARNING) << "No data file provided, will use dummy data!"
<< "Note: if you use nlp model, please provide data file.";
}
LOG(INFO) << "Model Path: " << FLAGS_model_path;
LOG(INFO) << "Data File: " << FLAGS_data_file;
std::vector<paddle::framework::LoDTensor> datasets;
size_t num_total_words = LoadData(&datasets, FLAGS_data_file);
LOG(INFO) << "Number of samples (seq_len<1024): " << datasets.size();
LOG(INFO) << "Total number of words: " << num_total_words;
// 0. Call `paddle::framework::InitDevices()` to initialize all the devices
std::unique_ptr<paddle::framework::Scope> scope(
new paddle::framework::Scope());
paddle::platform::SetNumThreads(FLAGS_paddle_num_threads);
double start_ms = 0, stop_ms = 0;
if (FLAGS_num_threads > 1) {
std::vector<std::vector<const paddle::framework::LoDTensor*>> jobs;
SplitData(datasets, &jobs, FLAGS_num_threads);
std::vector<std::unique_ptr<std::thread>> threads;
start_ms = GetCurrentMs();
for (int i = 0; i < FLAGS_num_threads; ++i) {
threads.emplace_back(
new std::thread(ThreadRunInfer, i, scope.get(), std::ref(jobs)));
}
for (int i = 0; i < FLAGS_num_threads; ++i) {
threads[i]->join();
}
stop_ms = GetCurrentMs();
} else {
// 1. Define place, executor, scope
paddle::platform::CPUPlace place;
paddle::framework::Executor executor(place);
// 2. Initialize the inference_program and load parameters
std::unique_ptr<paddle::framework::ProgramDesc> inference_program;
inference_program = InitProgram(&executor, scope.get(), FLAGS_model_path,
/*model combined*/ false);
// always prepare context
std::unique_ptr<paddle::framework::ExecutorPrepareContext> ctx;
ctx = executor.Prepare(*inference_program, 0);
if (FLAGS_prepare_vars) {
executor.CreateVariables(*inference_program, scope.get(), 0);
}
// prepare fetch
const std::vector<std::string>& fetch_target_names =
inference_program->GetFetchTargetNames();
PADDLE_ENFORCE_EQ(fetch_target_names.size(), 1UL);
std::map<std::string, paddle::framework::FetchType*> fetch_targets;
paddle::framework::FetchType outtensor;
fetch_targets[fetch_target_names[0]] = &outtensor;
// prepare feed
const std::vector<std::string>& feed_target_names =
inference_program->GetFeedTargetNames();
PADDLE_ENFORCE_EQ(feed_target_names.size(), 1UL);
std::map<std::string, const paddle::framework::LoDTensor*> feed_targets;
// feed data and run
start_ms = GetCurrentMs();
for (size_t i = 0; i < datasets.size(); ++i) {
feed_targets[feed_target_names[0]] = &(datasets[i]);
executor.RunPreparedContext(ctx.get(), scope.get(), &feed_targets,
&fetch_targets, !FLAGS_prepare_vars);
}
stop_ms = GetCurrentMs();
LOG(INFO) << "Tid: 0, process " << datasets.size()
<< " samples, avg time per sample: "
<< (stop_ms - start_ms) / datasets.size() << " ms";
}
LOG(INFO) << "Total inference time with " << FLAGS_num_threads
<< " threads : " << (stop_ms - start_ms) / 1000.0
<< " sec, QPS: " << datasets.size() / ((stop_ms - start_ms) / 1000);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "gflags/gflags.h"
#include "gtest/gtest.h"
#include "paddle/fluid/inference/tests/test_helper.h"
DEFINE_string(dirname, "", "Directory of the inference model.");
DEFINE_int32(batch_size, 1, "Batch size of input data");
DEFINE_int32(repeat, 1, "Number of times to repeat running the inference program");
TEST(inference, recognize_digits) {
if (FLAGS_dirname.empty() || FLAGS_batch_size < 1 || FLAGS_repeat < 1) {
LOG(FATAL) << "Usage: ./example --dirname=path/to/your/model "
"--batch_size=1 --repeat=1";
}
LOG(INFO) << "FLAGS_dirname: " << FLAGS_dirname << std::endl;
std::string dirname = FLAGS_dirname;
// 0. Call `paddle::framework::InitDevices()` to initialize all the devices
// In unittests, this is done in paddle/testing/paddle_gtest_main.cc
paddle::framework::LoDTensor input;
// Use normalized image pixels as input data,
// which should be in the range [-1.0, 1.0].
SetupTensor<float>(&input, {FLAGS_batch_size, 1, 28, 28},
static_cast<float>(-1), static_cast<float>(1));
std::vector<paddle::framework::LoDTensor*> cpu_feeds;
cpu_feeds.push_back(&input);
for (auto is_combined : {false, true}) {
paddle::framework::FetchType output1;
std::vector<paddle::framework::FetchType*> cpu_fetchs1;
cpu_fetchs1.push_back(&output1);
// Run inference on CPU
LOG(INFO) << "--- CPU Runs: is_combined=" << is_combined << " ---";
TestInference<paddle::platform::CPUPlace>(dirname, cpu_feeds, cpu_fetchs1,
FLAGS_repeat, is_combined);
auto output1_tensor = BOOST_GET(paddle::framework::LoDTensor, output1);
LOG(INFO) << output1_tensor.dims();
#ifdef PADDLE_WITH_CUDA
paddle::framework::FetchType output2;
std::vector<paddle::framework::FetchType*> cpu_fetchs2;
cpu_fetchs2.push_back(&output2);
// Run inference on CUDA GPU
LOG(INFO) << "--- GPU Runs: is_combined=" << is_combined << " ---";
TestInference<paddle::platform::CUDAPlace>(dirname, cpu_feeds, cpu_fetchs2,
FLAGS_repeat, is_combined);
auto output2_tensor = BOOST_GET(paddle::framework::LoDTensor, output2);
LOG(INFO) << output2_tensor.dims();
CheckError<float>(output1_tensor, output2_tensor);
#endif
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "gflags/gflags.h"
#include "gtest/gtest.h"
#include "paddle/fluid/inference/tests/test_helper.h"
DEFINE_string(dirname, "", "Directory of the inference model.");
TEST(inference, recommender_system) {
if (FLAGS_dirname.empty()) {
LOG(FATAL) << "Usage: ./example --dirname=path/to/your/model";
}
LOG(INFO) << "FLAGS_dirname: " << FLAGS_dirname << std::endl;
std::string dirname = FLAGS_dirname;
// 0. Call `paddle::framework::InitDevices()` to initialize all the devices
// In unittests, this is done in paddle/testing/paddle_gtest_main.cc
int64_t batch_size = 1;
paddle::framework::LoDTensor user_id, gender_id, age_id, job_id, movie_id,
category_id, movie_title;
// Use the first data from paddle.dataset.movielens.test() as input
std::vector<int64_t> user_id_data = {1};
SetupTensor<int64_t>(&user_id, {batch_size, 1}, user_id_data);
std::vector<int64_t> gender_id_data = {1};
SetupTensor<int64_t>(&gender_id, {batch_size, 1}, gender_id_data);
std::vector<int64_t> age_id_data = {0};
SetupTensor<int64_t>(&age_id, {batch_size, 1}, age_id_data);
std::vector<int64_t> job_id_data = {10};
SetupTensor<int64_t>(&job_id, {batch_size, 1}, job_id_data);
std::vector<int64_t> movie_id_data = {783};
SetupTensor<int64_t>(&movie_id, {batch_size, 1}, movie_id_data);
std::vector<int64_t> category_id_data = {10, 8, 9};
SetupLoDTensor<int64_t>(&category_id, {3, 1}, {{0, 3}}, category_id_data);
std::vector<int64_t> movie_title_data = {1069, 4140, 2923, 710, 988};
SetupLoDTensor<int64_t>(&movie_title, {5, 1}, {{0, 5}}, movie_title_data);
std::vector<paddle::framework::LoDTensor*> cpu_feeds;
cpu_feeds.push_back(&user_id);
cpu_feeds.push_back(&gender_id);
cpu_feeds.push_back(&age_id);
cpu_feeds.push_back(&job_id);
cpu_feeds.push_back(&movie_id);
cpu_feeds.push_back(&category_id);
cpu_feeds.push_back(&movie_title);
paddle::framework::FetchType output1;
std::vector<paddle::framework::FetchType*> cpu_fetchs1;
cpu_fetchs1.push_back(&output1);
// Run inference on CPU
TestInference<paddle::platform::CPUPlace>(dirname, cpu_feeds, cpu_fetchs1);
auto output1_tensor = BOOST_GET(paddle::framework::LoDTensor, output1);
LOG(INFO) << output1_tensor.dims();
#ifdef PADDLE_WITH_CUDA
paddle::framework::FetchType output2;
std::vector<paddle::framework::FetchType*> cpu_fetchs2;
cpu_fetchs2.push_back(&output2);
// Run inference on CUDA GPU
TestInference<paddle::platform::CUDAPlace>(dirname, cpu_feeds, cpu_fetchs2);
auto output2_tensor = BOOST_GET(paddle::framework::LoDTensor, output2);
LOG(INFO) << output2_tensor.dims();
CheckError<float>(output1_tensor, output2_tensor);
#endif
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "gflags/gflags.h"
#include "gtest/gtest.h"
#include "paddle/fluid/inference/tests/test_helper.h"
DEFINE_string(dirname, "", "Directory of the inference model.");
TEST(inference, rnn_encoder_decoder) {
if (FLAGS_dirname.empty()) {
LOG(FATAL) << "Usage: ./example --dirname=path/to/your/model";
}
LOG(INFO) << "FLAGS_dirname: " << FLAGS_dirname << std::endl;
std::string dirname = FLAGS_dirname;
// 0. Call `paddle::framework::InitDevices()` to initialize all the devices
// In unittests, this is done in paddle/testing/paddle_gtest_main.cc
paddle::framework::LoDTensor word_data, trg_word;
paddle::framework::LoD lod{{0, 4, 10}};
SetupLoDTensor(&word_data, lod, static_cast<int64_t>(0),
static_cast<int64_t>(1));
SetupLoDTensor(&trg_word, lod, static_cast<int64_t>(0),
static_cast<int64_t>(1));
std::vector<paddle::framework::LoDTensor*> cpu_feeds;
cpu_feeds.push_back(&word_data);
cpu_feeds.push_back(&trg_word);
paddle::framework::FetchType output1;
std::vector<paddle::framework::FetchType*> cpu_fetchs1;
cpu_fetchs1.push_back(&output1);
// Run inference on CPU
TestInference<paddle::platform::CPUPlace>(dirname, cpu_feeds, cpu_fetchs1);
auto output1_tensor = BOOST_GET(paddle::framework::LoDTensor, output1);
LOG(INFO) << output1_tensor.lod();
LOG(INFO) << output1_tensor.dims();
#ifdef PADDLE_WITH_CUDA
paddle::framework::FetchType output2;
std::vector<paddle::framework::FetchType*> cpu_fetchs2;
cpu_fetchs2.push_back(&output2);
// Run inference on CUDA GPU
TestInference<paddle::platform::CUDAPlace>(dirname, cpu_feeds, cpu_fetchs2);
auto output2_tensor = BOOST_GET(paddle::framework::LoDTensor, output2);
LOG(INFO) << output2_tensor.lod();
LOG(INFO) << output2_tensor.dims();
CheckError<float>(output1_tensor, output2_tensor);
#endif
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "gflags/gflags.h"
#include "gtest/gtest.h"
#include "paddle/fluid/inference/tests/test_helper.h"
DEFINE_string(dirname, "", "Directory of the inference model.");
TEST(inference, understand_sentiment) {
if (FLAGS_dirname.empty()) {
LOG(FATAL) << "Usage: ./example --dirname=path/to/your/model";
}
LOG(INFO) << "FLAGS_dirname: " << FLAGS_dirname << std::endl;
std::string dirname = FLAGS_dirname;
// 0. Call `paddle::framework::InitDevices()` to initialize all the devices
// In unittests, this is done in paddle/testing/paddle_gtest_main.cc
paddle::framework::LoDTensor words;
paddle::framework::LoD lod{{0, 4, 10}};
int64_t word_dict_len = 5147;
SetupLoDTensor(&words, lod, static_cast<int64_t>(0),
static_cast<int64_t>(word_dict_len - 1));
std::vector<paddle::framework::LoDTensor*> cpu_feeds;
cpu_feeds.push_back(&words);
paddle::framework::FetchType output1;
std::vector<paddle::framework::FetchType*> cpu_fetchs1;
cpu_fetchs1.push_back(&output1);
// Run inference on CPU
TestInference<paddle::platform::CPUPlace>(dirname, cpu_feeds, cpu_fetchs1);
auto output1_tensor = BOOST_GET(paddle::framework::LoDTensor, output1);
LOG(INFO) << output1_tensor.lod();
LOG(INFO) << output1_tensor.dims();
#ifdef PADDLE_WITH_CUDA
paddle::framework::FetchType output2;
std::vector<paddle::framework::FetchType*> cpu_fetchs2;
cpu_fetchs2.push_back(&output2);
// Run inference on CUDA GPU
TestInference<paddle::platform::CUDAPlace>(dirname, cpu_feeds, cpu_fetchs2);
auto output2_tensor = BOOST_GET(paddle::framework::LoDTensor, output2);
LOG(INFO) << output2_tensor.lod();
LOG(INFO) << output2_tensor.dims();
CheckError<float>(output1_tensor, output2_tensor);
#endif
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "gflags/gflags.h"
#include "gtest/gtest.h"
#include "paddle/fluid/inference/tests/test_helper.h"
DEFINE_string(dirname, "", "Directory of the inference model.");
TEST(inference, word2vec) {
if (FLAGS_dirname.empty()) {
LOG(FATAL) << "Usage: ./example --dirname=path/to/your/model";
}
LOG(INFO) << "FLAGS_dirname: " << FLAGS_dirname << std::endl;
std::string dirname = FLAGS_dirname;
// 0. Call `paddle::framework::InitDevices()` to initialize all the devices
// In unittests, this is done in paddle/testing/paddle_gtest_main.cc
paddle::framework::LoDTensor first_word, second_word, third_word, fourth_word;
paddle::framework::LoD lod{{0, 1}};
int64_t dict_size = 2073; // The size of dictionary
SetupLoDTensor(&first_word, lod, static_cast<int64_t>(0), dict_size - 1);
SetupLoDTensor(&second_word, lod, static_cast<int64_t>(0), dict_size - 1);
SetupLoDTensor(&third_word, lod, static_cast<int64_t>(0), dict_size - 1);
SetupLoDTensor(&fourth_word, lod, static_cast<int64_t>(0), dict_size - 1);
std::vector<paddle::framework::LoDTensor*> cpu_feeds;
cpu_feeds.push_back(&first_word);
cpu_feeds.push_back(&second_word);
cpu_feeds.push_back(&third_word);
cpu_feeds.push_back(&fourth_word);
paddle::framework::FetchType output1;
std::vector<paddle::framework::FetchType*> cpu_fetchs1;
cpu_fetchs1.push_back(&output1);
// Run inference on CPU
TestInference<paddle::platform::CPUPlace>(dirname, cpu_feeds, cpu_fetchs1);
auto output1_tensor = BOOST_GET(paddle::framework::LoDTensor, output1);
LOG(INFO) << output1_tensor.lod();
LOG(INFO) << output1_tensor.dims();
#ifdef PADDLE_WITH_CUDA
paddle::framework::FetchType output2;
std::vector<paddle::framework::FetchType*> cpu_fetchs2;
cpu_fetchs2.push_back(&output2);
// Run inference on CUDA GPU
TestInference<paddle::platform::CUDAPlace>(dirname, cpu_feeds, cpu_fetchs2);
auto output2_tensor = BOOST_GET(paddle::framework::LoDTensor, output2);
LOG(INFO) << output2_tensor.lod();
LOG(INFO) << output2_tensor.dims();
CheckError<float>(output1_tensor, output2_tensor);
#endif
}