diff --git a/paddle/fluid/inference/api/helper.h b/paddle/fluid/inference/api/helper.h
index f6893be428feacbba85bab380e22972848eaeb93..8e359a67738c0df180933421b45f15b39fd0e78c 100644
--- a/paddle/fluid/inference/api/helper.h
+++ b/paddle/fluid/inference/api/helper.h
@@ -123,10 +123,16 @@ std::string DescribeTensor(const PaddleTensor &tensor) {
 }
 
 void PrintTime(int batch_size, int repeat, int num_threads, int tid,
-               double latency) {
+               double latency, int epoch = 1) {
   LOG(INFO) << "====== batch_size: " << batch_size << ", repeat: " << repeat
             << ", threads: " << num_threads << ", thread id: " << tid
             << ", latency: " << latency << "ms ======";
+  if (epoch > 1) {
+    int samples = batch_size * epoch;
+    LOG(INFO) << "====== sample number: " << samples
+              << ", average latency of each sample: " << latency / samples
+              << "ms ======";
+  }
 }
 
 }  // namespace inference
diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt
index ece0d333999825e062087dc8cbd5533ae63689ee..7d86b4b4cae6e085d955caadbf5c985efb421658 100644
--- a/paddle/fluid/inference/tests/api/CMakeLists.txt
+++ b/paddle/fluid/inference/tests/api/CMakeLists.txt
@@ -51,9 +51,7 @@ inference_analysis_test(test_analyzer_lac SRCS analyzer_lac_tester.cc
 # text_classification
 set(TEXT_CLASSIFICATION_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/text_classification")
 download_model_and_data(${TEXT_CLASSIFICATION_INSTALL_DIR} "text-classification-Senta.tar.gz" "text_classification_data.txt.tar.gz")
-inference_analysis_test(test_text_classification SRCS analyzer_text_classification_tester.cc
+inference_analysis_test(test_analyzer_text_classification SRCS analyzer_text_classification_tester.cc
   EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
   ARGS --infer_model=${TEXT_CLASSIFICATION_INSTALL_DIR}/text-classification-Senta
-       --infer_data=${TEXT_CLASSIFICATION_INSTALL_DIR}/data.txt
-       --topn=1 # Just run top 1 batch.
-       )
+       --infer_data=${TEXT_CLASSIFICATION_INSTALL_DIR}/data.txt)
diff --git a/paddle/fluid/inference/tests/api/analyzer_lac_tester.cc b/paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
index 7e00cb20ad0ce052a84d5491b0cdf167f0768081..45c19af520dd4ee9696cb3486a74f9069b9782b5 100644
--- a/paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
@@ -12,21 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/fluid/inference/analysis/analyzer.h"
-#include <gtest/gtest.h>
-#include "paddle/fluid/framework/ir/fuse_pass_base.h"
-#include "paddle/fluid/inference/analysis/ut_helper.h"
-#include "paddle/fluid/inference/api/analysis_predictor.h"
-#include "paddle/fluid/inference/api/helper.h"
-#include "paddle/fluid/inference/api/paddle_inference_pass.h"
-#include "paddle/fluid/platform/profiler.h"
-
-DEFINE_string(infer_model, "", "model path for LAC");
-DEFINE_string(infer_data, "", "data file for LAC");
-DEFINE_int32(batch_size, 1, "batch size.");
-DEFINE_int32(burning, 0, "Burning before repeat.");
-DEFINE_int32(repeat, 1, "Running the inference program repeat times.");
-DEFINE_bool(test_all_data, false, "Test the all dataset in data file.");
+#include "paddle/fluid/inference/tests/api/tester_helper.h"
 
 namespace paddle {
 namespace inference {
@@ -126,46 +112,37 @@ void TestLACPrediction(const std::string &model_path,
                        const std::string &data_file, const int batch_size,
                        const int repeat, bool test_all_data,
                        bool use_analysis = false) {
-  NativeConfig config;
-  config.model_dir = model_path;
-  config.use_gpu = false;
-  config.device = 0;
-  config.specify_input_name = true;
+  AnalysisConfig cfg;
+  cfg.model_dir = model_path;
+  cfg.use_gpu = false;
+  cfg.device = 0;
+  cfg.specify_input_name = true;
+  cfg.enable_ir_optim = true;
+
   std::vector<PaddleTensor> input_slots, outputs_slots;
   DataRecord data(data_file, batch_size);
   GetOneBatch(&input_slots, &data, batch_size);
   std::unique_ptr<PaddlePredictor> predictor;
   if (use_analysis) {
-    AnalysisConfig cfg;
-    cfg.model_dir = model_path;
-    cfg.use_gpu = false;
-    cfg.device = 0;
-    cfg.specify_input_name = true;
-    cfg.enable_ir_optim = true;
     predictor =
        CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(cfg);
   } else {
     predictor =
-        CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
+        CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(cfg);
   }
   for (int i = 0; i < FLAGS_burning; i++) {
     predictor->Run(input_slots, &outputs_slots);
   }
   Timer timer;
-  if (test_all_data) {
-    double sum = 0;
-    LOG(INFO) << "Total number of samples: " << data.datasets.size();
-    for (int i = 0; i < repeat; i++) {
-      for (size_t bid = 0; bid < data.batched_datas.size(); ++bid) {
-        GetOneBatch(&input_slots, &data, batch_size);
-        timer.tic();
-        predictor->Run(input_slots, &outputs_slots);
-        sum += timer.toc();
-      }
+  if (FLAGS_test_all_data) {
+    LOG(INFO) << "test all data";
+    std::vector<std::vector<PaddleTensor>> input_slots_all;
+    for (size_t bid = 0; bid < data.batched_datas.size(); ++bid) {
+      GetOneBatch(&input_slots, &data, batch_size);
+      input_slots_all.emplace_back(input_slots);
     }
-    PrintTime(batch_size, repeat, 1, 0, sum / repeat);
-    LOG(INFO) << "Average latency of each sample: "
-              << sum / repeat / data.datasets.size() << " ms";
+    LOG(INFO) << "total number of samples: " << data.datasets.size();
+    TestPrediction(cfg, input_slots_all, &outputs_slots, FLAGS_num_threads);
     return;
   }
   timer.tic();
@@ -190,19 +167,10 @@ void TestLACPrediction(const std::string &model_path,
   if (use_analysis) {
     // run once for comparion as reference
     auto ref_predictor =
-        CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
+        CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(cfg);
     std::vector<PaddleTensor> ref_outputs_slots;
     ref_predictor->Run(input_slots, &ref_outputs_slots);
-    EXPECT_EQ(ref_outputs_slots.size(), outputs_slots.size());
-    auto &ref_out = ref_outputs_slots[0];
-    size_t ref_size =
-        std::accumulate(ref_out.shape.begin(), ref_out.shape.end(), 1,
-                        [](int a, int b) { return a * b; });
-    EXPECT_EQ(size, ref_size);
-    int64_t *pdata_ref = static_cast<int64_t *>(ref_out.data.data());
-    for (size_t i = 0; i < size; ++i) {
-      EXPECT_EQ(pdata_ref[i], pdata[i]);
-    }
+    CompareResult(ref_outputs_slots, outputs_slots);
 
     AnalysisPredictor *analysis_predictor =
         dynamic_cast<AnalysisPredictor *>(predictor.get());
diff --git a/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc b/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
index 6e8e43add7d3383fa79efea91c23750be9c8956f..f8c651e32f7e2ce1d8ced0e6774ffd555d351167 100644
--- a/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
@@ -12,20 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/fluid/inference/analysis/analyzer.h"
-#include <gtest/gtest.h>
-#include "paddle/fluid/framework/ir/fuse_pass_base.h"
-#include "paddle/fluid/inference/analysis/ut_helper.h"
-#include "paddle/fluid/inference/api/analysis_predictor.h"
-#include "paddle/fluid/inference/api/helper.h"
-#include "paddle/fluid/inference/api/paddle_inference_pass.h"
-#include "paddle/fluid/platform/profiler.h"
-
-DEFINE_string(infer_model, "", "model path");
-DEFINE_string(infer_data, "", "data path");
-DEFINE_int32(batch_size, 10, "batch size.");
-DEFINE_int32(repeat, 1, "Running the inference program repeat times.");
-DEFINE_bool(test_all_data, false, "Test the all dataset in data file.");
+#include "paddle/fluid/inference/tests/api/tester_helper.h"
 
 namespace paddle {
 namespace inference {
@@ -113,50 +100,35 @@ const int chinese_ner_result_data[] = {30, 45, 41, 48, 17, 26,
                                        48, 39, 38, 16, 25};
 
 void TestChineseNERPrediction(bool use_analysis) {
-  NativeConfig config;
-  config.prog_file = FLAGS_infer_model + "/__model__";
-  config.param_file = FLAGS_infer_model + "/param";
-  config.use_gpu = false;
-  config.device = 0;
-  config.specify_input_name = true;
+  AnalysisConfig cfg;
+  cfg.prog_file = FLAGS_infer_model + "/__model__";
+  cfg.param_file = FLAGS_infer_model + "/param";
+  cfg.use_gpu = false;
+  cfg.device = 0;
+  cfg.specify_input_name = true;
+  cfg.enable_ir_optim = true;
 
   std::vector<PaddleTensor> input_slots, outputs;
   std::unique_ptr<PaddlePredictor> predictor;
   Timer timer;
   if (use_analysis) {
-    AnalysisConfig cfg;
-    cfg.prog_file = FLAGS_infer_model + "/__model__";
-    cfg.param_file = FLAGS_infer_model + "/param";
-    cfg.use_gpu = false;
-    cfg.device = 0;
-    cfg.specify_input_name = true;
-    cfg.enable_ir_optim = true;
     predictor =
        CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(cfg);
   } else {
     predictor =
-        CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
+        CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(cfg);
   }
 
   if (FLAGS_test_all_data) {
     LOG(INFO) << "test all data";
-    double sum = 0;
-    size_t num_samples;
-    for (int i = 0; i < FLAGS_repeat; i++) {
-      DataRecord data(FLAGS_infer_data, FLAGS_batch_size);
-      // Just one batch, the num_samples remains the same.
-      num_samples = data.num_samples;
-      for (size_t bid = 0; bid < num_samples / FLAGS_batch_size; ++bid) {
-        PrepareInputs(&input_slots, &data, FLAGS_batch_size);
-        timer.tic();
-        predictor->Run(input_slots, &outputs);
-        sum += timer.toc();
-      }
+    DataRecord data(FLAGS_infer_data, FLAGS_batch_size);
+    std::vector<std::vector<PaddleTensor>> input_slots_all;
+    for (size_t bid = 0; bid < data.num_samples / FLAGS_batch_size; ++bid) {
+      PrepareInputs(&input_slots, &data, FLAGS_batch_size);
+      input_slots_all.emplace_back(input_slots);
     }
-    LOG(INFO) << "total number of samples: " << num_samples;
-    PrintTime(FLAGS_batch_size, FLAGS_repeat, 1, 0, sum / FLAGS_repeat);
-    LOG(INFO) << "average latency of each sample: "
-              << sum / FLAGS_repeat / num_samples;
+    LOG(INFO) << "total number of samples: " << data.num_samples;
+    TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);
     return;
   }
   // Prepare inputs.
@@ -182,19 +154,10 @@ void TestChineseNERPrediction(bool use_analysis) {
   if (use_analysis) {
     // run once for comparion as reference
     auto ref_predictor =
-        CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
+        CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(cfg);
     std::vector<PaddleTensor> ref_outputs_slots;
     ref_predictor->Run(input_slots, &ref_outputs_slots);
-    EXPECT_EQ(ref_outputs_slots.size(), outputs.size());
-    auto &ref_out = ref_outputs_slots[0];
-    size_t ref_size =
-        std::accumulate(ref_out.shape.begin(), ref_out.shape.end(), 1,
-                        [](int a, int b) { return a * b; });
-    EXPECT_EQ(size, ref_size);
-    int64_t *pdata_ref = static_cast<int64_t *>(ref_out.data.data());
-    for (size_t i = 0; i < size; ++i) {
-      EXPECT_EQ(pdata_ref[i], result[i]);
-    }
+    CompareResult(ref_outputs_slots, outputs);
 
     AnalysisPredictor *analysis_predictor =
         dynamic_cast<AnalysisPredictor *>(predictor.get());
diff --git a/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc b/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
index b8ac468b4e98bcef81cdbbf66e3f1640c03a7ab8..df96be544eaf51c52aa5592966f499fad91aab82 100644
--- a/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
@@ -12,24 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/fluid/inference/analysis/analyzer.h"
-
-#include <google/protobuf/text_format.h>
-#include <gtest/gtest.h>
-#include <thread>  // NOLINT
-#include "paddle/fluid/framework/ir/fuse_pass_base.h"
-#include "paddle/fluid/framework/ir/pass.h"
-#include "paddle/fluid/inference/analysis/ut_helper.h"
-#include "paddle/fluid/inference/api/analysis_predictor.h"
-#include "paddle/fluid/inference/api/helper.h"
-#include "paddle/fluid/inference/api/paddle_inference_api.h"
-#include "paddle/fluid/inference/api/paddle_inference_pass.h"
-
-DEFINE_string(infer_model, "", "model path");
-DEFINE_string(infer_data, "", "data path");
-DEFINE_int32(batch_size, 10, "batch size.");
-DEFINE_int32(repeat, 1, "Running the inference program repeat times.");
-DEFINE_int32(num_threads, 1, "Running the inference program in multi-threads.");
+#include "paddle/fluid/inference/tests/api/tester_helper.h"
 
 namespace paddle {
 namespace inference {
@@ -164,26 +147,6 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
   }
 }
 
-void CompareResult(const std::vector<PaddleTensor> &outputs,
-                   const std::vector<PaddleTensor> &base_outputs) {
-  PADDLE_ENFORCE_GT(outputs.size(), 0);
-  PADDLE_ENFORCE_EQ(outputs.size(), base_outputs.size());
-  for (size_t i = 0; i < outputs.size(); i++) {
-    auto &out = outputs[i];
-    auto &base_out = base_outputs[i];
-    size_t size = std::accumulate(out.shape.begin(), out.shape.end(), 1,
-                                  [](int a, int b) { return a * b; });
-    size_t size1 = std::accumulate(base_out.shape.begin(), base_out.shape.end(),
-                                   1, [](int a, int b) { return a * b; });
-    PADDLE_ENFORCE_EQ(size, size1);
-    PADDLE_ENFORCE_GT(size, 0);
-    float *data = static_cast<float *>(out.data.data());
-    float *base_data = static_cast<float *>(base_out.data.data());
-    for (size_t i = 0; i < size; i++) {
-      EXPECT_NEAR(data[i], base_data[i], 1e-3);
-    }
-  }
-}
 // Test with a really complicate model.
 void TestRNN1Prediction(bool use_analysis, bool activate_ir, int num_threads) {
   AnalysisConfig config;
@@ -198,7 +161,6 @@ void TestRNN1Prediction(bool use_analysis, bool activate_ir, int num_threads) {
   config.ir_passes.clear();  // Do not exclude any pass.
 
   int batch_size = FLAGS_batch_size;
-  int num_times = FLAGS_repeat;
 
   auto base_predictor =
       CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
@@ -213,45 +175,14 @@ void TestRNN1Prediction(bool use_analysis, bool activate_ir, int num_threads) {
   base_predictor->Run(input_slots, &base_outputs);
 
+  std::vector<std::vector<PaddleTensor>> input_slots_all;
+  input_slots_all.emplace_back(input_slots);
   if (num_threads == 1) {
-    // Prepare inputs.
-    Timer timer;
-    timer.tic();
-    for (int i = 0; i < num_times; i++) {
-      predictor->Run(input_slots, &outputs);
-    }
-    PrintTime(batch_size, num_times, 1, 0, timer.toc() / num_times);
+    TestOneThreadPrediction(config, input_slots_all, &outputs);
     CompareResult(outputs, base_outputs);
   } else {
-    std::vector<std::thread> threads;
-    std::vector<std::unique_ptr<PaddlePredictor>> predictors;
-    // TODO(yanchunwei): Bug here, the analyzer phase can't be parallelled
-    // because AttentionLSTM's hard code nodeid will be damanged.
-    for (int tid = 0; tid < num_threads; ++tid) {
-      predictors.emplace_back(
-          CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
-              config));
-    }
-    for (int tid = 0; tid < num_threads; ++tid) {
-      threads.emplace_back([&, tid]() {
-        // Each thread should have local input_slots and outputs.
-        std::vector<PaddleTensor> input_slots;
-        DataRecord data(FLAGS_infer_data, batch_size);
-        PrepareInputs(&input_slots, &data, batch_size);
-        std::vector<PaddleTensor> outputs;
-        Timer timer;
-        timer.tic();
-        for (int i = 0; i < num_times; i++) {
-          predictors[tid]->Run(input_slots, &outputs);
-        }
-        PrintTime(batch_size, num_times, num_threads, tid,
-                  timer.toc() / num_times);
-        CompareResult(outputs, base_outputs);
-      });
-    }
-    for (int i = 0; i < num_threads; ++i) {
-      threads[i].join();
-    }
+    // only return the output of first thread
+    TestMultiThreadPrediction(config, input_slots_all, &outputs, num_threads);
   }
 
   if (use_analysis && activate_ir) {
@@ -293,8 +224,7 @@ TEST(Analyzer, RNN_tests) {
     // Directly infer with the original model.
     TestRNN1Prediction(false, false, i);
     // Inference with the original model with the analysis turned on, the
-    // analysis
-    // module will transform the program to a data flow graph.
+    // analysis module will transform the program to a data flow graph.
     TestRNN1Prediction(true, false, i);
     // Inference with analysis and IR. The IR module will fuse some large
     // kernels.
diff --git a/paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc b/paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc
index 65169f8cfcc5bf1e989609666f6e0ba03e42e5ba..1472c475e4a3061ffcad96925ea215a41a7e63eb 100644
--- a/paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc
@@ -12,23 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/fluid/inference/analysis/analyzer.h"
-#include <gflags/gflags.h>
-#include <glog/logging.h>  // use glog instead of PADDLE_ENFORCE to avoid importing other paddle header files.
-#include <gtest/gtest.h>
-#include <fstream>
-#include "paddle/fluid/framework/ir/pass.h"
-#include "paddle/fluid/inference/analysis/ut_helper.h"
-#include "paddle/fluid/inference/api/helper.h"
-#include "paddle/fluid/inference/api/paddle_inference_api.h"
-#include "paddle/fluid/inference/api/paddle_inference_pass.h"
-#include "paddle/fluid/inference/api/timer.h"
-
-DEFINE_string(infer_model, "", "Directory of the inference model.");
-DEFINE_string(infer_data, "", "Path of the dataset.");
-DEFINE_int32(batch_size, 1, "batch size.");
-DEFINE_int32(repeat, 1, "How many times to repeat run.");
-DEFINE_int32(topn, -1, "Run top n batches of data to save time");
+#include "paddle/fluid/inference/tests/api/tester_helper.h"
 
 namespace paddle {
 namespace inference {
@@ -37,24 +21,25 @@ struct DataReader {
   explicit DataReader(const std::string &path)
       : file(new std::ifstream(path)) {}
 
-  bool NextBatch(PaddleTensor *tensor, int batch_size) {
+  bool NextBatch(std::vector<PaddleTensor> *input, int batch_size) {
     PADDLE_ENFORCE_EQ(batch_size, 1);
     std::string line;
-    tensor->lod.clear();
-    tensor->lod.emplace_back(std::vector<size_t>({0}));
+    PaddleTensor tensor;
+    tensor.dtype = PaddleDType::INT64;
+    tensor.lod.emplace_back(std::vector<size_t>({0}));
     std::vector<int64_t> data;
     for (int i = 0; i < batch_size; i++) {
       if (!std::getline(*file, line)) return false;
      inference::split_to_int64(line, ' ', &data);
     }
-    tensor->lod.front().push_back(data.size());
+    tensor.lod.front().push_back(data.size());
 
-    tensor->data.Resize(data.size() * sizeof(int64_t));
-    memcpy(tensor->data.data(), data.data(), data.size() * sizeof(int64_t));
-    tensor->shape.clear();
-    tensor->shape.push_back(data.size());
-    tensor->shape.push_back(1);
+    tensor.data.Resize(data.size() * sizeof(int64_t));
+    memcpy(tensor.data.data(), data.data(), data.size() * sizeof(int64_t));
+    tensor.shape.push_back(data.size());
+    tensor.shape.push_back(1);
+    input->assign({tensor});
     return true;
   }
 
@@ -68,32 +53,28 @@ void Main(int batch_size) {
   config.model_dir = FLAGS_infer_model;
   config.use_gpu = false;
   config.enable_ir_optim = true;
 
-  auto predictor =
-      CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
-          config);
-
-  std::vector<PaddleTensor> input_slots(1);
-  // one batch starts
-  // data --
-  auto &input = input_slots[0];
-  input.dtype = PaddleDType::INT64;
-  inference::Timer timer;
-  double sum = 0;
-  std::vector<PaddleTensor> output_slots;
+  std::vector<PaddleTensor> input_slots, output_slots;
+  DataReader reader(FLAGS_infer_data);
+  std::vector<std::vector<PaddleTensor>> input_slots_all;
 
-  int num_batches = 0;
-  for (int t = 0; t < FLAGS_repeat; t++) {
-    DataReader reader(FLAGS_infer_data);
-    while (reader.NextBatch(&input, FLAGS_batch_size)) {
-      if (FLAGS_topn > 0 && num_batches > FLAGS_topn) break;
-      timer.tic();
-      CHECK(predictor->Run(input_slots, &output_slots));
-      sum += timer.toc();
+  if (FLAGS_test_all_data) {
+    LOG(INFO) << "test all data";
+    int num_batches = 0;
+    while (reader.NextBatch(&input_slots, FLAGS_batch_size)) {
+      input_slots_all.emplace_back(input_slots);
       ++num_batches;
     }
+    LOG(INFO) << "total number of samples: " << num_batches * FLAGS_batch_size;
+    TestPrediction(config, input_slots_all, &output_slots, FLAGS_num_threads);
+    return;
   }
-  PrintTime(batch_size, FLAGS_repeat, 1, 0, sum / FLAGS_repeat);
+
+  // one batch starts
+  // data --
+  reader.NextBatch(&input_slots, FLAGS_batch_size);
+  input_slots_all.emplace_back(input_slots);
+  TestPrediction(config, input_slots_all, &output_slots, FLAGS_num_threads);
 
   // Get output
   LOG(INFO) << "get outputs " << output_slots.size();
diff --git a/paddle/fluid/inference/tests/api/tester_helper.h b/paddle/fluid/inference/tests/api/tester_helper.h
new file mode 100644
index 0000000000000000000000000000000000000000..44688ad36efe685e73df39e515949c9224e955ea
--- /dev/null
+++ b/paddle/fluid/inference/tests/api/tester_helper.h
@@ -0,0 +1,126 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <gtest/gtest.h>
+#include <thread>  // NOLINT
+#include <vector>
+#include "paddle/fluid/framework/ir/fuse_pass_base.h"
+#include "paddle/fluid/inference/analysis/analyzer.h"
+#include "paddle/fluid/inference/analysis/ut_helper.h"
+#include "paddle/fluid/inference/api/analysis_predictor.h"
+#include "paddle/fluid/inference/api/helper.h"
+#include "paddle/fluid/inference/api/paddle_inference_pass.h"
+#include "paddle/fluid/platform/profiler.h"
+
+DEFINE_string(infer_model, "", "model path");
+DEFINE_string(infer_data, "", "data file");
+DEFINE_int32(batch_size, 1, "batch size.");
+DEFINE_int32(burning, 0, "Burning before repeat.");
+DEFINE_int32(repeat, 1, "Running the inference program repeat times.");
+DEFINE_bool(test_all_data, false, "Test the all dataset in data file.");
+DEFINE_int32(num_threads, 1, "Running the inference program in multi-threads.");
+
+namespace paddle {
+namespace inference {
+
+void CompareResult(const std::vector<PaddleTensor> &outputs,
+                   const std::vector<PaddleTensor> &base_outputs) {
+  PADDLE_ENFORCE_GT(outputs.size(), 0);
+  PADDLE_ENFORCE_EQ(outputs.size(), base_outputs.size());
+  for (size_t i = 0; i < outputs.size(); i++) {
+    auto &out = outputs[i];
+    auto &base_out = base_outputs[i];
+    size_t size = std::accumulate(out.shape.begin(), out.shape.end(), 1,
+                                  [](int a, int b) { return a * b; });
+    size_t size1 = std::accumulate(base_out.shape.begin(), base_out.shape.end(),
+                                   1, [](int a, int b) { return a * b; });
+    PADDLE_ENFORCE_EQ(size, size1);
+    PADDLE_ENFORCE_GT(size, 0);
+    float *data = static_cast<float *>(out.data.data());
+    float *base_data = static_cast<float *>(base_out.data.data());
+    for (size_t i = 0; i < size; i++) {
+      EXPECT_NEAR(data[i], base_data[i], 1e-3);
+    }
+  }
+}
+
+void TestOneThreadPrediction(
+    AnalysisConfig config, const std::vector<std::vector<PaddleTensor>> inputs,
+    std::vector<PaddleTensor> *outputs) {
+  int batch_size = FLAGS_batch_size;
+  int num_times = FLAGS_repeat;
+  auto predictor =
+      CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
+          config);
+  Timer timer;
+  timer.tic();
+  for (int i = 0; i < num_times; i++) {
+    for (size_t j = 0; j < inputs.size(); j++) {
+      predictor->Run(inputs[j], outputs);
+    }
+  }
+  PrintTime(batch_size, num_times, 1, 0, timer.toc() / num_times,
+            inputs.size());
+}
+
+void TestMultiThreadPrediction(
+    AnalysisConfig config, const std::vector<std::vector<PaddleTensor>> inputs,
+    std::vector<PaddleTensor> *outputs, int num_threads) {
+  int batch_size = FLAGS_batch_size;
+  int num_times = FLAGS_repeat;
+  std::vector<std::thread> threads;
+  std::vector<std::unique_ptr<PaddlePredictor>> predictors;
+  // TODO(yanchunwei): Bug here, the analyzer phase can't be parallelized
+  // because AttentionLSTM's hard-coded node id will be damaged.
+  for (int tid = 0; tid < num_threads; ++tid) {
+    predictors.emplace_back(
+        CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
+            config));
+  }
+  for (int tid = 0; tid < num_threads; ++tid) {
+    threads.emplace_back([&, tid]() {
+      // Each thread should have local inputs and outputs.
+      // The inputs of each thread are all the same.
+      std::vector<std::vector<PaddleTensor>> inputs_tid = inputs;
+      std::vector<PaddleTensor> outputs_tid;
+      Timer timer;
+      timer.tic();
+      for (int i = 0; i < num_times; i++) {
+        for (size_t j = 0; j < inputs_tid.size(); j++) {
+          predictors[tid]->Run(inputs_tid[j], &outputs_tid);
+        }
+      }
+      PrintTime(batch_size, num_times, num_threads, tid,
+                timer.toc() / num_times, inputs_tid.size());
+    });
+  }
+  for (int i = 0; i < num_threads; ++i) {
+    threads[i].join();
+  }
+}
+
+void TestPrediction(AnalysisConfig config,
+                    const std::vector<std::vector<PaddleTensor>> inputs,
+                    std::vector<PaddleTensor> *outputs, int num_threads) {
+  if (num_threads == 1) {
+    TestOneThreadPrediction(config, inputs, outputs);
+  } else {
+    TestMultiThreadPrediction(config, inputs, outputs, num_threads);
+  }
+}
+
+}  // namespace inference
+}  // namespace paddle
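
For illustration only, a minimal sketch of what a tester built on the new tester_helper.h looks like after this change; the test name, the dummy input tensor, and the flag values below are assumptions for the example, not part of this patch:

// analyzer_example_tester.cc (hypothetical)
#include "paddle/fluid/inference/tests/api/tester_helper.h"

namespace paddle {
namespace inference {

TEST(Analyzer_example, profile) {
  AnalysisConfig cfg;
  cfg.model_dir = FLAGS_infer_model;
  cfg.use_gpu = false;
  cfg.device = 0;
  cfg.specify_input_name = true;
  cfg.enable_ir_optim = true;

  // Build one input batch by hand; real testers parse FLAGS_infer_data instead.
  PaddleTensor tensor;
  tensor.name = "word";
  tensor.shape = {4, 1};
  tensor.dtype = PaddleDType::INT64;
  tensor.data.Resize(4 * sizeof(int64_t));
  for (int i = 0; i < 4; ++i) {
    static_cast<int64_t *>(tensor.data.data())[i] = i;
  }

  std::vector<std::vector<PaddleTensor>> input_slots_all;
  input_slots_all.push_back({tensor});

  std::vector<PaddleTensor> outputs;
  // Dispatches to TestOneThreadPrediction or TestMultiThreadPrediction based
  // on num_threads, and reports batch and per-sample latency via the extended
  // PrintTime().
  TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);
}

}  // namespace inference
}  // namespace paddle

Such a tester would be run with the consolidated flags, e.g.
  ./test_analyzer_example --infer_model=<model_dir> --infer_data=<data_file> --repeat=3 --num_threads=4 --test_all_data=true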