From 781cd0cf5195877681af34d0c573bcffa84ea5a6 Mon Sep 17 00:00:00 2001
From: tensor-tang
Date: Sat, 12 Jan 2019 20:32:49 +0800
Subject: [PATCH] add multi threads test of seqpool test (#15293)

---
Reviewer notes (free-form area, not part of the commit message):
 * This patch was re-flowed from a whitespace-collapsed dump in which text
   inside angle brackets was missing. The template arguments
   (CreatePaddlePredictor<AnalysisConfig>, std::vector<float>,
   std::vector<std::thread>, std::vector<std::unique_ptr<...>>) and the
   indentation of context lines are reconstructed -- TODO confirm against
   the tree; `git apply --ignore-whitespace` may be needed.
 * zerocopy_profile_threads differs from the original commit: worker threads
   no longer do an unsynchronized `+=` on a shared double (a data race).
   Each thread stores its time in its own slot and the slots are summed
   after join(). The unused by-value capture of `config` was dropped.
   The post-image blob id on the `index` line below therefore no longer
   matches the resulting file.

 .../tests/api/analyzer_seq_pool1_tester.cc    | 67 ++++++++++++++++++-
 1 file changed, 65 insertions(+), 2 deletions(-)

diff --git a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc
index d9de55ab7..fb4c5c0a0 100644
--- a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc
@@ -214,6 +214,9 @@ void PrepareZeroCopyInputs(
   }
 }
 
+// diff: similarity_norm.tmp_0, // speed: fc_4.tmp_1
+static const char out_var_name[] = "reduce_sum_0.tmp_0";
+
 // return the output values
 std::vector<float> zerocopy_profile(int repeat_times) {
   AnalysisConfig config;
@@ -222,7 +225,7 @@ std::vector<float> zerocopy_profile(int repeat_times) {
   auto predictor = CreatePaddlePredictor<AnalysisConfig>(config);
   std::vector<std::unique_ptr<ZeroCopyTensor>> inputs;
   PrepareZeroCopyInputs(predictor, &inputs);
-  auto output_tensor = predictor->GetOutputTensor("reduce_sum_0.tmp_0");
+  auto output_tensor = predictor->GetOutputTensor(out_var_name);
   Timer timer;
   LOG(INFO) << "Warm up run...";
   timer.tic();
@@ -239,7 +242,7 @@ std::vector<float> zerocopy_profile(int repeat_times) {
   PrintTime(FLAGS_batch_size, repeat_times, 1, 0, timer.toc() / repeat_times,
             1);
 
-  VLOG(3) << "ZeroCopy output: " << DescribeZeroCopyTensor(*output_tensor);
+  LOG(INFO) << "ZeroCopy output: " << DescribeZeroCopyTensor(*output_tensor);
   PaddlePlace place;
   int output_size{0};
   auto *pdata = output_tensor->data<float>(&place, &output_size);
@@ -252,6 +255,66 @@ std::vector<float> zerocopy_profile(int repeat_times) {
 
 TEST(Analyzer_seq_pool1, zerocopy_profile) { zerocopy_profile(FLAGS_repeat); }
 
+// Multi-threaded zero-copy profile: clones one predictor per thread, runs
+// the clones concurrently and logs the per-thread and the average time.
+TEST(Analyzer_seq_pool1, zerocopy_profile_threads) {
+  AnalysisConfig config;
+  SetConfig(&config);
+  config.SwitchUseFeedFetchOps(false);
+
+  auto base_predictor = CreatePaddlePredictor<AnalysisConfig>(config);
+  std::vector<std::thread> threads;
+  std::vector<std::unique_ptr<PaddlePredictor>> predictors;
+  for (int tid = 0; tid < FLAGS_num_threads; tid++) {
+    predictors.emplace_back(base_predictor->Clone());
+    // predictors.emplace_back(CreatePaddlePredictor(config));
+  }
+
+  // One slot per thread: every worker writes only its own element, so the
+  // timings need no lock; they are summed after all threads have joined.
+  std::vector<double> thread_times(predictors.size(), 0.);
+
+  for (int tid = 0; tid < FLAGS_num_threads; tid++) {
+    threads.emplace_back([&thread_times, &predictors, tid] {
+      auto &predictor = predictors[tid];
+      std::vector<std::unique_ptr<ZeroCopyTensor>> inputs;
+      PrepareZeroCopyInputs(predictor, &inputs);
+      auto output_tensor = predictor->GetOutputTensor(out_var_name);
+      Timer timer;
+
+      LOG(INFO) << "Warm up run...";
+      timer.tic();
+      predictor->ZeroCopyRun();
+      PrintTime(FLAGS_batch_size, 1, FLAGS_num_threads, tid, timer.toc(), 1);
+      if (FLAGS_profile) {
+        paddle::platform::ResetProfiler();
+      }
+      int repeat_times = FLAGS_repeat;
+      LOG(INFO) << "Run " << repeat_times << " times...";
+      timer.tic();
+
+      for (int i = 0; i < repeat_times; i++) {
+        predictor->ZeroCopyRun();
+      }
+      const double total_time = timer.toc();
+      thread_times[tid] = total_time;
+
+      LOG(INFO) << "thread time: " << total_time / repeat_times;
+    });
+  }
+
+  for (auto &t : threads) {
+    t.join();
+  }
+
+  double total_time_of_threads{0};
+  for (double elapsed : thread_times) {
+    total_time_of_threads += elapsed;
+  }
+  LOG(INFO) << "average time: "
+            << total_time_of_threads / FLAGS_num_threads / FLAGS_repeat;
+}
+
 TEST(Analyzer_seq_pool1, zerocopy_fuse_statis) { analysis_fuse_statis(true); }
 
 TEST(Analyzer_seq_pool1, zerocopy_compare_native) {
-- 
GitLab