From 781cd0cf5195877681af34d0c573bcffa84ea5a6 Mon Sep 17 00:00:00 2001
From: tensor-tang <tangjian03@baidu.com>
Date: Sat, 12 Jan 2019 20:32:49 +0800
Subject: [PATCH] add multi threads test of seqpool test (#15293)

---
 .../tests/api/analyzer_seq_pool1_tester.cc    | 59 ++++++++++++++++++-
 1 file changed, 57 insertions(+), 2 deletions(-)
diff --git a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc
index d9de55ab7..fb4c5c0a0 100644
--- a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc
@@ -214,6 +214,9 @@ void PrepareZeroCopyInputs(
   }
 }
 
+// diff: similarity_norm.tmp_0, // speed: fc_4.tmp_1
+static const char out_var_name[] = "reduce_sum_0.tmp_0";  // fetched output
+
 // return the output values
 std::vector<float> zerocopy_profile(int repeat_times) {
   AnalysisConfig config;
@@ -222,7 +225,7 @@ std::vector<float> zerocopy_profile(int repeat_times) {
   auto predictor = CreatePaddlePredictor<AnalysisConfig>(config);
   std::vector<std::unique_ptr<ZeroCopyTensor>> inputs;
   PrepareZeroCopyInputs(predictor, &inputs);
-  auto output_tensor = predictor->GetOutputTensor("reduce_sum_0.tmp_0");
+  auto output_tensor = predictor->GetOutputTensor(out_var_name);
   Timer timer;
   LOG(INFO) << "Warm up run...";
   timer.tic();
@@ -239,7 +242,7 @@ std::vector<float> zerocopy_profile(int repeat_times) {
   PrintTime(FLAGS_batch_size, repeat_times, 1, 0, timer.toc() / repeat_times,
             1);
 
-  VLOG(3) << "ZeroCopy output: " << DescribeZeroCopyTensor(*output_tensor);
+  LOG(INFO) << "ZeroCopy output: " << DescribeZeroCopyTensor(*output_tensor);
   PaddlePlace place;
   int output_size{0};
   auto *pdata = output_tensor->data<float>(&place, &output_size);
@@ -252,6 +255,58 @@ std::vector<float> zerocopy_profile(int repeat_times) {
 
 TEST(Analyzer_seq_pool1, zerocopy_profile) { zerocopy_profile(FLAGS_repeat); }
 
+// Benchmarks ZeroCopyRun with FLAGS_num_threads threads, each on its own clone
+// of a base predictor, and logs per-thread and overall average time per run.
+TEST(Analyzer_seq_pool1, zerocopy_profile_threads) {
+  AnalysisConfig config;
+  SetConfig(&config);
+  config.SwitchUseFeedFetchOps(false);
+
+  auto base_predictor = CreatePaddlePredictor<AnalysisConfig>(config);
+  std::vector<std::thread> threads;
+  std::vector<std::unique_ptr<PaddlePredictor>> predictors;
+  for (int tid = 0; tid < FLAGS_num_threads; tid++) {
+    predictors.emplace_back(base_predictor->Clone());
+    // predictors.emplace_back(CreatePaddlePredictor<AnalysisConfig>(config));
+  }
+
+  // One slot per thread: each worker writes only its own element, so the
+  // timings need no lock; they are summed after the matching join().
+  std::vector<double> thread_times(predictors.size(), 0.0);
+  for (int tid = 0; tid < FLAGS_num_threads; tid++) {
+    threads.emplace_back([&thread_times, &predictors, tid] {
+      auto &predictor = predictors[tid];
+      std::vector<std::unique_ptr<ZeroCopyTensor>> inputs;
+      PrepareZeroCopyInputs(predictor, &inputs);
+      auto output_tensor = predictor->GetOutputTensor(out_var_name);
+      Timer timer;
+      LOG(INFO) << "Warm up run...";
+      timer.tic();
+      predictor->ZeroCopyRun();
+      PrintTime(FLAGS_batch_size, 1, FLAGS_num_threads, tid, timer.toc(), 1);
+      if (FLAGS_profile) {
+        paddle::platform::ResetProfiler();
+      }
+      int repeat_times = FLAGS_repeat;
+      LOG(INFO) << "Run " << repeat_times << " times...";
+      timer.tic();
+      for (int i = 0; i < repeat_times; i++) {
+        predictor->ZeroCopyRun();
+      }
+      thread_times[tid] = timer.toc();
+      LOG(INFO) << "thread time: " << thread_times[tid] / repeat_times;
+    });
+  }
+
+  double total_time_of_threads{0};
+  for (size_t i = 0; i < threads.size(); i++) {
+    threads[i].join();
+    total_time_of_threads += thread_times[i];  // thread i has finished
+  }
+  LOG(INFO) << "average time: "
+            << total_time_of_threads / FLAGS_num_threads / FLAGS_repeat;
+}
+
 TEST(Analyzer_seq_pool1, zerocopy_fuse_statis) { analysis_fuse_statis(true); }
 
 TEST(Analyzer_seq_pool1, zerocopy_compare_native) {
-- 
GitLab