Merge pull request #15310 from luotao1/ZeroCopy_omp

fix multi-threads in ZeroCopyProfile

Merge pull request #15310 from luotao1/ZeroCopy_omp
fix multi-threads in ZeroCopyProfile
0b6447a4 · tensor-tang · GitHub · 485d3210 · 2411ed42 · 0b6447a4
2 changed files
--- a/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
@@ -370,15 +370,12 @@ TEST(Analyzer_rnn1, ZeroCopyMultiThread) {
  auto base_predictor = CreatePaddlePredictor<AnalysisConfig>(config);
  double total_time_of_threads{0};
  std::vector<std::thread> threads;
-  std::vector<std::unique_ptr<PaddlePredictor>> predictors;
-  for (int tid = 0; tid < FLAGS_num_threads; tid++) {
-    predictors.emplace_back(CreatePaddlePredictor<AnalysisConfig>(config));
-  }
  for (int tid = 0; tid < FLAGS_num_threads; tid++) {
-    threads.emplace_back([config, &total_time_of_threads, &predictors, tid] {
+    threads.emplace_back([&, tid] {
-      // auto predictor = base_predictor->Clone();
+      // To ensure each thread is bound correctly,
-      auto &predictor = predictors[tid];
+      // clone the predictor inside the thread.
+      auto predictor = base_predictor->Clone();
      NEW_TENSOR(data_lod_attention);
      NEW_TENSOR(cell_init);
      NEW_TENSOR(data);

--- a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc
@@ -263,15 +263,12 @@ TEST(Analyzer_seq_pool1, zerocopy_profile_threads) {
  auto base_predictor = CreatePaddlePredictor<AnalysisConfig>(config);
  double total_time_of_threads{0};
  std::vector<std::thread> threads;
-  std::vector<std::unique_ptr<PaddlePredictor>> predictors;
-  for (int tid = 0; tid < FLAGS_num_threads; tid++) {
-    predictors.emplace_back(base_predictor->Clone());
-    // predictors.emplace_back(CreatePaddlePredictor<AnalysisConfig>(config));
-  }
  for (int tid = 0; tid < FLAGS_num_threads; tid++) {
-    threads.emplace_back([config, &total_time_of_threads, &predictors, tid] {
+    threads.emplace_back([&, tid] {
-      auto &predictor = predictors[tid];
+      // To ensure each thread is bound correctly,
+      // clone the predictor inside the thread.
+      auto predictor = base_predictor->Clone();
      std::vector<std::unique_ptr<ZeroCopyTensor>> inputs;
      PrepareZeroCopyInputs(predictor, &inputs);
      auto output_tensor = predictor->GetOutputTensor(out_var_name);