diff --git a/paddle/fluid/inference/tests/api/tester_helper.h b/paddle/fluid/inference/tests/api/tester_helper.h
index 72703bc80b493db96235d6b08f069b3b8bd1a320..d21567ac197587473684092f6cb12b059e0b1871 100644
--- a/paddle/fluid/inference/tests/api/tester_helper.h
+++ b/paddle/fluid/inference/tests/api/tester_helper.h
@@ -207,11 +207,7 @@ void TestMultiThreadPrediction(
   int batch_size = FLAGS_batch_size;
   int num_times = FLAGS_repeat;
   std::vector<std::thread> threads;
-  std::vector<std::unique_ptr<PaddlePredictor>> predictors;
-  predictors.emplace_back(CreateTestPredictor(config, use_analysis));
-  for (int tid = 1; tid < num_threads; ++tid) {
-    predictors.emplace_back(predictors.front()->Clone());
-  }
+  auto main_predictor = CreateTestPredictor(config, use_analysis);
 
   size_t total_time{0};
   for (int tid = 0; tid < num_threads; ++tid) {
@@ -219,7 +215,9 @@ void TestMultiThreadPrediction(
       // Each thread should have local inputs and outputs.
       // The inputs of each thread are all the same.
       std::vector<PaddleTensor> outputs_tid;
-      auto &predictor = predictors[tid];
+      // To ensure the thread binding correctly,
+      // please clone inside the threadpool.
+      auto predictor = main_predictor->Clone();
 #ifdef PADDLE_WITH_MKLDNN
       if (use_analysis) {
         static_cast<AnalysisPredictor *>(predictor.get())