diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index e8964c4acea0d220deca048a018eb7de42d7e4e5..467d4411376381df950bb582f9c73410284a5e2d 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -183,6 +183,9 @@ void AnalysisPredictor::SetMkldnnThreadID(int tid) {
 bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
                             std::vector<PaddleTensor> *output_data,
                             int batch_size) {
+  if (UNLIKELY(config_.cpu_math_library_num_threads() > 1)) {
+    paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
+  }
   VLOG(3) << "Predictor::predict";
   inference::Timer timer;
   timer.tic();
diff --git a/paddle/fluid/inference/api/api_impl.cc b/paddle/fluid/inference/api/api_impl.cc
index 97c164bdef7a4b3e66be78526793f3830ada398b..048286a843f0190a8139cb86eda4f3a3a40d89a1 100644
--- a/paddle/fluid/inference/api/api_impl.cc
+++ b/paddle/fluid/inference/api/api_impl.cc
@@ -131,6 +131,9 @@ NativePaddlePredictor::~NativePaddlePredictor() {
 bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
                                 std::vector<PaddleTensor> *output_data,
                                 int batch_size) {
+  if (UNLIKELY(config_.cpu_math_library_num_threads() > 1)) {
+    paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
+  }
   VLOG(3) << "Predictor::predict";
   Timer timer;
   timer.tic();
diff --git a/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc b/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
index c27c39f40a2067dd2bd2150e4b1e53eab7cdf06e..36282b3efe5756da55b056c09e94aa352e3dcf8a 100644
--- a/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
@@ -366,15 +366,17 @@ TEST(Analyzer_rnn1, ZeroCopyMultiThread) {
 #define NEW_TENSOR(name__) \
   auto name__##_tensor = predictor->GetInputTensor(#name__);
 
-  auto base_predictor = CreatePaddlePredictor<AnalysisConfig>(config);
+  std::vector<std::unique_ptr<PaddlePredictor>> predictors;
+  predictors.emplace_back(CreatePaddlePredictor<AnalysisConfig>(config));
+  for (int tid = 1; tid < FLAGS_num_threads; tid++) {
+    predictors.emplace_back(predictors.front()->Clone());
+  }
   double total_time_of_threads{0};
   std::vector<std::thread> threads;
 
   for (int tid = 0; tid < FLAGS_num_threads; tid++) {
     threads.emplace_back([&, tid] {
-      // To ensure the thread binding correctly,
-      // please clone inside the threadpool.
-      auto predictor = base_predictor->Clone();
+      auto &predictor = predictors[tid];
       NEW_TENSOR(data_lod_attention);
       NEW_TENSOR(cell_init);
       NEW_TENSOR(data);
diff --git a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc
index bd0059e18485c046df27d5ddbb39df9bbb249113..cca2ab1ee148b568e714c24dded7cd72403f0e5f 100644
--- a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc
@@ -266,15 +266,17 @@ TEST(Analyzer_seq_pool1, zerocopy_profile_threads) {
   SetConfig(&config);
   config.SwitchUseFeedFetchOps(false);
 
-  auto base_predictor = CreatePaddlePredictor<AnalysisConfig>(config);
+  std::vector<std::unique_ptr<PaddlePredictor>> predictors;
+  predictors.emplace_back(CreatePaddlePredictor<AnalysisConfig>(config));
+  for (int tid = 1; tid < FLAGS_num_threads; tid++) {
+    predictors.emplace_back(predictors.front()->Clone());
+  }
   double total_time_of_threads{0};
   std::vector<std::thread> threads;
 
   for (int tid = 0; tid < FLAGS_num_threads; tid++) {
     threads.emplace_back([&, tid] {
-      // To ensure the thread binding correctly,
-      // please clone inside the threadpool.
-      auto predictor = base_predictor->Clone();
+      auto &predictor = predictors[tid];
       std::vector<std::unique_ptr<ZeroCopyTensor>> inputs;
       PrepareZeroCopyInputs(predictor, &inputs);
       auto output_tensor = predictor->GetOutputTensor(out_var_name);
diff --git a/paddle/fluid/inference/tests/api/tester_helper.h b/paddle/fluid/inference/tests/api/tester_helper.h
index 2811eb4946ea025cf6c7ab197c4e603df86f6f2d..2e53fddfe7f6f0c5b31ff069fb1661f143022841 100644
--- a/paddle/fluid/inference/tests/api/tester_helper.h
+++ b/paddle/fluid/inference/tests/api/tester_helper.h
@@ -17,8 +17,10 @@
 #include <gtest/gtest.h>
 #include <algorithm>
+#include <memory>
 #include <string>
 #include <thread>  // NOLINT
+#include <utility>
 #include <vector>
 #ifdef WITH_GPERFTOOLS
 #include <gperftools/profiler.h>
 #endif
@@ -252,7 +254,11 @@ void TestMultiThreadPrediction(
   int batch_size = FLAGS_batch_size;
   int num_times = FLAGS_repeat;
   std::vector<std::thread> threads;
-  auto main_predictor = CreateTestPredictor(config, use_analysis);
+  std::vector<std::unique_ptr<PaddlePredictor>> predictors;
+  predictors.emplace_back(CreateTestPredictor(config, use_analysis));
+  for (int tid = 1; tid < num_threads; tid++) {
+    predictors.emplace_back(predictors.front()->Clone());
+  }
   size_t total_time{0};
 
   for (int tid = 0; tid < num_threads; ++tid) {
@@ -260,9 +266,7 @@
       // Each thread should have local inputs and outputs.
       // The inputs of each thread are all the same.
       std::vector<PaddleTensor> outputs_tid;
-      // To ensure the thread binding correctly,
-      // please clone inside the threadpool.
-      auto predictor = main_predictor->Clone();
+      auto &predictor = predictors[tid];
 #ifdef PADDLE_WITH_MKLDNN
       if (use_analysis) {
         static_cast<AnalysisPredictor *>(predictor.get())
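The patch replaces per-thread Clone() calls inside the worker lambdas with a pool of predictors cloned once on the main thread, which each worker then indexes by its thread id; Run() additionally re-applies the configured math-library thread count whenever cpu_math_library_num_threads() > 1. For reference, a minimal standalone sketch of the same usage pattern against the public Paddle inference C++ API; the include path, model path, and worker count below are illustrative placeholders, not values taken from this patch:

#include <memory>
#include <thread>
#include <vector>

#include "paddle_inference_api.h"  // actual path depends on the install layout

int main() {
  paddle::AnalysisConfig config;
  config.SetModel("/path/to/model_dir");  // placeholder model directory
  config.SetCpuMathLibraryNumThreads(1);  // keep math-library threading flat

  const int num_threads = 4;  // placeholder worker count

  // Create one predictor, then clone the others sequentially on the main
  // thread, so Clone() never runs concurrently with another thread's Run().
  std::vector<std::unique_ptr<paddle::PaddlePredictor>> predictors;
  predictors.emplace_back(
      paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(config));
  for (int tid = 1; tid < num_threads; ++tid) {
    predictors.emplace_back(predictors.front()->Clone());
  }

  std::vector<std::thread> threads;
  for (int tid = 0; tid < num_threads; ++tid) {
    threads.emplace_back([&, tid] {
      auto &predictor = predictors[tid];  // each worker owns exactly one clone
      std::vector<paddle::PaddleTensor> inputs, outputs;
      // ... fill inputs with real tensors before running ...
      predictor->Run(inputs, &outputs);
    });
  }
  for (auto &t : threads) t.join();
  return 0;
}

Cloning up front means each worker starts with its predictor already constructed, and the lambdas do no predictor setup work in the concurrent phase.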