Commit a5c4b463 authored by luotao1

add SetMKLDNNThreadId api

Parent e21edb26
@@ -159,6 +159,14 @@ bool AnalysisPredictor::PrepareExecutor() {
   return true;
 }
 
+void AnalysisPredictor::SetMKLDNNThreadId(int tid) {
+#ifdef PADDLE_WITH_MKLDNN
+  platform::set_cur_thread_id(tid);
+#else
+  LOG(ERROR) << "Please compile with MKLDNN first to use MKLDNN";
+#endif
+}
+
 bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
                             std::vector<PaddleTensor> *output_data,
                             int batch_size) {
......
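For context, the new method only tags the calling thread; below is a minimal sketch of the intended per-thread call pattern. The harness (`RunAll`, `predictors`, `inputs`) is hypothetical and simply mirrors the test change further down; only `SetMKLDNNThreadId` itself comes from this commit.

```cpp
#include <memory>
#include <thread>
#include <vector>

// Hypothetical harness (not part of this commit): one predictor per thread,
// with thread ids offset by 1 so that id 0 stays free for the main thread.
void RunAll(std::vector<std::unique_ptr<PaddlePredictor>> &predictors,
            const std::vector<PaddleTensor> &inputs, int num_threads) {
  std::vector<std::thread> workers;
  for (int tid = 0; tid < num_threads; ++tid) {
    workers.emplace_back([&predictors, &inputs, tid]() {
#ifdef PADDLE_WITH_MKLDNN
      // SetMKLDNNThreadId is on AnalysisPredictor, not on the
      // PaddlePredictor base class, hence the cast.
      static_cast<AnalysisPredictor *>(predictors[tid].get())
          ->SetMKLDNNThreadId(tid + 1);
#endif
      std::vector<PaddleTensor> outputs;
      predictors[tid]->Run(inputs, &outputs);
    });
  }
  for (auto &w : workers) w.join();
}
```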
@@ -69,6 +69,8 @@ class AnalysisPredictor : public PaddlePredictor {
   framework::Scope *scope() { return scope_.get(); }
   framework::ProgramDesc &program() { return *inference_program_; }
 
+  void SetMKLDNNThreadId(int tid);
+
  protected:
   bool PrepareProgram(const std::shared_ptr<framework::ProgramDesc> &program);
   bool PrepareScope(const std::shared_ptr<framework::Scope> &parent_scope);
......
@@ -51,9 +51,9 @@ struct AnalysisConfig : public NativeConfig {
                      int max_batch_size = 1);
   bool use_tensorrt() const { return use_tensorrt_; }
 
-  void EnableMKLDNN();
   // NOTE this is just for internal development, please not use it.
   // NOT stable yet.
+  void EnableMKLDNN();
   bool use_mkldnn() const { return use_mkldnn_; }
 
   friend class ::paddle::AnalysisPredictor;
......
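The hunk above only moves `EnableMKLDNN()` below the NOTE, so the "internal, not stable yet" warning clearly covers it. For reference, a hedged sketch of how a caller would opt in; the model path is a placeholder, and the factory spelling follows the inference API of this era, so exact namespaces and overloads may differ by Paddle version.

```cpp
// Sketch under assumptions: namespaces and the exact CreatePaddlePredictor
// overload vary across Paddle versions; "./my_model" is a placeholder path.
AnalysisConfig config;
config.model_dir = "./my_model";  // field inherited from NativeConfig
config.EnableMKLDNN();            // internal-only switch, per the NOTE above
auto predictor = CreatePaddlePredictor<AnalysisConfig>(config);
```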
@@ -216,13 +216,16 @@ void TestMultiThreadPrediction(
   size_t total_time{0};
   for (int tid = 0; tid < num_threads; ++tid) {
     threads.emplace_back([&, tid]() {
-#ifdef PADDLE_WITH_MKLDNN
-      platform::set_cur_thread_id(static_cast<int>(tid) + 1);
-#endif
       // Each thread should have local inputs and outputs.
       // The inputs of each thread are all the same.
       std::vector<PaddleTensor> outputs_tid;
       auto &predictor = predictors[tid];
+#ifdef PADDLE_WITH_MKLDNN
+      if (use_analysis) {
+        static_cast<AnalysisPredictor *>(predictor.get())
+            ->SetMKLDNNThreadId(static_cast<int>(tid) + 1);
+      }
+#endif
 
       // warmup run
       LOG(INFO) << "Running thread " << tid << ", warm up run...";
......
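Note the shape of the replacement in this last hunk: the raw `platform::set_cur_thread_id()` call is folded into the predictor API and guarded by `use_analysis`, since only `AnalysisPredictor` exposes `SetMKLDNNThreadId`; the `static_cast<int>(tid) + 1` offset presumably keeps id 0 for the default (non-worker) path.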