add Set/GetCPUNumThreads api

e21edb26 · luotao1 · 3fe2def1 · e21edb26 · e21edb26 · e21edb26
9 changed files
--- a/paddle/fluid/inference/api/analysis_config.cc
+++ b/paddle/fluid/inference/api/analysis_config.cc
@@ -46,6 +46,7 @@ contrib::AnalysisConfig::AnalysisConfig(const contrib::AnalysisConfig &other) {
  prog_file = other.prog_file;
  param_file = other.param_file;
  specify_input_name = other.specify_input_name;
+  cpu_num_threads_ = other.cpu_num_threads_;
  // fields from this.
  enable_ir_optim = other.enable_ir_optim;
  use_feed_fetch_ops = other.use_feed_fetch_ops;

--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -35,7 +35,6 @@
 #include "paddle/fluid/platform/profiler.h"
 DECLARE_bool(profile);
-DECLARE_int32(paddle_num_threads);
 namespace paddle {
@@ -67,7 +66,7 @@ bool AnalysisPredictor::Init(
 #endif
  // no matter with or without MKLDNN
-  paddle::platform::SetNumThreads(FLAGS_paddle_num_threads);
+  paddle::platform::SetNumThreads(config_.GetCPUNumThreads());
  if (!PrepareScope(parent_scope)) {
    return false;

--- a/paddle/fluid/inference/api/api_impl.cc
+++ b/paddle/fluid/inference/api/api_impl.cc
@@ -28,7 +28,6 @@ limitations under the License. */
 #include "paddle/fluid/platform/profiler.h"
 DEFINE_bool(profile, false, "Turn on profiler for fluid");
-DECLARE_int32(paddle_num_threads);
 namespace paddle {
 namespace {
@@ -76,7 +75,7 @@ bool NativePaddlePredictor::Init(
 #endif
  // no matter with or without MKLDNN
-  paddle::platform::SetNumThreads(FLAGS_paddle_num_threads);
+  paddle::platform::SetNumThreads(config_.GetCPUNumThreads());
  if (config_.use_gpu) {
    place_ = paddle::platform::CUDAPlace(config_.device);

--- a/paddle/fluid/inference/api/paddle_api.h
+++ b/paddle/fluid/inference/api/paddle_api.h
@@ -186,6 +186,15 @@ struct NativeConfig : public PaddlePredictor::Config {
  // Specify the variable's name of each input if input tensors don't follow the
  // `feeds` and `fetches` of the phase `save_inference_model`.
  bool specify_input_name{false};
+  // Set and get the number of CPU threads used by each predictor instance.
+  void SetCPUNumThreads(int cpu_num_threads) {
+    cpu_num_threads_ = cpu_num_threads;
+  }
+  int GetCPUNumThreads() const { return cpu_num_threads_; }
+ protected:
+  int cpu_num_threads_{1};  // Number of CPU threads per instance; defaults to 1.
 };
 // A factory to help create different predictors.

--- a/paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc
@@ -27,6 +27,7 @@ void SetConfig(AnalysisConfig *cfg) {
  cfg->device = 0;
  cfg->enable_ir_optim = true;
  cfg->specify_input_name = true;
+  cfg->SetCPUNumThreads(FLAGS_paddle_num_threads);
 }
 void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {

--- a/paddle/fluid/inference/tests/api/config_printer.h
+++ b/paddle/fluid/inference/tests/api/config_printer.h
@@ -53,6 +53,8 @@ std::ostream &operator<<(std::ostream &os, const NativeConfig &config) {
  os << GenSpaces(num_spaces) << "param_file: " << config.param_file << "\n";
  os << GenSpaces(num_spaces)
     << "specify_input_name: " << config.specify_input_name << "\n";
+  os << GenSpaces(num_spaces)
+     << "cpu_num_threads: " << config.GetCPUNumThreads() << "\n";
  num_spaces--;
  os << GenSpaces(num_spaces) << "}\n";
  return os;

--- a/paddle/fluid/inference/tests/api/tester_helper.h
+++ b/paddle/fluid/inference/tests/api/tester_helper.h
@@ -42,6 +42,7 @@ DEFINE_bool(use_analysis, true,
            "Running the inference program in analysis mode.");
 DECLARE_bool(profile);
+DECLARE_int32(paddle_num_threads);
 namespace paddle {
 namespace inference {

--- a/paddle/fluid/operators/math/fc_compute.h
+++ b/paddle/fluid/operators/math/fc_compute.h
@@ -17,8 +17,6 @@ limitations under the License. */
 #include "paddle/fluid/operators/math/blas.h"
 #include "paddle/fluid/operators/math/jit_kernel.h"
-DECLARE_int32(paddle_num_threads);
 namespace paddle {
 namespace operators {
 namespace math {
@@ -43,7 +41,7 @@ inline void FCCompute(const BlasT<DeviceContext, T>& blas, const int M,
                           .template Get<jitkernel::VAddKernel<T>>(N);
 #ifdef PADDLE_WITH_MKLML
-#pragma omp parallel for if (FLAGS_paddle_num_threads > 1)
+#pragma omp parallel for
 #endif
    for (int i = 0; i < M; i++) {
      T* dst = Y + i * N;

--- a/paddle/fluid/platform/cpu_helper.cc
+++ b/paddle/fluid/platform/cpu_helper.cc
@@ -41,7 +41,7 @@ void SetNumThreads(int num_threads) {
 #elif defined(PADDLE_WITH_MKLML)
  int real_num_threads = num_threads > 1 ? num_threads : 1;
  platform::dynload::MKL_Set_Num_Threads(real_num_threads);
-  omp_set_num_threads(num_threads);
+  omp_set_num_threads(real_num_threads);
 #else
  PADDLE_ENFORCE(false, "To be implemented.");
 #endif