提交 e21edb26 编写于 作者: L luotao1

add Set/GetCPUNumThreads api

上级 3fe2def1
...@@ -46,6 +46,7 @@ contrib::AnalysisConfig::AnalysisConfig(const contrib::AnalysisConfig &other) { ...@@ -46,6 +46,7 @@ contrib::AnalysisConfig::AnalysisConfig(const contrib::AnalysisConfig &other) {
prog_file = other.prog_file; prog_file = other.prog_file;
param_file = other.param_file; param_file = other.param_file;
specify_input_name = other.specify_input_name; specify_input_name = other.specify_input_name;
cpu_num_threads_ = other.cpu_num_threads_;
// fields from this. // fields from this.
enable_ir_optim = other.enable_ir_optim; enable_ir_optim = other.enable_ir_optim;
use_feed_fetch_ops = other.use_feed_fetch_ops; use_feed_fetch_ops = other.use_feed_fetch_ops;
......
...@@ -35,7 +35,6 @@ ...@@ -35,7 +35,6 @@
#include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/platform/profiler.h"
DECLARE_bool(profile); DECLARE_bool(profile);
DECLARE_int32(paddle_num_threads);
namespace paddle { namespace paddle {
...@@ -67,7 +66,7 @@ bool AnalysisPredictor::Init( ...@@ -67,7 +66,7 @@ bool AnalysisPredictor::Init(
#endif #endif
// no matter with or without MKLDNN // no matter with or without MKLDNN
paddle::platform::SetNumThreads(FLAGS_paddle_num_threads); paddle::platform::SetNumThreads(config_.GetCPUNumThreads());
if (!PrepareScope(parent_scope)) { if (!PrepareScope(parent_scope)) {
return false; return false;
......
...@@ -28,7 +28,6 @@ limitations under the License. */ ...@@ -28,7 +28,6 @@ limitations under the License. */
#include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/platform/profiler.h"
DEFINE_bool(profile, false, "Turn on profiler for fluid"); DEFINE_bool(profile, false, "Turn on profiler for fluid");
DECLARE_int32(paddle_num_threads);
namespace paddle { namespace paddle {
namespace { namespace {
...@@ -76,7 +75,7 @@ bool NativePaddlePredictor::Init( ...@@ -76,7 +75,7 @@ bool NativePaddlePredictor::Init(
#endif #endif
// no matter with or without MKLDNN // no matter with or without MKLDNN
paddle::platform::SetNumThreads(FLAGS_paddle_num_threads); paddle::platform::SetNumThreads(config_.GetCPUNumThreads());
if (config_.use_gpu) { if (config_.use_gpu) {
place_ = paddle::platform::CUDAPlace(config_.device); place_ = paddle::platform::CUDAPlace(config_.device);
......
...@@ -186,6 +186,15 @@ struct NativeConfig : public PaddlePredictor::Config { ...@@ -186,6 +186,15 @@ struct NativeConfig : public PaddlePredictor::Config {
// Specify the variable's name of each input if input tensors don't follow the // Specify the variable's name of each input if input tensors don't follow the
// `feeds` and `fetches` of the phase `save_inference_model`. // `feeds` and `fetches` of the phase `save_inference_model`.
bool specify_input_name{false}; bool specify_input_name{false};
// Set and get the number of cpu threads.
void SetCPUNumThreads(int cpu_num_threads) {
cpu_num_threads_ = cpu_num_threads;
}
int GetCPUNumThreads() const { return cpu_num_threads_; }
protected:
int cpu_num_threads_{1}; // number of cpu threads for each instance.
}; };
// A factory to help create different predictors. // A factory to help create different predictors.
......
...@@ -27,6 +27,7 @@ void SetConfig(AnalysisConfig *cfg) { ...@@ -27,6 +27,7 @@ void SetConfig(AnalysisConfig *cfg) {
cfg->device = 0; cfg->device = 0;
cfg->enable_ir_optim = true; cfg->enable_ir_optim = true;
cfg->specify_input_name = true; cfg->specify_input_name = true;
cfg->SetCPUNumThreads(FLAGS_paddle_num_threads);
} }
void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) { void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
......
...@@ -53,6 +53,8 @@ std::ostream &operator<<(std::ostream &os, const NativeConfig &config) { ...@@ -53,6 +53,8 @@ std::ostream &operator<<(std::ostream &os, const NativeConfig &config) {
os << GenSpaces(num_spaces) << "param_file: " << config.param_file << "\n"; os << GenSpaces(num_spaces) << "param_file: " << config.param_file << "\n";
os << GenSpaces(num_spaces) os << GenSpaces(num_spaces)
<< "specify_input_name: " << config.specify_input_name << "\n"; << "specify_input_name: " << config.specify_input_name << "\n";
os << GenSpaces(num_spaces)
<< "cpu_num_threads: " << config.GetCPUNumThreads() << "\n";
num_spaces--; num_spaces--;
os << GenSpaces(num_spaces) << "}\n"; os << GenSpaces(num_spaces) << "}\n";
return os; return os;
......
...@@ -42,6 +42,7 @@ DEFINE_bool(use_analysis, true, ...@@ -42,6 +42,7 @@ DEFINE_bool(use_analysis, true,
"Running the inference program in analysis mode."); "Running the inference program in analysis mode.");
DECLARE_bool(profile); DECLARE_bool(profile);
DECLARE_int32(paddle_num_threads);
namespace paddle { namespace paddle {
namespace inference { namespace inference {
......
...@@ -17,8 +17,6 @@ limitations under the License. */ ...@@ -17,8 +17,6 @@ limitations under the License. */
#include "paddle/fluid/operators/math/blas.h" #include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/math/jit_kernel.h" #include "paddle/fluid/operators/math/jit_kernel.h"
DECLARE_int32(paddle_num_threads);
namespace paddle { namespace paddle {
namespace operators { namespace operators {
namespace math { namespace math {
...@@ -43,7 +41,7 @@ inline void FCCompute(const BlasT<DeviceContext, T>& blas, const int M, ...@@ -43,7 +41,7 @@ inline void FCCompute(const BlasT<DeviceContext, T>& blas, const int M,
.template Get<jitkernel::VAddKernel<T>>(N); .template Get<jitkernel::VAddKernel<T>>(N);
#ifdef PADDLE_WITH_MKLML #ifdef PADDLE_WITH_MKLML
#pragma omp parallel for if (FLAGS_paddle_num_threads > 1) #pragma omp parallel for
#endif #endif
for (int i = 0; i < M; i++) { for (int i = 0; i < M; i++) {
T* dst = Y + i * N; T* dst = Y + i * N;
......
...@@ -41,7 +41,7 @@ void SetNumThreads(int num_threads) { ...@@ -41,7 +41,7 @@ void SetNumThreads(int num_threads) {
#elif defined(PADDLE_WITH_MKLML) #elif defined(PADDLE_WITH_MKLML)
int real_num_threads = num_threads > 1 ? num_threads : 1; int real_num_threads = num_threads > 1 ? num_threads : 1;
platform::dynload::MKL_Set_Num_Threads(real_num_threads); platform::dynload::MKL_Set_Num_Threads(real_num_threads);
omp_set_num_threads(num_threads); omp_set_num_threads(real_num_threads);
#else #else
PADDLE_ENFORCE(false, "To be implemented."); PADDLE_ENFORCE(false, "To be implemented.");
#endif #endif
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册