Commit e21edb26 authored by luotao1

add Set/GetCPUNumThreads api
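The new accessors let each predictor instance carry its own CPU thread count instead of relying on the global `FLAGS_paddle_num_threads` gflag. A minimal usage sketch, not part of this commit (the model path is a placeholder):

```cpp
#include "paddle/fluid/inference/api/paddle_inference_api.h"

int main() {
  paddle::NativeConfig config;
  config.model_dir = "./my_model";  // hypothetical model directory
  config.SetCPUNumThreads(4);       // new API: thread count for this instance
  // During Init(), the predictor now calls
  // paddle::platform::SetNumThreads(config_.GetCPUNumThreads()).
  auto predictor = paddle::CreatePaddlePredictor<paddle::NativeConfig>(config);
  return predictor != nullptr ? 0 : 1;
}
```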

Parent 3fe2def1
@@ -46,6 +46,7 @@ contrib::AnalysisConfig::AnalysisConfig(const contrib::AnalysisConfig &other) {
   prog_file = other.prog_file;
   param_file = other.param_file;
   specify_input_name = other.specify_input_name;
+  cpu_num_threads_ = other.cpu_num_threads_;
   // fields from this.
   enable_ir_optim = other.enable_ir_optim;
   use_feed_fetch_ops = other.use_feed_fetch_ops;
......
@@ -35,7 +35,6 @@
 #include "paddle/fluid/platform/profiler.h"

 DECLARE_bool(profile);
-DECLARE_int32(paddle_num_threads);

 namespace paddle {
@@ -67,7 +66,7 @@ bool AnalysisPredictor::Init(
 #endif
   // no matter with or without MKLDNN
-  paddle::platform::SetNumThreads(FLAGS_paddle_num_threads);
+  paddle::platform::SetNumThreads(config_.GetCPUNumThreads());
   if (!PrepareScope(parent_scope)) {
     return false;
......
@@ -28,7 +28,6 @@ limitations under the License. */
 #include "paddle/fluid/platform/profiler.h"

 DEFINE_bool(profile, false, "Turn on profiler for fluid");
-DECLARE_int32(paddle_num_threads);

 namespace paddle {
 namespace {
@@ -76,7 +75,7 @@ bool NativePaddlePredictor::Init(
 #endif
   // no matter with or without MKLDNN
-  paddle::platform::SetNumThreads(FLAGS_paddle_num_threads);
+  paddle::platform::SetNumThreads(config_.GetCPUNumThreads());
   if (config_.use_gpu) {
     place_ = paddle::platform::CUDAPlace(config_.device);
......
@@ -186,6 +186,15 @@ struct NativeConfig : public PaddlePredictor::Config {
   // Specify the variable's name of each input if input tensors don't follow the
   // `feeds` and `fetches` of the phase `save_inference_model`.
   bool specify_input_name{false};
+
+  // Set and get the number of cpu threads.
+  void SetCPUNumThreads(int cpu_num_threads) {
+    cpu_num_threads_ = cpu_num_threads;
+  }
+  int GetCPUNumThreads() const { return cpu_num_threads_; }
+
+ protected:
+  int cpu_num_threads_{1};  // number of cpu threads for each instance.
 };

 // A factory to help create different predictors.
......
@@ -27,6 +27,7 @@ void SetConfig(AnalysisConfig *cfg) {
   cfg->device = 0;
   cfg->enable_ir_optim = true;
   cfg->specify_input_name = true;
+  cfg->SetCPUNumThreads(FLAGS_paddle_num_threads);
 }

 void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
......
@@ -53,6 +53,8 @@ std::ostream &operator<<(std::ostream &os, const NativeConfig &config) {
   os << GenSpaces(num_spaces) << "param_file: " << config.param_file << "\n";
   os << GenSpaces(num_spaces)
      << "specify_input_name: " << config.specify_input_name << "\n";
+  os << GenSpaces(num_spaces)
+     << "cpu_num_threads: " << config.GetCPUNumThreads() << "\n";
   num_spaces--;
   os << GenSpaces(num_spaces) << "}\n";
   return os;
......
@@ -42,6 +42,7 @@ DEFINE_bool(use_analysis, true,
             "Running the inference program in analysis mode.");

 DECLARE_bool(profile);
+DECLARE_int32(paddle_num_threads);

 namespace paddle {
 namespace inference {
......
@@ -17,8 +17,6 @@ limitations under the License. */
 #include "paddle/fluid/operators/math/blas.h"
 #include "paddle/fluid/operators/math/jit_kernel.h"

-DECLARE_int32(paddle_num_threads);
-
 namespace paddle {
 namespace operators {
 namespace math {
@@ -43,7 +41,7 @@ inline void FCCompute(const BlasT<DeviceContext, T>& blas, const int M,
                           .template Get<jitkernel::VAddKernel<T>>(N);
 #ifdef PADDLE_WITH_MKLML
-#pragma omp parallel for if (FLAGS_paddle_num_threads > 1)
+#pragma omp parallel for
 #endif
   for (int i = 0; i < M; i++) {
     T* dst = Y + i * N;
......
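A note on the hunk above: the `if (FLAGS_paddle_num_threads > 1)` clause is no longer needed because the thread count is now applied globally via `omp_set_num_threads()` during predictor initialization. A standalone sketch (illustrative, not from this commit; compile with `-fopenmp`) of why the guard becomes redundant once the team size is 1:

```cpp
#include <omp.h>
#include <cstdio>

int main() {
  omp_set_num_threads(1);  // what SetNumThreads() does for a single-thread config
#pragma omp parallel for
  for (int i = 0; i < 4; i++) {
    // With a team size of 1 the loop runs on one thread, matching the old
    // behavior of skipping parallelization when FLAGS_paddle_num_threads <= 1.
    std::printf("i=%d on thread %d\n", i, omp_get_thread_num());
  }
  return 0;
}
```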
@@ -41,7 +41,7 @@ void SetNumThreads(int num_threads) {
 #elif defined(PADDLE_WITH_MKLML)
   int real_num_threads = num_threads > 1 ? num_threads : 1;
   platform::dynload::MKL_Set_Num_Threads(real_num_threads);
-  omp_set_num_threads(num_threads);
+  omp_set_num_threads(real_num_threads);
 #else
   PADDLE_ENFORCE(false, "To be implemented.");
 #endif
......
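The final hunk also fixes a small inconsistency: MKL was already clamped to at least one thread via `real_num_threads`, while OpenMP received the raw `num_threads` value, which could be zero or negative. The corrected helper, sketched in isolation (assumes an MKLML build; simplified from the diff above):

```cpp
#include <omp.h>

// MKL_Set_Num_Threads is provided by MKLML (mkl_service.h); declared here
// only to keep the sketch self-contained.
extern "C" void MKL_Set_Num_Threads(int nt);

void SetNumThreads(int num_threads) {
  // Clamp so MKL and OpenMP always receive the same, valid thread count.
  int real_num_threads = num_threads > 1 ? num_threads : 1;
  MKL_Set_Num_Threads(real_num_threads);
  omp_set_num_threads(real_num_threads);
}
```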