未验证 提交 5bea9c14 编写于 作者: T tensor-tang 提交者: GitHub

Merge pull request #12397 from tensor-tang/refine/num_threads

refine num_threads control
...@@ -20,9 +20,6 @@ limitations under the License. */ ...@@ -20,9 +20,6 @@ limitations under the License. */
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include "paddle/fluid/inference/tests/test_helper.h" #include "paddle/fluid/inference/tests/test_helper.h"
#include "paddle/fluid/platform/cpu_helper.h" #include "paddle/fluid/platform/cpu_helper.h"
#ifdef PADDLE_WITH_MKLML
#include <omp.h>
#endif
DEFINE_string(model_path, "", "Directory of the inference model."); DEFINE_string(model_path, "", "Directory of the inference model.");
DEFINE_string(data_file, "", "File of input index data."); DEFINE_string(data_file, "", "File of input index data.");
...@@ -30,6 +27,7 @@ DEFINE_int32(repeat, 100, "Running the inference program repeat times"); ...@@ -30,6 +27,7 @@ DEFINE_int32(repeat, 100, "Running the inference program repeat times");
DEFINE_bool(prepare_vars, true, "Prepare variables before executor"); DEFINE_bool(prepare_vars, true, "Prepare variables before executor");
DEFINE_int32(num_threads, 1, "Number of threads should be used"); DEFINE_int32(num_threads, 1, "Number of threads should be used");
DECLARE_bool(use_mkldnn); DECLARE_bool(use_mkldnn);
DECLARE_int32(paddle_num_threads);
inline double GetCurrentMs() { inline double GetCurrentMs() {
struct timeval time; struct timeval time;
...@@ -160,12 +158,7 @@ TEST(inference, nlp) { ...@@ -160,12 +158,7 @@ TEST(inference, nlp) {
std::unique_ptr<paddle::framework::Scope> scope( std::unique_ptr<paddle::framework::Scope> scope(
new paddle::framework::Scope()); new paddle::framework::Scope());
#ifdef PADDLE_WITH_MKLML paddle::platform::SetNumThreads(FLAGS_paddle_num_threads);
// only use 1 thread number per std::thread
omp_set_dynamic(0);
omp_set_num_threads(1);
paddle::platform::SetNumThreads(1);
#endif
double start_ms = 0, stop_ms = 0; double start_ms = 0, stop_ms = 0;
if (FLAGS_num_threads > 1) { if (FLAGS_num_threads > 1) {
......
...@@ -16,6 +16,7 @@ limitations under the License. */ ...@@ -16,6 +16,7 @@ limitations under the License. */
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
#ifdef PADDLE_WITH_MKLML #ifdef PADDLE_WITH_MKLML
#include <omp.h>
#include "paddle/fluid/platform/dynload/mklml.h" #include "paddle/fluid/platform/dynload/mklml.h"
#endif #endif
...@@ -33,6 +34,7 @@ void SetNumThreads(int num_threads) { ...@@ -33,6 +34,7 @@ void SetNumThreads(int num_threads) {
#elif defined(PADDLE_WITH_MKLML) #elif defined(PADDLE_WITH_MKLML)
int real_num_threads = num_threads > 1 ? num_threads : 1; int real_num_threads = num_threads > 1 ? num_threads : 1;
platform::dynload::MKL_Set_Num_Threads(real_num_threads); platform::dynload::MKL_Set_Num_Threads(real_num_threads);
omp_set_num_threads(num_threads);
#else #else
PADDLE_ENFORCE(false, "To be implemented."); PADDLE_ENFORCE(false, "To be implemented.");
#endif #endif
......
...@@ -23,6 +23,9 @@ limitations under the License. */ ...@@ -23,6 +23,9 @@ limitations under the License. */
#include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/place.h"
#include "paddle/fluid/string/piece.h" #include "paddle/fluid/string/piece.h"
DEFINE_int32(paddle_num_threads, 1,
"Number of threads for each paddle instance.");
namespace paddle { namespace paddle {
namespace framework { namespace framework {
...@@ -115,7 +118,7 @@ void InitDevices(bool init_p2p, const std::vector<int> devices) { ...@@ -115,7 +118,7 @@ void InitDevices(bool init_p2p, const std::vector<int> devices) {
places.emplace_back(platform::CPUPlace()); places.emplace_back(platform::CPUPlace());
platform::DeviceContextPool::Init(places); platform::DeviceContextPool::Init(places);
#ifndef PADDLE_WITH_MKLDNN #ifndef PADDLE_WITH_MKLDNN
platform::SetNumThreads(1); platform::SetNumThreads(FLAGS_paddle_num_threads);
#endif #endif
} }
......
...@@ -123,7 +123,7 @@ def __bootstrap__(): ...@@ -123,7 +123,7 @@ def __bootstrap__():
read_env_flags = [ read_env_flags = [
'use_pinned_memory', 'check_nan_inf', 'benchmark', 'warpctc_dir', 'use_pinned_memory', 'check_nan_inf', 'benchmark', 'warpctc_dir',
'eager_delete_scope', 'use_mkldnn', 'initial_cpu_memory_in_mb', 'eager_delete_scope', 'use_mkldnn', 'initial_cpu_memory_in_mb',
'init_allocated_mem', 'free_idle_memory' 'init_allocated_mem', 'free_idle_memory', 'paddle_num_threads'
] ]
if core.is_compiled_with_dist(): if core.is_compiled_with_dist():
read_env_flags.append('rpc_deadline') read_env_flags.append('rpc_deadline')
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册