Unverified commit 5130b0a1, authored by JingZhuangzhuang, committed by GitHub

merge python lib (#46013)

Parent 2ca65904
@@ -249,7 +249,7 @@ option(WITH_SYSTEM_BLAS "Use system blas library" OFF)
 option(WITH_DISTRIBUTE "Compile with distributed support" OFF)
 option(WITH_BRPC_RDMA "Use brpc rdma as the rpc protocal" OFF)
 option(ON_INFER "Turn on inference optimization and inference-lib generation"
-       OFF)
+       ON)
 ################################ Internal Configurations #######################################
 option(WITH_NV_JETSON "Compile PaddlePaddle with NV JETSON" OFF)
 option(WITH_PROFILER "Compile PaddlePaddle with GPU profiler and gperftools"
......
@@ -236,7 +236,7 @@ endif()
 if(WIN32
    OR APPLE
    OR NOT WITH_GPU
-   OR ON_INFER)
+   OR (ON_INFER AND NOT WITH_PYTHON))
   set(WITH_DGC OFF)
 endif()
......
@@ -21,17 +21,8 @@
 #include "paddle/phi/core/utils/rw_lock.h"

-// When in inference scenario, the scopes will not be written by two threads in
-// a mean time, but a scope may be read by multiple threads concurrently, and
-// the mutex will cause serious performance issue.
-// So the mutex is disabled when `ON_INFER`.
-#ifdef PADDLE_ON_INFERENCE
-#define SCOPE_VARS_READER_LOCK
-#define SCOPE_VARS_WRITER_LOCK
-#else
 #define SCOPE_VARS_READER_LOCK AutoRDLock auto_lock(&vars_lock_);
 #define SCOPE_VARS_WRITER_LOCK AutoWRLock auto_lock(&vars_lock_);
-#endif

 namespace paddle {
 namespace framework {
......
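The `SCOPE_VARS_*` macros above expand to RAII reader/writer locks over the scope's variable map; before this patch they expanded to nothing in inference builds, and this hunk makes the locked variant unconditional. A standalone sketch of the pattern, with `std::shared_mutex` standing in for `phi::RWLock` and `std::shared_lock`/`std::unique_lock` playing the roles of `AutoRDLock`/`AutoWRLock` (all names below are illustrative, not Paddle's):

```cpp
#include <map>
#include <shared_mutex>
#include <string>

// Illustration of the Scope locking pattern; not Paddle code.
class ScopeSketch {
 public:
  // Lookups take a shared (reader) lock: many threads may read concurrently.
  const int* FindVar(const std::string& name) const {
    std::shared_lock<std::shared_mutex> lock(vars_lock_);
    auto it = vars_.find(name);
    return it == vars_.end() ? nullptr : &it->second;
  }

  // Creation takes an exclusive (writer) lock: it mutates the map.
  int* Var(const std::string& name) {
    std::unique_lock<std::shared_mutex> lock(vars_lock_);
    return &vars_[name];
  }

 private:
  mutable std::shared_mutex vars_lock_;
  std::map<std::string, int> vars_;  // stand-in for name -> Variable storage
};
```

With the `#ifdef` gone, inference builds now pay the shared-lock cost on reads as well, which is the trade-off the CMake NOTE about `test_lac` near the end of this diff alludes to.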
@@ -24,17 +24,8 @@
 #include "paddle/fluid/platform/event.h"
 #include "paddle/phi/core/utils/rw_lock.h"

-// When in inference scenario, the scopes will not be written by two threads in
-// a mean time, but a scope may be read by multiple threads concurrently, and
-// the mutex will cause serious performance issue.
-// So the mutex is disabled when `ON_INFER`.
-#ifdef PADDLE_ON_INFERENCE
-#define SCOPE_VARS_READER_LOCK
-#define SCOPE_VARS_WRITER_LOCK
-#else
 #define SCOPE_VARS_READER_LOCK AutoRDLock auto_lock(&vars_lock_);
 #define SCOPE_VARS_WRITER_LOCK AutoWRLock auto_lock(&vars_lock_);
-#endif

 namespace paddle {
 namespace framework {
......
@@ -25,21 +25,10 @@ PADDLE_DEFINE_EXPORTED_bool(
     "Delete local scope eagerly. It will reduce GPU memory usage but "
     "slow down the destruction of variables.(around 1% performance harm)");

-// When in inference scenario, the scopes will not be written by two threads in
-// a mean time, but a scope may be read by multiple threads concurrently, and
-// the mutex will cause serious performance issue.
-// So the mutex is disabled when `ON_INFER`.
-#ifdef PADDLE_ON_INFERENCE
-#define SCOPE_KIDS_READER_LOCK
-#define SCOPE_KIDS_WRITER_LOCK
-#define SCOPE_VARS_READER_LOCK
-#define SCOPE_VARS_WRITER_LOCK
-#else
 #define SCOPE_KIDS_READER_LOCK phi::AutoRDLock auto_lock(&kids_lock_);
 #define SCOPE_KIDS_WRITER_LOCK phi::AutoWRLock auto_lock(&kids_lock_);
 #define SCOPE_VARS_READER_LOCK phi::AutoRDLock auto_lock(&vars_lock_);
 #define SCOPE_VARS_WRITER_LOCK phi::AutoWRLock auto_lock(&vars_lock_);
-#endif

 namespace paddle {
 namespace framework {
......
@@ -171,12 +171,9 @@ class Scope : public ScopeBase {
   DISABLE_COPY_AND_ASSIGN(Scope);

-#ifndef PADDLE_ON_INFERENCE
  private:
   mutable phi::RWLock kids_lock_;
   mutable phi::RWLock vars_lock_;
-#endif
 };

 // Generate some debug string about the inherience structure of scope, quite
......
@@ -388,11 +388,7 @@ PADDLE_DEFINE_EXPORTED_int32(
  * enable garbage collection strategy when training large networks.
  */
 // Disable gc by default when inference library is built
-#ifdef PADDLE_ON_INFERENCE
-static const double kDefaultEagerDeleteTensorGB = -1;
-#else
 static const double kDefaultEagerDeleteTensorGB = 0;
-#endif

 PADDLE_DEFINE_EXPORTED_double(
     eager_delete_tensor_gb,
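For context on the defaults this hunk collapses: `eager_delete_tensor_gb` is a threshold in GB, where a negative value disables eager garbage collection and `0` frees garbage as soon as it appears (this reading is taken from Paddle's flag documentation, not from this diff). A minimal sketch of the before/after behavior:

```cpp
// Illustrative only; not Paddle code.
// Before this patch the compiled-in default depended on the build mode:
//   PADDLE_ON_INFERENCE: -1.0  => eager garbage collection disabled
//   otherwise:            0.0  => collect garbage as soon as it appears
// After the patch every build defaults to 0.0; an inference deployment that
// wants the old behavior would set FLAGS_eager_delete_tensor_gb=-1 at
// runtime (assuming the exported flag remains overridable, as the name
// PADDLE_DEFINE_EXPORTED_double suggests).
inline bool EagerDeletionEnabled(double eager_delete_tensor_gb) {
  return eager_delete_tensor_gb >= 0.0;  // negative threshold disables gc
}
```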
@@ -663,11 +659,7 @@ PADDLE_DEFINE_EXPORTED_bool(use_mkldnn, false, "Use MKLDNN to run");
  * If FLAGS_call_stack_level == 2, the python stack, c++ stack, and error
  * message summary will be shown.
  */
-#ifdef PADDLE_ON_INFERENCE
-static const int32_t kDefaultCallStackLevel = 2;
-#else
 static const int32_t kDefaultCallStackLevel = 1;
-#endif

 PADDLE_DEFINE_EXPORTED_int32(
     call_stack_level,
......
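The comment block above this hunk documents three verbosity levels (0: error summary only; 1: Python stack plus summary; 2: Python and C++ stacks plus summary; only the level-2 line is visible in this excerpt, the rest is inferred from Paddle's flag docs). A minimal sketch of that dispatch, assuming those semantics; this is not Paddle's actual error-formatting code:

```cpp
#include <cstdint>
#include <string>

// Hypothetical helper illustrating the documented FLAGS_call_stack_level levels.
std::string FormatError(int32_t call_stack_level,
                        const std::string& summary,
                        const std::string& python_stack,
                        const std::string& cpp_stack) {
  std::string out;
  if (call_stack_level >= 1) out += python_stack;  // level >= 1 adds the python stack
  if (call_stack_level >= 2) out += cpp_stack;     // level 2 also adds the c++ stack
  out += summary;                                  // the summary is always shown
  return out;
}
```

The hunk itself only changes the default: inference builds previously defaulted to the most verbose level (2), and now every build defaults to 1.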
@@ -140,7 +140,7 @@ if(WITH_CUSTOM_DEVICE)
   set(PYBIND_DEPS ${PYBIND_DEPS} custom_device_common_op_registry)
 endif()

-if(NOT ON_INFER)
+if(WITH_PYTHON)
   set(PYBIND_DEPS ${PYBIND_DEPS} processgroup eager_reducer)
   if(WITH_NCCL OR WITH_RCCL)
     set(PYBIND_DEPS ${PYBIND_DEPS} processgroup_nccl)
......
@@ -2546,7 +2546,7 @@ All parameter, weight, gradient are variables in Paddle.
   BindCompatible(&m);
   BindDataset(&m);
   BindGenerator(&m);
-#ifndef PADDLE_ON_INFERENCE
+#ifndef PADDLE_NO_PYTHON
   BindDistributed(&m);
 #endif
 #ifdef PADDLE_WITH_ASCEND
......
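The guard now keys off Python support rather than inference mode. This assumes `PADDLE_NO_PYTHON` is defined by the build when `WITH_PYTHON=OFF` (consistent with the CMake changes above, though the line that defines it is not part of this excerpt). A toy pybind11 module showing the shape of such a conditionally compiled binding; all names are illustrative, not Paddle's:

```cpp
#include <pybind11/pybind11.h>

namespace py = pybind11;

// Toy stand-in for BindDistributed: registers extra functions on the module.
void BindDistributedSketch(py::module_* m) {
  m->def("all_reduce_stub", []() { return 0; });
}

PYBIND11_MODULE(core_sketch, m) {
  m.doc() = "sketch of a conditionally compiled binding; not Paddle's code";
#ifndef PADDLE_NO_PYTHON  // assumed to be defined when WITH_PYTHON=OFF
  BindDistributedSketch(&m);
#endif
}
```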
@@ -805,7 +805,7 @@ py_test_modules(
 # it is found that windows CI will run all the training unittests with the ON_INFER option turned on,
 # which will not appear in other CIs. The calculation behavior of some ops in inference mode is
 # inconsistent with that in non-inference mode.
-if(NOT ON_INFER)
+if(WITH_PYTHON)
   py_test_modules(test_parallel_executor_seresnext_base_cpu MODULES
                   test_parallel_executor_seresnext_base_cpu)
   py_test_modules(test_parallel_executor_seresnext_with_reduce_cpu MODULES
......
@@ -34,7 +34,7 @@ set(TEST_EAGER_OPS
 list(REMOVE_ITEM TEST_OPS test_lac)
 # NOTE(Aurelius84): In case of Windows CI, if open ON_INFER, RWLOCK of Scope
 # will be removed and will cause some random failed in multi-thread.
-if(NOT ON_INFER)
+if(WITH_PYTHON)
   py_test_modules(test_lac MODULES test_lac ENVS FLAGS_enable_eager_mode=1)
   set_tests_properties(test_lac PROPERTIES TIMEOUT 120)
 endif()
......