Unverified commit cbe64cc1, authored by JingZhuangzhuang, committed by GitHub

Support inference compilation in training package (#46008)

* Merge python lib
* Update third_party.cmake
* Update CMakeLists.txt
Parent 9718791c
......@@ -249,7 +249,7 @@ option(WITH_SYSTEM_BLAS "Use system blas library" OFF)
option(WITH_DISTRIBUTE "Compile with distributed support" OFF)
option(WITH_BRPC_RDMA "Use brpc rdma as the rpc protocal" OFF)
option(ON_INFER "Turn on inference optimization and inference-lib generation"
-OFF)
+ON)
################################ Internal Configurations #######################################
option(WITH_NV_JETSON "Compile PaddlePaddle with NV JETSON" OFF)
option(WITH_PROFILER "Compile PaddlePaddle with GPU profiler and gperftools"
......
......@@ -236,7 +236,7 @@ endif()
if(WIN32
OR APPLE
OR NOT WITH_GPU
-OR ON_INFER)
+OR (ON_INFER AND NOT WITH_PYTHON))
set(WITH_DGC OFF)
endif()
......
......@@ -21,17 +21,8 @@
#include "paddle/phi/core/utils/rw_lock.h"
-// When in inference scenario, the scopes will not be written by two threads in
-// a mean time, but a scope may be read by multiple threads concurrently, and
-// the mutex will cause serious performance issue.
-// So the mutex is disabled when `ON_INFER`.
-#ifdef PADDLE_ON_INFERENCE
-#define SCOPE_VARS_READER_LOCK
-#define SCOPE_VARS_WRITER_LOCK
-#else
#define SCOPE_VARS_READER_LOCK AutoRDLock auto_lock(&vars_lock_);
#define SCOPE_VARS_WRITER_LOCK AutoWRLock auto_lock(&vars_lock_);
-#endif
namespace paddle {
namespace framework {
......
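For context: the two #define lines kept above are RAII reader/writer lock guards, and after this commit they are always compiled in rather than stripped under ON_INFER. Below is a minimal self-contained sketch of the same reader/writer pattern, using std::shared_mutex as a stand-in for phi::RWLock; ToyScope and everything in it are illustrative, not the actual Paddle source.

    #include <map>
    #include <memory>
    #include <mutex>
    #include <shared_mutex>
    #include <string>

    struct Variable {};  // stand-in for paddle::framework::Variable

    class ToyScope {
     public:
      // Read path: many threads may hold the reader lock concurrently,
      // mirroring what SCOPE_VARS_READER_LOCK provides.
      Variable* FindVarLocally(const std::string& name) const {
        std::shared_lock<std::shared_mutex> lock(vars_lock_);
        auto it = vars_.find(name);
        return it != vars_.end() ? it->second.get() : nullptr;
      }
      // Write path: exclusive access, mirroring SCOPE_VARS_WRITER_LOCK.
      Variable* Var(const std::string& name) {
        std::unique_lock<std::shared_mutex> lock(vars_lock_);
        auto& slot = vars_[name];
        if (!slot) slot = std::make_unique<Variable>();
        return slot.get();
      }
     private:
      std::map<std::string, std::unique_ptr<Variable>> vars_;
      mutable std::shared_mutex vars_lock_;
    };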
......@@ -24,17 +24,8 @@
#include "paddle/fluid/platform/event.h"
#include "paddle/phi/core/utils/rw_lock.h"
-// When in inference scenario, the scopes will not be written by two threads in
-// a mean time, but a scope may be read by multiple threads concurrently, and
-// the mutex will cause serious performance issue.
-// So the mutex is disabled when `ON_INFER`.
-#ifdef PADDLE_ON_INFERENCE
-#define SCOPE_VARS_READER_LOCK
-#define SCOPE_VARS_WRITER_LOCK
-#else
#define SCOPE_VARS_READER_LOCK AutoRDLock auto_lock(&vars_lock_);
#define SCOPE_VARS_WRITER_LOCK AutoWRLock auto_lock(&vars_lock_);
-#endif
namespace paddle {
namespace framework {
......
......@@ -25,21 +25,10 @@ PADDLE_DEFINE_EXPORTED_bool(
"Delete local scope eagerly. It will reduce GPU memory usage but "
"slow down the destruction of variables.(around 1% performance harm)");
-// When in inference scenario, the scopes will not be written by two threads in
-// a mean time, but a scope may be read by multiple threads concurrently, and
-// the mutex will cause serious performance issue.
-// So the mutex is disabled when `ON_INFER`.
-#ifdef PADDLE_ON_INFERENCE
-#define SCOPE_KIDS_READER_LOCK
-#define SCOPE_KIDS_WRITER_LOCK
-#define SCOPE_VARS_READER_LOCK
-#define SCOPE_VARS_WRITER_LOCK
-#else
#define SCOPE_KIDS_READER_LOCK phi::AutoRDLock auto_lock(&kids_lock_);
#define SCOPE_KIDS_WRITER_LOCK phi::AutoWRLock auto_lock(&kids_lock_);
#define SCOPE_VARS_READER_LOCK phi::AutoRDLock auto_lock(&vars_lock_);
#define SCOPE_VARS_WRITER_LOCK phi::AutoWRLock auto_lock(&vars_lock_);
-#endif
namespace paddle {
namespace framework {
......
......@@ -179,12 +179,9 @@ class Scope : public ScopeBase {
DISABLE_COPY_AND_ASSIGN(Scope);
-#ifndef PADDLE_ON_INFERENCE
private:
mutable phi::RWLock kids_lock_;
mutable phi::RWLock vars_lock_;
-#endif
};
// Generate some debug string about the inherience structure of scope, quite
......
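The kids_lock_/vars_lock_ members above are now unconditional. AutoRDLock/AutoWRLock, used by the macros earlier in this diff, are RAII guards over phi::RWLock; here is a simplified sketch of the pattern, assuming RWLock wraps pthread_rwlock_t as paddle/phi/core/utils/rw_lock.h does on Linux. The stub is illustrative, not the exact source.

    #include <pthread.h>

    // Illustrative stub of phi::RWLock.
    struct RWLock {
      RWLock() { pthread_rwlock_init(&lock_, nullptr); }
      ~RWLock() { pthread_rwlock_destroy(&lock_); }
      void RDLock() { pthread_rwlock_rdlock(&lock_); }
      void WRLock() { pthread_rwlock_wrlock(&lock_); }
      void UNLock() { pthread_rwlock_unlock(&lock_); }
      pthread_rwlock_t lock_;
    };

    // RAII reader guard shaped like phi::AutoRDLock: the lock is released
    // when the guard goes out of scope, even on early return.
    class AutoRDLock {
     public:
      explicit AutoRDLock(RWLock* lock) : lock_(lock) { lock_->RDLock(); }
      ~AutoRDLock() { lock_->UNLock(); }
      AutoRDLock(const AutoRDLock&) = delete;
      AutoRDLock& operator=(const AutoRDLock&) = delete;
     private:
      RWLock* lock_;
    };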
......@@ -388,11 +388,7 @@ PADDLE_DEFINE_EXPORTED_int32(
* enable garbage collection strategy when training large networks.
*/
// Disable gc by default when inference library is built
-#ifdef PADDLE_ON_INFERENCE
-static const double kDefaultEagerDeleteTensorGB = -1;
-#else
static const double kDefaultEagerDeleteTensorGB = 0;
-#endif
PADDLE_DEFINE_EXPORTED_double(
eager_delete_tensor_gb,
......@@ -663,11 +659,7 @@ PADDLE_DEFINE_EXPORTED_bool(use_mkldnn, false, "Use MKLDNN to run");
* If FLAGS_call_stack_level == 2, the python stack, c++ stack, and error
* message summary will be shown.
*/
-#ifdef PADDLE_ON_INFERENCE
-static const int32_t kDefaultCallStackLevel = 2;
-#else
static const int32_t kDefaultCallStackLevel = 1;
-#endif
PADDLE_DEFINE_EXPORTED_int32(
call_stack_level,
......
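The defaults kept above (eager_delete_tensor_gb = 0, call_stack_level = 1) are now unconditional instead of switching on PADDLE_ON_INFERENCE. As a hedged illustration of how a leveled flag like call_stack_level is typically consumed: FormatError and its arguments below are hypothetical, not Paddle's actual error formatter, and only the level-2 behavior is documented in the comment above.

    #include <cstdint>
    #include <string>

    extern int32_t FLAGS_call_stack_level;  // exported by PADDLE_DEFINE_EXPORTED_int32

    // Hypothetical consumer: higher levels prepend more context to the error.
    std::string FormatError(const std::string& summary,
                            const std::string& cpp_stack,
                            const std::string& python_stack) {
      std::string msg;
      if (FLAGS_call_stack_level >= 2) msg += cpp_stack;     // level 2 adds the c++ stack
      if (FLAGS_call_stack_level >= 1) msg += python_stack;  // assumed level-1 behavior
      msg += summary;                                        // summary is always shown
      return msg;
    }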
......@@ -140,7 +140,7 @@ if(WITH_CUSTOM_DEVICE)
set(PYBIND_DEPS ${PYBIND_DEPS} custom_device_common_op_registry)
endif()
-if(NOT ON_INFER)
+if(WITH_PYTHON)
set(PYBIND_DEPS ${PYBIND_DEPS} processgroup eager_reducer)
if(WITH_NCCL OR WITH_RCCL)
set(PYBIND_DEPS ${PYBIND_DEPS} processgroup_nccl)
......
......@@ -2547,7 +2547,7 @@ All parameter, weight, gradient are variables in Paddle.
BindCompatible(&m);
BindDataset(&m);
BindGenerator(&m);
-#ifndef PADDLE_ON_INFERENCE
+#ifndef PADDLE_NO_PYTHON
BindDistributed(&m);
#endif
#ifdef PADDLE_WITH_ASCEND
......
......@@ -799,7 +799,7 @@ py_test_modules(
# it is found that windows CI will run all the training unittests with the ON_INFER option turned on,
# which will not appear in other CIs. The calculation behavior of some ops in inference mode is
# inconsistent with that in non-inference mode.
-if(NOT ON_INFER)
+if(WITH_PYTHON)
py_test_modules(test_parallel_executor_seresnext_base_cpu MODULES
test_parallel_executor_seresnext_base_cpu)
py_test_modules(test_parallel_executor_seresnext_with_reduce_cpu MODULES
......
......@@ -34,7 +34,7 @@ set(TEST_EAGER_OPS
list(REMOVE_ITEM TEST_OPS test_lac)
# NOTE(Aurelius84): In case of Windows CI, if open ON_INFER, RWLOCK of Scope
# will be removed and will cause some random failed in multi-thread.
-if(NOT ON_INFER)
+if(WITH_PYTHON)
py_test_modules(test_lac MODULES test_lac ENVS FLAGS_enable_eager_mode=1)
set_tests_properties(test_lac PROPERTIES TIMEOUT 120)
endif()
......