提交 92b5f49e 编写于 作者: Y Yu Yang

Eagerly destruct local scope

上级 ba927b88
...@@ -26,6 +26,11 @@ DEFINE_bool(benchmark, false, ...@@ -26,6 +26,11 @@ DEFINE_bool(benchmark, false,
"Default cuda is asynchronous device, set to True will" "Default cuda is asynchronous device, set to True will"
"force op run in synchronous mode."); "force op run in synchronous mode.");
DEFINE_bool(
eager_delete_scope, true,
"Delete local scope eagerly. It will reduce GPU memory usage but "
"slow down the destruction of variables.(around 1% performance harm)");
namespace paddle { namespace paddle {
namespace framework { namespace framework {
...@@ -97,7 +102,7 @@ void Scope::DeleteScope(Scope* scope) const { ...@@ -97,7 +102,7 @@ void Scope::DeleteScope(Scope* scope) const {
PADDLE_ENFORCE(it != this->kids_.end(), "Cannot find %p as kid scope", scope); PADDLE_ENFORCE(it != this->kids_.end(), "Cannot find %p as kid scope", scope);
this->kids_.erase(it); this->kids_.erase(it);
// When making memory benchmark on Fluid, we have to delete scope sync. // When making memory benchmark on Fluid, we have to delete scope sync.
if (FLAGS_benchmark) { if (FLAGS_benchmark || FLAGS_eager_delete_scope) {
delete scope; delete scope;
} else { } else {
Async([scope] { delete scope; }); Async([scope] { delete scope; });
......
...@@ -107,7 +107,8 @@ def __bootstrap__(): ...@@ -107,7 +107,8 @@ def __bootstrap__():
os.environ['OMP_NUM_THREADS'] = str(num_threads) os.environ['OMP_NUM_THREADS'] = str(num_threads)
read_env_flags = [ read_env_flags = [
'use_pinned_memory', 'check_nan_inf', 'benchmark', 'warpctc_dir' 'use_pinned_memory', 'check_nan_inf', 'benchmark', 'warpctc_dir',
'eager_delete_scope'
] ]
if core.is_compiled_with_cuda(): if core.is_compiled_with_cuda():
read_env_flags += ['fraction_of_gpu_memory_to_use'] read_env_flags += ['fraction_of_gpu_memory_to_use']
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册