diff --git a/paddle/fluid/framework/scope.cc b/paddle/fluid/framework/scope.cc index f9866411417ece784aab860c6f707b1a1fcd8528..9091713158c8071d5386f14250e3c546284e7fd0 100644 --- a/paddle/fluid/framework/scope.cc +++ b/paddle/fluid/framework/scope.cc @@ -26,6 +26,11 @@ DEFINE_bool(benchmark, false, "Default cuda is asynchronous device, set to True will" "force op run in synchronous mode."); +DEFINE_bool( + eager_delete_scope, true, + "Delete local scope eagerly. It will reduce GPU memory usage but " + "slow down the destruction of variables. (around 1% performance harm)"); + namespace paddle { namespace framework { @@ -97,7 +102,7 @@ void Scope::DeleteScope(Scope* scope) const { PADDLE_ENFORCE(it != this->kids_.end(), "Cannot find %p as kid scope", scope); this->kids_.erase(it); // When making memory benchmark on Fluid, we have to delete scope sync. - if (FLAGS_benchmark) { + if (FLAGS_benchmark || FLAGS_eager_delete_scope) { delete scope; } else { Async([scope] { delete scope; }); diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py index e9ca0d45f98bd27692a15060310d4e8cd1e8b181..e2502990d5b78eb0db7bdfd0c8ef9fb6688016df 100644 --- a/python/paddle/fluid/__init__.py +++ b/python/paddle/fluid/__init__.py @@ -107,7 +107,8 @@ def __bootstrap__(): os.environ['OMP_NUM_THREADS'] = str(num_threads) read_env_flags = [ - 'use_pinned_memory', 'check_nan_inf', 'benchmark', 'warpctc_dir' + 'use_pinned_memory', 'check_nan_inf', 'benchmark', 'warpctc_dir', + 'eager_delete_scope' ] if core.is_compiled_with_cuda(): read_env_flags += ['fraction_of_gpu_memory_to_use']