From 612e1a31554b4cc24eea61ff257014a975b47929 Mon Sep 17 00:00:00 2001 From: sneaxiy Date: Sat, 15 Sep 2018 15:18:10 +0000 Subject: [PATCH] modification --- .../framework/details/computation_op_handle.h | 2 -- paddle/fluid/framework/details/op_handle_base.h | 5 ----- .../details/reference_count_op_handle.h | 10 +++++----- .../framework/details/reference_count_pass.cc | 8 +++----- paddle/fluid/framework/executor.cc | 17 ++++++++--------- paddle/fluid/framework/executor.h | 2 -- paddle/fluid/framework/parallel_executor.cc | 1 - paddle/fluid/framework/parallel_executor.h | 4 ++++ paddle/fluid/framework/scope.cc | 6 +++--- paddle/fluid/platform/device_context.h | 2 -- python/paddle/fluid/__init__.py | 2 +- 11 files changed, 24 insertions(+), 35 deletions(-) diff --git a/paddle/fluid/framework/details/computation_op_handle.h b/paddle/fluid/framework/details/computation_op_handle.h index 9a330749e..e98f1ab14 100644 --- a/paddle/fluid/framework/details/computation_op_handle.h +++ b/paddle/fluid/framework/details/computation_op_handle.h @@ -23,8 +23,6 @@ #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/platform/device_context.h" -#include "paddle/fluid/framework/details/reference_count_op_handle.h" - namespace paddle { namespace framework { namespace details { diff --git a/paddle/fluid/framework/details/op_handle_base.h b/paddle/fluid/framework/details/op_handle_base.h index d4e2c4448..9fbefabc8 100644 --- a/paddle/fluid/framework/details/op_handle_base.h +++ b/paddle/fluid/framework/details/op_handle_base.h @@ -89,11 +89,6 @@ class OpHandleBase { ir::Node *Node() { return node_; } - const std::map - &GetDeviceContexts() const { - return dev_ctxes_; - } - protected: void RunAndRecordEvent(const std::function &callback); diff --git a/paddle/fluid/framework/details/reference_count_op_handle.h b/paddle/fluid/framework/details/reference_count_op_handle.h index b76fc646c..71db8d952 100644 --- a/paddle/fluid/framework/details/reference_count_op_handle.h +++ b/paddle/fluid/framework/details/reference_count_op_handle.h @@ -69,15 +69,15 @@ class ReferenceCountOpHandle : public OpHandleBase { std::string Name() const override { return "reference_count"; } - // protected: + protected: void RunImpl() override { - auto *exec_scope_ = scope_->FindVar(kLocalExecScopeName)->Get(); + auto *exec_scope = scope_->FindVar(kLocalExecScopeName)->Get(); std::vector tensors; for (auto &name : var_names_) { auto it = ref_cnts_->find(name); if (it == ref_cnts_->end()) continue; - auto *var = exec_scope_->FindVar(name); + auto *var = exec_scope->FindVar(name); if (var == nullptr || !var->IsType()) continue; if (it->second.fetch_sub(1) <= 1) { @@ -91,8 +91,8 @@ class ReferenceCountOpHandle : public OpHandleBase { } private: - void ClearTensors(const std::vector &tensors) const { - auto *gc = dynamic_cast *>(gc_); + void ClearTensors(const std::vector &tensors) { + auto *gc = dynamic_cast *>(gc_); if (gc != nullptr) { auto compute_stream = dev_ctx_->stream(); auto callback_stream = gc->stream(); diff --git a/paddle/fluid/framework/details/reference_count_pass.cc b/paddle/fluid/framework/details/reference_count_pass.cc index 892e6ea48..344754d5a 100644 --- a/paddle/fluid/framework/details/reference_count_pass.cc +++ b/paddle/fluid/framework/details/reference_count_pass.cc @@ -128,12 +128,10 @@ std::unique_ptr ReferenceCountPass::ApplyImpl( std::vector> new_all_ops; new_all_ops.reserve(compute_ref_cnt_map.size() + all_ops.size()); for (auto &op : all_ops) { - auto it = compute_ref_cnt_map.find(op.get()); + new_all_ops.emplace_back(std::move(op)); + auto it = compute_ref_cnt_map.find(new_all_ops.back().get()); if (it != compute_ref_cnt_map.end()) { - new_all_ops.emplace_back(std::move(op)); - new_all_ops.emplace_back(std::unique_ptr(it->second)); - } else { - new_all_ops.emplace_back(std::move(op)); + new_all_ops.emplace_back(it->second); } } diff --git a/paddle/fluid/framework/executor.cc b/paddle/fluid/framework/executor.cc index fd58de28a..650d9086d 100644 --- a/paddle/fluid/framework/executor.cc +++ b/paddle/fluid/framework/executor.cc @@ -37,9 +37,11 @@ int kProgramId = -1; ExecutorPrepareContext::ExecutorPrepareContext( const framework::ProgramDesc& prog, size_t block_id) - : prog_(prog), - block_id_(block_id), - ref_cnts_(GetNonPersistableReferenceCount(prog, block_id)) {} + : prog_(prog), block_id_(block_id) { + if (GetEagerDeletionThreshold() >= 0) { + ref_cnts_ = GetNonPersistableReferenceCount(prog_, block_id_); + } +} ExecutorPrepareContext::~ExecutorPrepareContext() { VLOG(5) << "destroy ExecutorPrepareContext"; @@ -331,8 +333,6 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope, CreateVariables(ctx->prog_, local_scope, ctx->block_id_); } - std::shared_ptr> erase_tensors( - new std::vector()); int64_t max_memory_size = GetEagerDeletionThreshold(); std::unique_ptr> gc; @@ -353,7 +353,6 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope, for (auto& op : ctx->ops_) { op->Run(*local_scope, place_); -#ifdef PADDLE_WITH_CUDA if (gc != nullptr) { std::vector erase_vars; for (auto& input : op->Inputs()) { @@ -395,7 +394,6 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope, if (!erase_tensors.empty()) gc->Add(erase_tensors); } } -#endif if (FLAGS_benchmark) { VLOG(2) << "Memory used after operator " + op->Type() + " running: " @@ -403,10 +401,11 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope, } } - if (gc != nullptr) + if (gc != nullptr) { gc->Wait(); - else + } else { platform::DeviceContextPool::Instance().Get(place_)->Wait(); + } if (local_scope != scope) { scope->DeleteScope(local_scope); diff --git a/paddle/fluid/framework/executor.h b/paddle/fluid/framework/executor.h index 122bafedc..b74626876 100644 --- a/paddle/fluid/framework/executor.h +++ b/paddle/fluid/framework/executor.h @@ -28,8 +28,6 @@ namespace paddle { namespace framework { extern void InitializeVariable(Variable* var, proto::VarType::Type var_type); -int64_t GetEagerDeletionThreshold(); - template std::unordered_map GetNonPersistableReferenceCount( const ProgramDesc& prog, size_t block_id) { diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 880521f29..ae393d66a 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -22,7 +22,6 @@ limitations under the License. */ #include "paddle/fluid/framework/ir/graph_viz_pass.h" #ifdef PADDLE_WITH_CUDA -#include "paddle/fluid/framework/details/reference_count_pass.h" #include "paddle/fluid/platform/nccl_helper.h" #endif diff --git a/paddle/fluid/framework/parallel_executor.h b/paddle/fluid/framework/parallel_executor.h index a0f66c3f8..88e207845 100644 --- a/paddle/fluid/framework/parallel_executor.h +++ b/paddle/fluid/framework/parallel_executor.h @@ -29,6 +29,10 @@ limitations under the License. */ #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/platform/device_context.h" +#ifdef PADDLE_WITH_CUDA +#include "paddle/fluid/framework/details/reference_count_pass.h" +#endif + namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/scope.cc b/paddle/fluid/framework/scope.cc index ece9a69a9..1a727a2c8 100644 --- a/paddle/fluid/framework/scope.cc +++ b/paddle/fluid/framework/scope.cc @@ -32,7 +32,7 @@ DEFINE_bool( "slow down the destruction of variables.(around 1% performance harm)"); DEFINE_double( - eager_delete_tensor_GB, -1.0, + eager_delete_tensor_gb, -1.0, "Memory size threshold (GB) when the garbage collector clear tensors." "Disabled when this value is less than 0"); @@ -40,9 +40,9 @@ namespace paddle { namespace framework { int64_t GetEagerDeletionThreshold() { - return FLAGS_eager_delete_tensor_GB < 0 + return FLAGS_eager_delete_tensor_gb < 0 ? -1 - : static_cast(FLAGS_eager_delete_tensor_GB * + : static_cast(FLAGS_eager_delete_tensor_gb * (static_cast(1) << 30)); } diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h index c3b092b2a..795391951 100644 --- a/paddle/fluid/platform/device_context.h +++ b/paddle/fluid/platform/device_context.h @@ -36,8 +36,6 @@ limitations under the License. */ #endif #include "unsupported/Eigen/CXX11/Tensor" -DECLARE_bool(clear_gpu_memory_when_unused); - namespace paddle { namespace platform { diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py index e4d7575ca..1ca2ac2dd 100644 --- a/python/paddle/fluid/__init__.py +++ b/python/paddle/fluid/__init__.py @@ -122,7 +122,7 @@ def __bootstrap__(): 'use_pinned_memory', 'check_nan_inf', 'benchmark', 'warpctc_dir', 'eager_delete_scope', 'use_mkldnn', 'initial_cpu_memory_in_mb', 'init_allocated_mem', 'free_idle_memory', 'paddle_num_threads', - "dist_threadpool_size", 'cpu_deterministic', 'eager_delete_tensor_GB' + "dist_threadpool_size", 'cpu_deterministic', 'eager_delete_tensor_gb' ] if core.is_compiled_with_dist(): read_env_flags.append('rpc_deadline') -- GitLab