提交 612e1a31 编写于 作者: S sneaxiy

modification

上级 d0b2453e
...@@ -23,8 +23,6 @@ ...@@ -23,8 +23,6 @@
#include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/framework/details/reference_count_op_handle.h"
namespace paddle { namespace paddle {
namespace framework { namespace framework {
namespace details { namespace details {
......
...@@ -89,11 +89,6 @@ class OpHandleBase { ...@@ -89,11 +89,6 @@ class OpHandleBase {
ir::Node *Node() { return node_; } ir::Node *Node() { return node_; }
const std::map<platform::Place, platform::DeviceContext *>
&GetDeviceContexts() const {
return dev_ctxes_;
}
protected: protected:
void RunAndRecordEvent(const std::function<void()> &callback); void RunAndRecordEvent(const std::function<void()> &callback);
......
...@@ -69,15 +69,15 @@ class ReferenceCountOpHandle : public OpHandleBase { ...@@ -69,15 +69,15 @@ class ReferenceCountOpHandle : public OpHandleBase {
std::string Name() const override { return "reference_count"; } std::string Name() const override { return "reference_count"; }
// protected: protected:
void RunImpl() override { void RunImpl() override {
auto *exec_scope_ = scope_->FindVar(kLocalExecScopeName)->Get<Scope *>(); auto *exec_scope = scope_->FindVar(kLocalExecScopeName)->Get<Scope *>();
std::vector<LoDTensor *> tensors; std::vector<LoDTensor *> tensors;
for (auto &name : var_names_) { for (auto &name : var_names_) {
auto it = ref_cnts_->find(name); auto it = ref_cnts_->find(name);
if (it == ref_cnts_->end()) continue; if (it == ref_cnts_->end()) continue;
auto *var = exec_scope_->FindVar(name); auto *var = exec_scope->FindVar(name);
if (var == nullptr || !var->IsType<LoDTensor>()) continue; if (var == nullptr || !var->IsType<LoDTensor>()) continue;
if (it->second.fetch_sub(1) <= 1) { if (it->second.fetch_sub(1) <= 1) {
...@@ -91,8 +91,8 @@ class ReferenceCountOpHandle : public OpHandleBase { ...@@ -91,8 +91,8 @@ class ReferenceCountOpHandle : public OpHandleBase {
} }
private: private:
void ClearTensors(const std::vector<LoDTensor *> &tensors) const { void ClearTensors(const std::vector<LoDTensor *> &tensors) {
auto *gc = dynamic_cast<const StreamGarbageCollector<Tensor> *>(gc_); auto *gc = dynamic_cast<StreamGarbageCollector<Tensor> *>(gc_);
if (gc != nullptr) { if (gc != nullptr) {
auto compute_stream = dev_ctx_->stream(); auto compute_stream = dev_ctx_->stream();
auto callback_stream = gc->stream(); auto callback_stream = gc->stream();
......
...@@ -128,12 +128,10 @@ std::unique_ptr<ir::Graph> ReferenceCountPass::ApplyImpl( ...@@ -128,12 +128,10 @@ std::unique_ptr<ir::Graph> ReferenceCountPass::ApplyImpl(
std::vector<std::unique_ptr<OpHandleBase>> new_all_ops; std::vector<std::unique_ptr<OpHandleBase>> new_all_ops;
new_all_ops.reserve(compute_ref_cnt_map.size() + all_ops.size()); new_all_ops.reserve(compute_ref_cnt_map.size() + all_ops.size());
for (auto &op : all_ops) { for (auto &op : all_ops) {
auto it = compute_ref_cnt_map.find(op.get()); new_all_ops.emplace_back(std::move(op));
auto it = compute_ref_cnt_map.find(new_all_ops.back().get());
if (it != compute_ref_cnt_map.end()) { if (it != compute_ref_cnt_map.end()) {
new_all_ops.emplace_back(std::move(op)); new_all_ops.emplace_back(it->second);
new_all_ops.emplace_back(std::unique_ptr<OpHandleBase>(it->second));
} else {
new_all_ops.emplace_back(std::move(op));
} }
} }
......
...@@ -37,9 +37,11 @@ int kProgramId = -1; ...@@ -37,9 +37,11 @@ int kProgramId = -1;
ExecutorPrepareContext::ExecutorPrepareContext( ExecutorPrepareContext::ExecutorPrepareContext(
const framework::ProgramDesc& prog, size_t block_id) const framework::ProgramDesc& prog, size_t block_id)
: prog_(prog), : prog_(prog), block_id_(block_id) {
block_id_(block_id), if (GetEagerDeletionThreshold() >= 0) {
ref_cnts_(GetNonPersistableReferenceCount<int>(prog, block_id)) {} ref_cnts_ = GetNonPersistableReferenceCount<int>(prog_, block_id_);
}
}
ExecutorPrepareContext::~ExecutorPrepareContext() { ExecutorPrepareContext::~ExecutorPrepareContext() {
VLOG(5) << "destroy ExecutorPrepareContext"; VLOG(5) << "destroy ExecutorPrepareContext";
...@@ -331,8 +333,6 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope, ...@@ -331,8 +333,6 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
CreateVariables(ctx->prog_, local_scope, ctx->block_id_); CreateVariables(ctx->prog_, local_scope, ctx->block_id_);
} }
std::shared_ptr<std::vector<framework::LoDTensor*>> erase_tensors(
new std::vector<framework::LoDTensor*>());
int64_t max_memory_size = GetEagerDeletionThreshold(); int64_t max_memory_size = GetEagerDeletionThreshold();
std::unique_ptr<GarbageCollector<Tensor>> gc; std::unique_ptr<GarbageCollector<Tensor>> gc;
...@@ -353,7 +353,6 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope, ...@@ -353,7 +353,6 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
for (auto& op : ctx->ops_) { for (auto& op : ctx->ops_) {
op->Run(*local_scope, place_); op->Run(*local_scope, place_);
#ifdef PADDLE_WITH_CUDA
if (gc != nullptr) { if (gc != nullptr) {
std::vector<std::string> erase_vars; std::vector<std::string> erase_vars;
for (auto& input : op->Inputs()) { for (auto& input : op->Inputs()) {
...@@ -395,7 +394,6 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope, ...@@ -395,7 +394,6 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
if (!erase_tensors.empty()) gc->Add(erase_tensors); if (!erase_tensors.empty()) gc->Add(erase_tensors);
} }
} }
#endif
if (FLAGS_benchmark) { if (FLAGS_benchmark) {
VLOG(2) << "Memory used after operator " + op->Type() + " running: " VLOG(2) << "Memory used after operator " + op->Type() + " running: "
...@@ -403,10 +401,11 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope, ...@@ -403,10 +401,11 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
} }
} }
if (gc != nullptr) if (gc != nullptr) {
gc->Wait(); gc->Wait();
else } else {
platform::DeviceContextPool::Instance().Get(place_)->Wait(); platform::DeviceContextPool::Instance().Get(place_)->Wait();
}
if (local_scope != scope) { if (local_scope != scope) {
scope->DeleteScope(local_scope); scope->DeleteScope(local_scope);
......
...@@ -28,8 +28,6 @@ namespace paddle { ...@@ -28,8 +28,6 @@ namespace paddle {
namespace framework { namespace framework {
extern void InitializeVariable(Variable* var, proto::VarType::Type var_type); extern void InitializeVariable(Variable* var, proto::VarType::Type var_type);
int64_t GetEagerDeletionThreshold();
template <typename T> template <typename T>
std::unordered_map<std::string, T> GetNonPersistableReferenceCount( std::unordered_map<std::string, T> GetNonPersistableReferenceCount(
const ProgramDesc& prog, size_t block_id) { const ProgramDesc& prog, size_t block_id) {
......
...@@ -22,7 +22,6 @@ limitations under the License. */ ...@@ -22,7 +22,6 @@ limitations under the License. */
#include "paddle/fluid/framework/ir/graph_viz_pass.h" #include "paddle/fluid/framework/ir/graph_viz_pass.h"
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/framework/details/reference_count_pass.h"
#include "paddle/fluid/platform/nccl_helper.h" #include "paddle/fluid/platform/nccl_helper.h"
#endif #endif
......
...@@ -29,6 +29,10 @@ limitations under the License. */ ...@@ -29,6 +29,10 @@ limitations under the License. */
#include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/device_context.h"
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/framework/details/reference_count_pass.h"
#endif
namespace paddle { namespace paddle {
namespace framework { namespace framework {
......
...@@ -32,7 +32,7 @@ DEFINE_bool( ...@@ -32,7 +32,7 @@ DEFINE_bool(
"slow down the destruction of variables.(around 1% performance harm)"); "slow down the destruction of variables.(around 1% performance harm)");
DEFINE_double( DEFINE_double(
eager_delete_tensor_GB, -1.0, eager_delete_tensor_gb, -1.0,
"Memory size threshold (GB) when the garbage collector clear tensors." "Memory size threshold (GB) when the garbage collector clear tensors."
"Disabled when this value is less than 0"); "Disabled when this value is less than 0");
...@@ -40,9 +40,9 @@ namespace paddle { ...@@ -40,9 +40,9 @@ namespace paddle {
namespace framework { namespace framework {
int64_t GetEagerDeletionThreshold() { int64_t GetEagerDeletionThreshold() {
return FLAGS_eager_delete_tensor_GB < 0 return FLAGS_eager_delete_tensor_gb < 0
? -1 ? -1
: static_cast<int64_t>(FLAGS_eager_delete_tensor_GB * : static_cast<int64_t>(FLAGS_eager_delete_tensor_gb *
(static_cast<int64_t>(1) << 30)); (static_cast<int64_t>(1) << 30));
} }
......
...@@ -36,8 +36,6 @@ limitations under the License. */ ...@@ -36,8 +36,6 @@ limitations under the License. */
#endif #endif
#include "unsupported/Eigen/CXX11/Tensor" #include "unsupported/Eigen/CXX11/Tensor"
DECLARE_bool(clear_gpu_memory_when_unused);
namespace paddle { namespace paddle {
namespace platform { namespace platform {
......
...@@ -122,7 +122,7 @@ def __bootstrap__(): ...@@ -122,7 +122,7 @@ def __bootstrap__():
'use_pinned_memory', 'check_nan_inf', 'benchmark', 'warpctc_dir', 'use_pinned_memory', 'check_nan_inf', 'benchmark', 'warpctc_dir',
'eager_delete_scope', 'use_mkldnn', 'initial_cpu_memory_in_mb', 'eager_delete_scope', 'use_mkldnn', 'initial_cpu_memory_in_mb',
'init_allocated_mem', 'free_idle_memory', 'paddle_num_threads', 'init_allocated_mem', 'free_idle_memory', 'paddle_num_threads',
"dist_threadpool_size", 'cpu_deterministic', 'eager_delete_tensor_GB' "dist_threadpool_size", 'cpu_deterministic', 'eager_delete_tensor_gb'
] ]
if core.is_compiled_with_dist(): if core.is_compiled_with_dist():
read_env_flags.append('rpc_deadline') read_env_flags.append('rpc_deadline')
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册