From cb9c59bdebd2dbdf7f7cf22781548734deb5e383 Mon Sep 17 00:00:00 2001 From: liuwei1031 <46661762+liuwei1031@users.noreply.github.com> Date: Wed, 10 Apr 2019 10:33:34 +0800 Subject: [PATCH] cherry-pick PR 16547,16736,16739 test=release/1.4 (#16748) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix the bug of reusing different types of variables in memory_optimiz… (#16547) * fix the bug of reusing different types of variables in memory_optimize_pass, test=develop * disable SELECTED_ROWS AND LOD_TENSOR_ARRAY reusage, test=develop * only use the latest version variable for inplace strategy (#16736) * bug-fix, test=develop * tweak code, test=develop * cherry-pick PR 16547,16736,16739 test=release/1.4 --- .../framework/details/inplace_op_pass.cc | 9 ++++ .../details/memory_optimize_helper.cc | 43 ++++++++----------- .../details/memory_optimize_helper.h | 7 +-- 3 files changed, 30 insertions(+), 29 deletions(-) diff --git a/paddle/fluid/framework/details/inplace_op_pass.cc b/paddle/fluid/framework/details/inplace_op_pass.cc index 79150f719e3..84c9e4a379a 100644 --- a/paddle/fluid/framework/details/inplace_op_pass.cc +++ b/paddle/fluid/framework/details/inplace_op_pass.cc @@ -305,6 +305,12 @@ void InplacePass::TryInplaceOpInputOutput(ir::Node* op, VLOG(4) << "Try to inplace " << in_var_name << " with " << out_var_name; + if (var_nodes_[in_var_name].back() != in_node) { + VLOG(4) << "SKIP since " << in_var_name + << " is also used as output by other ops"; + continue; + } + bool can_replace = true; if (in_var_name == out_var_name) { can_replace = false; @@ -527,6 +533,9 @@ void GraphView::Build(ir::Graph* g) { }; for (auto& node : g->Nodes()) { if (!node->IsOp()) continue; + // avoid optimize the variable used in sub-blocks + if (OpHasSubBlock(node->Op())) update_skip_set(node); + if (node->Name() == "send") update_skip_set(node); if (node->Name() == "recv") update_skip_set(node); if (node->Name() == "prefetch") 
update_skip_set(node); diff --git a/paddle/fluid/framework/details/memory_optimize_helper.cc b/paddle/fluid/framework/details/memory_optimize_helper.cc index 894d7dad2e6..1af57dc4087 100644 --- a/paddle/fluid/framework/details/memory_optimize_helper.cc +++ b/paddle/fluid/framework/details/memory_optimize_helper.cc @@ -131,16 +131,7 @@ size_t NodeSize(const VarDesc& node) { return type_size * std::abs(size); } -size_t NodeSize(ir::Node* n) { - VarDesc* desc = nullptr; - // some op do not have block pointer - if (n->inputs[0]->Op() != nullptr) { - desc = FindVarDescInBlock(n); - } else { - desc = n->Var(); - } - return NodeSize(*desc); -} +size_t NodeSize(ir::Node* n) { return NodeSize(*(n->Var())); } std::string DebugStringImpl(VarDesc* var) { std::stringstream ss; @@ -163,24 +154,22 @@ std::string DebugStringImpl(VarDesc* var) { } std::string DebugString(ir::Node* var) { - return DebugStringImpl(FindVarDescInBlock(var)); + return DebugStringImpl(GetVarDesc(var)); } // NOTE(dzh): based ir node, if a large node has been reused // by a small size node, then next time it appear in pool, it will // have the small size. Find the original node shap from blockdesc. 
-VarDesc* FindVarDescInBlock(ir::Node* n) { +VarDesc* GetVarDesc(ir::Node* n) { PADDLE_ENFORCE(n->IsVar() && !n->IsCtrlVar() && n->inputs.size() == 1); - BlockDesc* block = n->inputs[0]->Op()->Block(); - PADDLE_ENFORCE(block->HasVar(n->Name()), - string::Sprintf("Block do not has var %s", n->Name())); - return block->FindVar(n->Name()); + return n->Var(); } struct NodeComparator { bool operator()(ir::Node* lhs, ir::Node* rhs) const { - auto* lhs_desc = FindVarDescInBlock(lhs); - auto* rhs_desc = FindVarDescInBlock(rhs); + if (lhs->Var()->GetType() != rhs->Var()->GetType()) return false; + auto* lhs_desc = GetVarDesc(lhs); + auto* rhs_desc = GetVarDesc(rhs); // match data type if (lhs_desc->GetDataType() != rhs_desc->GetDataType()) { return false; @@ -204,7 +193,7 @@ void OrderedSet::Insert(ir::Node* var) { return; } - auto* var_desc = FindVarDescInBlock(var); + auto* var_desc = var->Var(); auto var_shape = var_desc->GetShape(); int batch_size = static_cast<int>(var_shape[0]); @@ -212,7 +201,7 @@ void OrderedSet::Insert(ir::Node* var) { Iter it = nodes_.begin(); while (it != nodes_.end()) { auto& prev = it->front(); - auto* cache_desc = FindVarDescInBlock(prev); + auto* cache_desc = GetVarDesc(prev); int cache_batch_size = cache_desc->GetShape()[0]; if ((cache_batch_size == -1 && batch_size == -1) || (cache_batch_size != -1 && batch_size != -1)) { @@ -336,10 +325,16 @@ int MinChunkSize() { bool NodeCanReused(const VarDesc& node) { auto type = node.GetType(); // only these types holds bulk of gpu memory - if (!(type == proto::VarType::LOD_TENSOR || - type == proto::VarType::LOD_TENSOR_ARRAY)) { - return false; - } + // FIXME(liuwei1031) did not find good ways to test SELECTED_ROWS and + // LOD_TENSOR_ARRAY re-use logic, + // disable them in version 1.4 + // if (!(type == proto::VarType::LOD_TENSOR || + // type == proto::VarType::SELECTED_ROWS || + // type == proto::VarType::LOD_TENSOR_ARRAY)) { + // return false; + // } + if (type != proto::VarType::LOD_TENSOR) return 
false; + // persistable variable is parameter if (node.Persistable()) { return false; diff --git a/paddle/fluid/framework/details/memory_optimize_helper.h b/paddle/fluid/framework/details/memory_optimize_helper.h index b5348cc66ea..65c7017d2d4 100644 --- a/paddle/fluid/framework/details/memory_optimize_helper.h +++ b/paddle/fluid/framework/details/memory_optimize_helper.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include "paddle/fluid/framework/data_type.h" @@ -140,11 +141,7 @@ size_t NodeSize(const VarDesc&); std::string DebugString(ir::Node* var); -// NOTE(dzhwinter) -// after node reuse, the replaced node shape is -// different with its VarDesc. So need to find the -// correct VarDesc in Block. -VarDesc* FindVarDescInBlock(ir::Node* n); +VarDesc* GetVarDesc(ir::Node* n); static inline bool IsSameDesc(OpDesc* op1, OpDesc* op2) { return op1->Type() == op2->Type() && op1->Inputs() == op2->Inputs() && -- GitLab