diff --git a/paddle/fluid/framework/new_executor/interpretercore.cc b/paddle/fluid/framework/new_executor/interpretercore.cc
index b9a235bafac0a83c4c1fd47861745d4cf1de7b3d..1e54df3c73cda1de10fae7a88db1023a67c04651 100644
--- a/paddle/fluid/framework/new_executor/interpretercore.cc
+++ b/paddle/fluid/framework/new_executor/interpretercore.cc
@@ -284,6 +284,28 @@ void InterpreterCore::BuildAndCacheInstructionCtx(Instruction* instr_node) {
 }
 
 void InterpreterCore::BuildInplace() {
+  // NOTE(Ruibiao): coalesce_tensor_op outputs a FusedOutput Tensor and a list
+  // of Output Tensors which are sliced from the FusedOutput. These outputs
+  // should not be the outvar of the in-place var-pair since memory reuse
+  // between FusedOutput and Output Tensors is assumed. For the following
+  // example:
+  // fused_var, var1, var2, var3 = coalesce_tensor(var1, var2, var3)
+  // var1 = sum(var4, var5)
+  // ...
+  //
+  // After running coalesce_tensor_op, var1 is assumed to share the buffer
+  // slices from fused_var. However, if sum_op is in-place, then var1 would
+  // re-share the buffer with var4 instead of fused_var.
+  std::set<std::string> skip_inplace_outvars;
+  for (Instruction& instr : vec_instruction_) {
+    OperatorBase* op = instr.OpBase();
+    if (op->Type() == "coalesce_tensor") {
+      const std::vector<std::string>& outputs =
+          op->OutputVars(/*has_intermediate=*/false);
+      skip_inplace_outvars.insert(outputs.begin(), outputs.end());
+    }
+  }
+
   for (size_t i = 0; i < vec_instruction_.size(); ++i) {
     auto& instr = vec_instruction_[i];
     auto* op_base = instr.OpBase();
@@ -309,17 +331,20 @@ void InterpreterCore::BuildInplace() {
         if (BuildInplaceCheckVarIsOnlyInput(iter->second[0])) {
           auto iterout = outputs.find(pair.second);
           if (iterout != outputs.end() && !iterout->second.empty()) {
-            auto invar =
-                local_scope_->FindVar(var_scope_.GetNameById(iter->second[0]));
-            auto outvar = local_scope_->FindVar(
-                var_scope_.GetNameById(iterout->second[0]));
+            const std::string& invar_name =
+                var_scope_.GetNameById(iter->second[0]);
+            const std::string& outvar_name =
+                var_scope_.GetNameById(iterout->second[0]);
+            auto invar = local_scope_->FindVar(invar_name);
+            auto outvar = local_scope_->FindVar(outvar_name);
+
             if (invar && outvar && invar->IsType<LoDTensor>() &&
-                outvar->IsType<LoDTensor>()) {
+                outvar->IsType<LoDTensor>() &&
+                skip_inplace_outvars.find(outvar_name) ==
+                    skip_inplace_outvars.end()) {
               instr.AddInplace(invar, outvar);
-              VLOG(3) << "inplace " << vec_instruction_[i].OpBase()->Type()
-                      << " " << var_scope_.GetNameById(iter->second[0])
-                      << " -> " << var_scope_.GetNameById(iterout->second[0])
-                      << std::endl;
+              VLOG(3) << "inplace " << op_base->Type() << " " << invar_name
+                      << " -> " << outvar_name;
             }
           }
         }
diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt
index 06fa8eedef4f865654fe792c4cf1c7ea49dcad33..a26a17d91fd03fdfef4acd0b20227808883f281a 100755
--- a/python/paddle/fluid/tests/unittests/CMakeLists.txt
+++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt
@@ -1751,3 +1751,7 @@ py_test_modules(
 
 set_tests_properties(test_add_reader_dependency_for_interpretercore
                      PROPERTIES TIMEOUT 120)
+
+py_test_modules(
+  test_eager_deletion_padding_rnn_for_interpretercore MODULES
+  test_eager_deletion_padding_rnn ENVS FLAGS_CONVERT_GRAPH_TO_PROGRAM=true)
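
The NOTE in the patch explains why outputs of coalesce_tensor must never become the target of an in-place pair. Below is a minimal standalone sketch of that skip-list idea, not Paddle code: `FakeOp`, `InplacePair`, and the toy program are hypothetical stand-ins for `Instruction`/`OperatorBase`, used only to illustrate how collecting coalesce_tensor outputs into a `std::set<std::string>` filters in-place candidates.

```cpp
#include <iostream>
#include <set>
#include <string>
#include <vector>

// Hypothetical stand-in for an instruction: an op type plus its output names.
struct FakeOp {
  std::string type;
  std::vector<std::string> outputs;
};

// Candidate in-place pair: reuse `in`'s buffer for `out`.
struct InplacePair {
  std::string in;
  std::string out;
};

int main() {
  // Toy program mirroring the NOTE: coalesce_tensor fuses var1..var3,
  // then sum writes var1.
  std::vector<FakeOp> program = {
      {"coalesce_tensor", {"fused_var", "var1", "var2", "var3"}},
      {"sum", {"var1"}},
  };

  // Step 1: collect every output of coalesce_tensor; these vars are assumed
  // to alias slices of fused_var, so they must not be re-shared elsewhere.
  std::set<std::string> skip_inplace_outvars;
  for (const FakeOp& op : program) {
    if (op.type == "coalesce_tensor") {
      skip_inplace_outvars.insert(op.outputs.begin(), op.outputs.end());
    }
  }

  // Step 2: filter candidate in-place pairs against the skip set.
  std::vector<InplacePair> candidates = {{"var4", "var1"}, {"var5", "var6"}};
  for (const InplacePair& pair : candidates) {
    if (skip_inplace_outvars.count(pair.out)) {
      std::cout << "skip inplace " << pair.in << " -> " << pair.out << "\n";
    } else {
      std::cout << "inplace " << pair.in << " -> " << pair.out << "\n";
    }
  }
  return 0;
}
```

Running the sketch prints `skip inplace var4 -> var1` and `inplace var5 -> var6`, matching the `var1 = sum(var4, var5)` example in the NOTE: var1 keeps aliasing the fused buffer instead of re-sharing var4's storage.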