From bd79ae091d30a627c1bbd6a85525142e2f5decef Mon Sep 17 00:00:00 2001
From: wanghuancoder
Date: Wed, 15 Sep 2021 06:29:39 -0500
Subject: [PATCH] add inplace logic into new_executor (#35618)

* add inplace logic into new_executor, test=develop

* check shape and add inplace FLAGS, test=develop

* refine, test=develop

* refine, test=develop
---
 .../details/share_tensor_buffer_functor.cc    | 21 --------
 .../details/share_tensor_buffer_functor.h     | 21 ++++++++
 .../framework/new_executor/interpretercore.cc | 50 +++++++++++++++++++
 .../framework/new_executor/interpretercore.h  |  2 +
 .../new_executor/new_executor_defs.h          |  2 +
 python/paddle/fluid/__init__.py               |  1 +
 6 files changed, 76 insertions(+), 21 deletions(-)

diff --git a/paddle/fluid/framework/details/share_tensor_buffer_functor.cc b/paddle/fluid/framework/details/share_tensor_buffer_functor.cc
index 0dc83448b17..ccc64a9cdc3 100644
--- a/paddle/fluid/framework/details/share_tensor_buffer_functor.cc
+++ b/paddle/fluid/framework/details/share_tensor_buffer_functor.cc
@@ -35,27 +35,6 @@ namespace paddle {
 namespace framework {
 namespace details {
 
-// TODO(zjl): support SelectedRows
-static inline const Tensor &GetTensorFromVar(const Variable *var) {
-  if (var->IsType<LoDTensor>()) {
-    return var->Get<LoDTensor>();
-  } else {
-    PADDLE_THROW(platform::errors::InvalidArgument(
-        "Variable must be type of LoDTensor, but received %s.",
-        framework::ToTypeName(var->Type())));
-  }
-}
-
-static inline Tensor *GetMutableTensorFromVar(Variable *var) {
-  if (var->IsType<LoDTensor>()) {
-    return var->GetMutable<LoDTensor>();
-  } else {
-    PADDLE_THROW(platform::errors::InvalidArgument(
-        "Variable must be type of LoDTensor, but received %s.",
-        framework::ToTypeName(var->Type())));
-  }
-}
-
 ShareTensorBufferFunctor::ShareTensorBufferFunctor(
     Scope *scope, size_t scope_idx, const std::string &op_type,
     const std::vector &in_var_infos,
diff --git a/paddle/fluid/framework/details/share_tensor_buffer_functor.h b/paddle/fluid/framework/details/share_tensor_buffer_functor.h
index d464098202a..528b047bccc 100644
--- a/paddle/fluid/framework/details/share_tensor_buffer_functor.h
+++ b/paddle/fluid/framework/details/share_tensor_buffer_functor.h
@@ -39,6 +39,27 @@ namespace paddle {
 namespace framework {
 namespace details {
 
+// TODO(zjl): support SelectedRows
+static inline const Tensor &GetTensorFromVar(const Variable *var) {
+  if (var->IsType<LoDTensor>()) {
+    return var->Get<LoDTensor>();
+  } else {
+    PADDLE_THROW(platform::errors::InvalidArgument(
+        "Variable must be type of LoDTensor, but received %s.",
+        framework::ToTypeName(var->Type())));
+  }
+}
+
+static inline Tensor *GetMutableTensorFromVar(Variable *var) {
+  if (var->IsType<LoDTensor>()) {
+    return var->GetMutable<LoDTensor>();
+  } else {
+    PADDLE_THROW(platform::errors::InvalidArgument(
+        "Variable must be type of LoDTensor, but received %s.",
+        framework::ToTypeName(var->Type())));
+  }
+}
+
 // NOTE(paddle-dev): ShareTensorBufferFunctor is responsible for
 // performing memory reuse in run-time. ShareTensorBufferOpHandle
 // is only a wrapper of ShareTensorBufferFunctor.
diff --git a/paddle/fluid/framework/new_executor/interpretercore.cc b/paddle/fluid/framework/new_executor/interpretercore.cc
index b468a1e9c5e..2a7d0d05e90 100644
--- a/paddle/fluid/framework/new_executor/interpretercore.cc
+++ b/paddle/fluid/framework/new_executor/interpretercore.cc
@@ -17,6 +17,10 @@
 
 #include 
 
+#include "paddle/fluid/framework/details/share_tensor_buffer_functor.h"
+
+DEFINE_bool(new_executor_use_inplace, true, "Use inplace in new executor");
+
 namespace paddle {
 namespace framework {
 
@@ -194,6 +198,41 @@ void InterpreterCore::Convert() {
     gc_event_.emplace_back(vec_instruction_[i].execution_ctx_.get()->GetPlace(),
                            platform::GenerateDeviceEventFlag());
   }
+
+  if (FLAGS_new_executor_use_inplace) {
+    BuildInplace();
+  }
+}
+
+void InterpreterCore::BuildInplace() {
+  for (size_t i = 0; i < vec_instruction_.size(); ++i) {
+    if (!vec_instruction_[i]
+             .kernel_func_.operator_base_->Info()
+             .infer_inplace_) {
+      continue;
+    }
+
+    auto in_to_outs =
+        vec_instruction_[i].kernel_func_.operator_base_->Info().infer_inplace_(
+            platform::is_gpu_place(vec_instruction_[i].dev_ctx_->GetPlace()));
+
+    for (auto& pair : in_to_outs) {
+      auto iter = vec_instruction_[i].input_index_.find(pair.first);
+      if (iter != vec_instruction_[i].input_index_.end()) {
+        if (input_var2op_info_[iter->second[0]].size() == 1) {
+          auto iterout = vec_instruction_[i].output_index_.find(pair.second);
+          if (iterout != vec_instruction_[i].output_index_.end()) {
+            auto invar = global_scope_->var_list[iter->second[0]];
+            auto outvar = global_scope_->var_list[iterout->second[0]];
+            if (invar && outvar) {
+              vec_instruction_[i].vec_inplace_in_to_out_.emplace_back(invar,
+                                                                      outvar);
+            }
+          }
+        }
+      }
+    }
+  }
 }
 
 void InterpreterCore::BuildAndCacheInstructionCtx(
@@ -265,6 +304,17 @@ void InterpreterCore::RunInstruction(const Instruction& instr_node) {
       instr_node.kernel_func_.operator_base_)
       ->InferShape(instr_node.infershape_ctx_.get());
 
+  if (FLAGS_new_executor_use_inplace) {
+    for (auto& pair : instr_node.vec_inplace_in_to_out_) {
+      const auto& in = paddle::framework::details::GetTensorFromVar(pair.first);
+      auto* out =
+          paddle::framework::details::GetMutableTensorFromVar(pair.second);
+      if (in.dims() == out->dims()) {
+        out->ShareBufferWith(in);
+      }
+    }
+  }
+
   instr_node.kernel_func_.compute_func_(*instr_node.execution_ctx_.get());
 }
 
diff --git a/paddle/fluid/framework/new_executor/interpretercore.h b/paddle/fluid/framework/new_executor/interpretercore.h
index 276f5cb2d90..d1eff9272d6 100644
--- a/paddle/fluid/framework/new_executor/interpretercore.h
+++ b/paddle/fluid/framework/new_executor/interpretercore.h
@@ -53,6 +53,8 @@ class InterpreterCore {
                                    const VariableScope& var_scope,
                                    const platform::Place& place);
 
+  void BuildInplace();
+
   void RunInstruction(const Instruction& instr_node);
 
   void ExecuteInstructionList(const std::vector& vec_instr,
diff --git a/paddle/fluid/framework/new_executor/new_executor_defs.h b/paddle/fluid/framework/new_executor/new_executor_defs.h
index 39ac14b9749..ebbe3ed17bc 100644
--- a/paddle/fluid/framework/new_executor/new_executor_defs.h
+++ b/paddle/fluid/framework/new_executor/new_executor_defs.h
@@ -522,6 +522,8 @@ struct Instruction {
   std::vector output_events_;
 
   platform::DeviceContext* dev_ctx_;  // not owned
+
+  std::vector<std::pair<Variable*, Variable*>> vec_inplace_in_to_out_;
 };
 
 enum class OpFuncType {
diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py
index cffbc294666..30e3f9dec97 100644
--- a/python/paddle/fluid/__init__.py
+++ b/python/paddle/fluid/__init__.py
@@ -209,6 +209,7 @@ def __bootstrap__():
         'sort_sum_gradient',
         'max_inplace_grad_add',
         'apply_pass_to_program',
+        'new_executor_use_inplace',
     ]
     if 'Darwin' not in sysstr:
         read_env_flags.append('use_pinned_memory')
-- 
GitLab
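
Usage sketch (illustrative, not part of the patch): since 'new_executor_use_inplace' is appended to read_env_flags in __bootstrap__() above, the flag should be controllable through the matching FLAGS_-prefixed environment variable before paddle is imported. The snippet below assumes that standard flag-bootstrap behavior; the exact entry point that consumes the flag is not shown in this diff.

    # Illustrative sketch only: toggle the new executor's inplace buffer reuse
    # via the FLAGS_new_executor_use_inplace environment variable. The flag
    # defaults to True (see the DEFINE_bool above); reading FLAGS_* environment
    # variables during paddle.fluid's __bootstrap__() is assumed here.
    import os

    os.environ['FLAGS_new_executor_use_inplace'] = 'false'  # disable inplace reuse

    import paddle  # the flag is picked up when paddle.fluid bootstraps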