From 1fe4513cc144375fdcd12df03e6b47c2d9cfc719 Mon Sep 17 00:00:00 2001 From: Leo Chen Date: Fri, 12 Nov 2021 11:38:43 +0800 Subject: [PATCH] Refine new executor (#37074) * split declaration and implementation * remove initdevices * refine VariableMetaInfo * add ut * fix compile --- paddle/fluid/framework/executor_gc_helper.cc | 30 + paddle/fluid/framework/executor_gc_helper.h | 31 +- .../framework/new_executor/CMakeLists.txt | 9 +- .../framework/new_executor/interpretercore.cc | 12 +- .../new_executor/new_executor_defs.cc | 633 ++++++++++++++++++ .../new_executor/new_executor_defs.h | 610 +++-------------- .../new_executor/standalone_executor.cc | 17 +- .../new_executor/standalone_executor.h | 5 +- .../new_executor/standalone_executor_test.cc | 1 + paddle/fluid/framework/scope.cc | 4 +- paddle/fluid/framework/scope.h | 6 +- .../interpreter/test_standalone_executor.py | 18 +- 12 files changed, 806 insertions(+), 570 deletions(-) create mode 100644 paddle/fluid/framework/new_executor/new_executor_defs.cc diff --git a/paddle/fluid/framework/executor_gc_helper.cc b/paddle/fluid/framework/executor_gc_helper.cc index 8c64d65ff4..6e5578a2d1 100644 --- a/paddle/fluid/framework/executor_gc_helper.cc +++ b/paddle/fluid/framework/executor_gc_helper.cc @@ -31,6 +31,36 @@ namespace paddle { namespace framework { +void OpInOutInfo::Build(const OperatorBase *op) { + is_built_ = true; + auto &inferer = op->Info().NoNeedBufferVarsInferer(); + if (inferer) { + no_need_buffer_ins_ = inferer(op->Inputs(), op->Outputs(), op->Attrs()); + + if (no_need_buffer_ins_.empty()) return; + + for (auto &in_name_pair : op->Inputs()) { + if (no_need_buffer_ins_.count(in_name_pair.first) != 0) { + continue; + } + + for (auto &in_arg_name : in_name_pair.second) { + other_args_set_.insert(in_arg_name); + } + } + + for (auto &out_name_pair : op->Outputs()) { + for (auto &out_arg_name : out_name_pair.second) { + other_args_set_.insert(out_arg_name); + } + } + } +} + +bool OpInOutInfo::IsInArgBufferNeeded(const std::string &in_arg_name) const { + return no_need_buffer_ins_.empty() || other_args_set_.count(in_arg_name) != 0; +} + static bool VarCanBeDeleted(const std::string &name, const BlockDesc &block, const std::unordered_set &skip_vars) { if (skip_vars.count(name) != 0) { diff --git a/paddle/fluid/framework/executor_gc_helper.h b/paddle/fluid/framework/executor_gc_helper.h index e354a83e5c..80bd68f7c8 100644 --- a/paddle/fluid/framework/executor_gc_helper.h +++ b/paddle/fluid/framework/executor_gc_helper.h @@ -33,38 +33,11 @@ class Scope; struct OpInOutInfo { public: - void Build(const OperatorBase *op) { - is_built_ = true; - auto &inferer = op->Info().NoNeedBufferVarsInferer(); - if (inferer) { - no_need_buffer_ins_ = inferer(op->Inputs(), op->Outputs(), op->Attrs()); - - if (no_need_buffer_ins_.empty()) return; - - for (auto &in_name_pair : op->Inputs()) { - if (no_need_buffer_ins_.count(in_name_pair.first) != 0) { - continue; - } - - for (auto &in_arg_name : in_name_pair.second) { - other_args_set_.insert(in_arg_name); - } - } - - for (auto &out_name_pair : op->Outputs()) { - for (auto &out_arg_name : out_name_pair.second) { - other_args_set_.insert(out_arg_name); - } - } - } - } + void Build(const OperatorBase *op); bool IsBuilt() const { return is_built_; } - bool IsInArgBufferNeeded(const std::string &in_arg_name) const { - return no_need_buffer_ins_.empty() || - other_args_set_.count(in_arg_name) != 0; - } + bool IsInArgBufferNeeded(const std::string &in_arg_name) const; private: // A set to record unused buffer 
input vars of op diff --git a/paddle/fluid/framework/new_executor/CMakeLists.txt b/paddle/fluid/framework/new_executor/CMakeLists.txt index d758e98b41..622aeec142 100644 --- a/paddle/fluid/framework/new_executor/CMakeLists.txt +++ b/paddle/fluid/framework/new_executor/CMakeLists.txt @@ -3,10 +3,11 @@ lod_rank_table fs shell fleet_wrapper heter_wrapper ps_gpu_wrapper box_wrapper l graph_to_program_pass variable_helper timer monitor nan_inf_utils) cc_library(workqueue SRCS workqueue.cc workqueue_utils.cc DEPS enforce) -cc_library(interpretercore_garbage_collector SRCS interpretercore_garbage_collector.cc DEPS workqueue ${DEVICE_EVENT_LIBS}) -cc_library(interpretercore_util SRCS interpretercore_util.cc DEPS ${INTERPRETERCORE_DEPS} workqueue) -cc_library(event_manager SRCS event_manager.cc DEPS ${DEVICE_EVENT_LIBS} glog) -cc_library(stream_analyzer SRCS stream_analyzer.cc DEPS ${DEVICE_EVENT_LIBS} glog device_context) +cc_library(new_executor_defs SRCS new_executor_defs.cc DEPS enforce glog scope) +cc_library(interpretercore_garbage_collector SRCS interpretercore_garbage_collector.cc DEPS workqueue ${DEVICE_EVENT_LIBS} executor_gc_helper) +cc_library(interpretercore_util SRCS interpretercore_util.cc DEPS ${INTERPRETERCORE_DEPS} workqueue new_executor_defs) +cc_library(event_manager SRCS event_manager.cc DEPS ${DEVICE_EVENT_LIBS} glog new_executor_defs) +cc_library(stream_analyzer SRCS stream_analyzer.cc DEPS ${DEVICE_EVENT_LIBS} glog device_context new_executor_defs) cc_library(interpretercore SRCS interpretercore.cc DEPS workqueue ${DEVICE_EVENT_LIBS} interpretercore_util interpretercore_garbage_collector stream_analyzer event_manager) cc_library(standalone_executor SRCS standalone_executor.cc DEPS interpretercore) cc_test(workqueue_test SRCS workqueue_test.cc DEPS workqueue) diff --git a/paddle/fluid/framework/new_executor/interpretercore.cc b/paddle/fluid/framework/new_executor/interpretercore.cc index 89810fd303..84b765680f 100644 --- a/paddle/fluid/framework/new_executor/interpretercore.cc +++ b/paddle/fluid/framework/new_executor/interpretercore.cc @@ -121,6 +121,8 @@ void InterpreterCore::Convert() { for (auto var_id : gc_check_input_list) { vec_meta_info[var_id].var_ref_count_++; instr.AddGCCheckVar(var_id); + VLOG(4) << "clear " << global_scope_->GetNameById(var_id) << " after " + << instr.OpBase()->Type(); } } @@ -131,6 +133,8 @@ void InterpreterCore::Convert() { if (input_var2op_info_.at(id).size() == 0) { // output var not be used by any kernel vec_instruction_[i].AddGCCheckVar(id); + VLOG(4) << "clear " << global_scope_->GetNameById(id) << " after " + << vec_instruction_[i].OpBase()->Type(); vec_meta_info[id].var_ref_count_++; } } @@ -437,6 +441,8 @@ void InterpreterCore::RunInstructionAsync(size_t instr_id) { try { RunInstruction(instr_node); + // GC infomation + CheckGC(instr_node); } catch (platform::EnforceNotMet& ex) { framework::InsertCallStackInfo(op->Type(), op->Attrs(), &ex); exception_holder_.Catch(std::make_exception_ptr(std::move(ex))); @@ -463,9 +469,6 @@ void InterpreterCore::RunInstructionAsync(size_t instr_id) { interpreter::RecordEvent(instr_node, place_); op_run_number_.fetch_add(1, std::memory_order_relaxed); - // GC infomation - CheckGC(instr_node); - RunNextInstructions(instr_node, &ready_ops); } } @@ -476,6 +479,9 @@ void InterpreterCore::CheckGC(const Instruction& instr) { auto& atomic_var_ref = async_work_queue_->AtomicVarRef(); for (auto var_id : instr.GCCheckVars()) { + VLOG(4) << "GC " << global_scope_->GetNameById(var_id) << " " + << 
var_scope.VarDesc(var_id); + bool is_ready = atomic_var_ref[var_id]->fetch_sub(1, std::memory_order_relaxed) == 1; // ignore all persistable var while GC diff --git a/paddle/fluid/framework/new_executor/new_executor_defs.cc b/paddle/fluid/framework/new_executor/new_executor_defs.cc new file mode 100644 index 0000000000..221ad2dd62 --- /dev/null +++ b/paddle/fluid/framework/new_executor/new_executor_defs.cc @@ -0,0 +1,633 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include + +#include "paddle/fluid/framework/new_executor/new_executor_defs.h" +#include "paddle/fluid/framework/rw_lock.h" + +// When in inference scenario, the scopes will not be written by two threads in +// a mean time, but a scope may be read by multiple threads concurrently, and +// the mutex will cause serious performance issue. +// So the mutex is disabled when `ON_INFER`. +#ifdef PADDLE_ON_INFERENCE +#define SCOPE_VARS_READER_LOCK +#define SCOPE_VARS_WRITER_LOCK +#else +#define SCOPE_VARS_READER_LOCK AutoRDLock auto_lock(&vars_lock_); +#define SCOPE_VARS_WRITER_LOCK AutoWRLock auto_lock(&vars_lock_); +#endif + +namespace paddle { +namespace framework { + +InterpretercoreInferShapeContext::InterpretercoreInferShapeContext( + const OperatorBase& op, const RuntimeContext& ctx) + : op_(op), ctx_(ctx), can_skip_lod_(false) {} + +bool InterpretercoreInferShapeContext::HasInput(const std::string& name) const { + // has only one input + const auto& ins = ctx_.inputs; + auto it = ins.find(name); + if (it == ins.end()) { + return false; + } + const auto& in = it->second; + if (in.size() == 0) return false; + PADDLE_ENFORCE_EQ( + in.size(), 1UL, + platform::errors::InvalidArgument( + "Input %s should not contain more than one inputs.", name)); + return in[0] != nullptr; +} + +bool InterpretercoreInferShapeContext::HasOutput( + const std::string& name) const { + // has only one output + const auto& outs = ctx_.outputs; + auto it = outs.find(name); + if (it == outs.end()) { + return false; + } + const auto& out = it->second; + if (out.size() == 0) { + return false; + } + PADDLE_ENFORCE_EQ( + out.size(), 1UL, + platform::errors::InvalidArgument( + "Output %s should not contain more than one outputs.", name)); + return out[0] != nullptr; +} + +bool InterpretercoreInferShapeContext::HasInputs( + const std::string& name) const { + const auto& ins = ctx_.inputs; + auto it = ins.find(name); + if (it == ins.end() || it->second.empty()) { + return false; + } + for (auto& input : it->second) { + if (input == nullptr) { + return false; + } + } + return true; +} + +bool InterpretercoreInferShapeContext::HasOutputs( + const std::string& name) const { + const auto& outs = ctx_.outputs; + auto it = outs.find(name); + if (it == outs.end() || it->second.empty()) { + return false; + } + for (auto& output : it->second) { + if (output == nullptr) { + return false; + } + } + return true; +} + +AttrReader 
InterpretercoreInferShapeContext::Attrs() const { + return AttrReader(op_.Attrs()); +} + +std::vector InterpretercoreInferShapeContext::Inputs( + const std::string& name) const { + return op_.Inputs(name); +} + +std::vector InterpretercoreInferShapeContext::Outputs( + const std::string& name) const { + return op_.Outputs(name); +} + +std::string InterpretercoreInferShapeContext::GetInputNameByIdx( + size_t idx) const { + auto& op_proto = + paddle::framework::OpInfoMap::Instance().Get(op_.Type()).proto_; + PADDLE_ENFORCE_LT(idx, op_proto->inputs().size(), + platform::errors::OutOfRange( + "The index should be less than the size of inputs of " + "operator %s, but got index is %d and size is %d", + op_.Type(), idx, op_proto->inputs().size())); + return op_proto->inputs()[idx].name(); +} + +std::string InterpretercoreInferShapeContext::GetOutputNameByIdx( + size_t idx) const { + auto& op_proto = + paddle::framework::OpInfoMap::Instance().Get(op_.Type()).proto_; + PADDLE_ENFORCE_LT(idx, op_proto->outputs().size(), + platform::errors::OutOfRange( + "The index should be less than the size of outputs of " + "operator %s, but got index is %d and size is %d", + op_.Type(), idx, op_proto->outputs().size())); + return op_proto->outputs()[idx].name(); +} + +void InterpretercoreInferShapeContext::ShareDim(const std::string& in, + const std::string& out, + size_t i, size_t j) { + auto in_it = ctx_.inputs.find(in); + auto out_it = ctx_.outputs.find(out); + PADDLE_ENFORCE_NE(in_it, ctx_.inputs.end(), + platform::errors::NotFound("Input %s does not exist.", in)); + PADDLE_ENFORCE_NE( + out_it, ctx_.outputs.end(), + platform::errors::NotFound("Output %s does not exist.", out)); + PADDLE_ENFORCE_LT(i, in_it->second.size(), + platform::errors::InvalidArgument( + "The index of input dimension is out of range, " + "excepted index less than %zu, but received %zu.", + in_it->second.size(), i)); + PADDLE_ENFORCE_LT(j, out_it->second.size(), + platform::errors::InvalidArgument( + "The index of output dimension is out of range, " + "excepted index less than %zu, but received %zu.", + out_it->second.size(), j)); + + Variable* in_var = in_it->second[i]; + Variable* out_var = out_it->second[j]; + + PADDLE_ENFORCE_EQ( + in_var->Type(), out_var->Type(), + platform::errors::InvalidArgument( + "The type of input (%s) and output (%s) are inconsistent.", in, out)); + + if (in_var->IsType()) { + auto& in_sele_rows = in_var->Get(); + auto out_sele_rows = out_var->GetMutable(); + out_sele_rows->mutable_value()->Resize(in_sele_rows.value().dims()); + out_sele_rows->set_rows(in_sele_rows.rows()); + out_sele_rows->set_height(in_sele_rows.height()); + } else if (in_var->IsType()) { + auto& in_lod_tensor = in_var->Get(); + auto* out_lod_tensor = out_var->GetMutable(); + out_lod_tensor->Resize(in_lod_tensor.dims()); + } else { + PADDLE_THROW(platform::errors::Unimplemented( + "Currently, the input type of ShareDim only can be LoDTensor " + "or SelectedRows.")); + } +} + +void InterpretercoreInferShapeContext::ShareAllLoD( + const std::string& in, const std::string& out) const { + auto in_it = ctx_.inputs.find(in); + auto out_it = ctx_.outputs.find(out); + PADDLE_ENFORCE_NE(in_it, ctx_.inputs.end(), + platform::errors::NotFound( + "Input [%s] found error in Op [%s]", in, op_.Type())); + PADDLE_ENFORCE_NE(out_it, ctx_.outputs.end(), + platform::errors::NotFound( + "Output [%s] found error in Op [%s]", out, op_.Type())); + + auto& in_var_list = in_it->second; + auto& out_var_list = out_it->second; + + PADDLE_ENFORCE_EQ( + 
in_var_list.size(), out_var_list.size(), + platform::errors::PreconditionNotMet( + "Op [%s]: Input var size should be equal with output var size", + op_.Type())); + + auto& out_var_names = op_.Outputs(out); + + for (size_t i = 0; i < in_var_list.size(); ++i) { + if (out_var_names[i] == framework::kEmptyVarName) { + continue; + } + + Variable* in_var = in_var_list[i]; + if (!in_var->IsType()) return; + Variable* out_var = out_var_list[i]; + PADDLE_ENFORCE_EQ(out_var->IsType(), true, + platform::errors::PreconditionNotMet( + "The %d-th output of Output(%s) must be LoDTensor.", + i, out_var_names[i])); + auto& in_tensor = in_var->Get(); + auto* out_tensor = out_var->GetMutable(); + out_tensor->set_lod(in_tensor.lod()); +#ifdef PADDLE_WITH_MKLDNN + if (in_tensor.layout() != DataLayout::kMKLDNN) +#endif + out_tensor->set_layout(in_tensor.layout()); + } +} + +void InterpretercoreInferShapeContext::ShareLoD(const std::string& in, + const std::string& out, + size_t i, size_t j) const { + if (can_skip_lod_) { + return; + } + auto in_it = ctx_.inputs.find(in); + auto out_it = ctx_.outputs.find(out); + PADDLE_ENFORCE_NE(in_it, ctx_.inputs.end(), + platform::errors::NotFound("Input %s does not exist.", in)); + PADDLE_ENFORCE_NE( + out_it, ctx_.outputs.end(), + platform::errors::NotFound("Output %s does not exist.", out)); + PADDLE_ENFORCE_LT(i, in_it->second.size(), + platform::errors::InvalidArgument( + "The index of input dimension is out of range, " + "excepted index less than %zu, but received %zu.", + in_it->second.size(), i)); + PADDLE_ENFORCE_LT(j, out_it->second.size(), + platform::errors::InvalidArgument( + "The index of output dimension is out of range, " + "excepted index less than %zu, but received %zu.", + out_it->second.size(), j)); + + Variable* in_var = in_it->second.at(i); + if (!in_var->IsType()) return; + Variable* out_var = out_it->second.at(j); + PADDLE_ENFORCE_EQ( + out_var->IsType(), true, + platform::errors::InvalidArgument( + "The %zu-th output of Output(%s) must be LoDTensor.", j, out)); + auto& in_tensor = in_var->Get(); + auto* out_tensor = out_var->GetMutable(); + out_tensor->set_lod(in_tensor.lod()); + +// TODO(dzhwinter) : reuse ShareLoD in most operators. +// Need to call ShareLayout explicitly in sequence related ops. +// Shall we have a better method to shared info between in/out Tensor? +#ifdef PADDLE_WITH_MKLDNN + // Fix me: ugly workaround below + // Correct solution: + // set_layout() should NOT be called here (i.e. ShareLoD). Instead, + // layout of output tensor should be set "manually" in Compute() + // of each OPKernel. The reason layout should NOT be shared between + // input and output "automatically" (now by InferShape()->ShareLoD()) + // is that layout transform may occur after InferShape(). + // Workaround: + // Skip set_layout() when input layout is kMKLDNN + // This is to avoid kMKLDNN is populated wrongly into a non-MKLDNN + // OPKernel. In all MKLDNN OPkernel, set_layout(kMKLDNN) should be called + // in Compute() + if (in_tensor.layout() != DataLayout::kMKLDNN) +#endif + out_tensor->set_layout(in_tensor.layout()); +} + +int32_t InterpretercoreInferShapeContext::GetLoDLevel(const std::string& in, + size_t i) const { + PADDLE_THROW(platform::errors::PreconditionNotMet( + "GetLoDLevel is only used in compile time. 
The calculation of " + "output's actual lod is different among operators so that should be " + "set in the runtime kernel.")); +} + +void InterpretercoreInferShapeContext::SetLoDLevel(const std::string& out, + int32_t lod_level, + size_t j) const { + PADDLE_THROW(platform::errors::PreconditionNotMet( + "SetLoDLevel is only used in compile time. The calculation of " + "output's actual lod is different among operators so that should be " + "set in the runtime kernel.")); +} + +bool InterpretercoreInferShapeContext::IsRuntime() const { return true; } + +// TODO(paddle-dev): Can this be template? +std::vector InterpretercoreInferShapeContext::GetInputVarPtrs( + const std::string& name) { + const std::vector& vars = InputVars(name); + std::vector res; + res.reserve(vars.size()); + res.insert(res.begin(), vars.begin(), vars.end()); + return res; +} + +std::vector +InterpretercoreInferShapeContext::GetOutputVarPtrs(const std::string& name) { + const std::vector& vars = OutputVars(name); + std::vector res; + res.reserve(vars.size()); + res.insert(res.begin(), vars.begin(), vars.end()); + return res; +} + +DDim InterpretercoreInferShapeContext::GetInputDim( + const std::string& name) const { + const std::vector& vars = InputVars(name); + PADDLE_ENFORCE_EQ( + vars.size(), 1UL, + platform::errors::InvalidArgument( + "Input(%s) should hold one element, but now it holds %zu elements.", + name, vars.size())); + return this->GetDim(vars[0]); +} + +std::vector InterpretercoreInferShapeContext::GetInputsDim( + const std::string& name) const { + const std::vector& vars = InputVars(name); + return GetDims(vars); +} + +std::vector +InterpretercoreInferShapeContext::GetInputsVarType( + const std::string& name) const { + return GetVarTypes(InputVars(name)); +} + +std::vector +InterpretercoreInferShapeContext::GetOutputsVarType( + const std::string& name) const { + return GetVarTypes(OutputVars(name)); +} + +void InterpretercoreInferShapeContext::SetOutputDim(const std::string& name, + const DDim& dim) { + auto& vars = OutputVars(name); + PADDLE_ENFORCE_EQ(vars.size(), 1UL, platform::errors::InvalidArgument( + "Output(%s) should hold one element, " + "but now it holds %zu elements.", + name, vars.size())); + SetDim(vars[0], dim); +} + +void InterpretercoreInferShapeContext::SetOutputsDim( + const std::string& name, const std::vector& dims) { + auto& vars = OutputVars(name); + SetDims(vars, dims); +} + +void InterpretercoreInferShapeContext::SetSkipLoD(bool skip) { + can_skip_lod_ = skip; +} + +DDim InterpretercoreInferShapeContext::GetDim(Variable* var) const { + PADDLE_ENFORCE_NOT_NULL( + var, platform::errors::InvalidArgument("Input variable is nullptr.")); + if (var->IsType()) { + return var->Get().dims(); + } else if (var->IsType()) { + return var->Get().GetCompleteDims(); + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "Only LoDTensor or SelectedRows support 'GetDim', but input " + "Variable's type is %s.", + ToTypeName(var->Type()))); + } +} + +std::vector InterpretercoreInferShapeContext::GetDims( + const std::vector& vars) const { + std::vector ret; + ret.reserve(vars.size()); + std::transform(vars.begin(), vars.end(), std::back_inserter(ret), + [this](Variable* var) { return this->GetDim(var); }); + return ret; +} + +std::vector InterpretercoreInferShapeContext::GetRepeatedDims( + const std::string& name) const { + PADDLE_THROW(platform::errors::PreconditionNotMet( + "GetRepeatedDims method only ban be used in compile time.")); +} + +void InterpretercoreInferShapeContext::SetDim(Variable* 
var, const DDim& dim) { + if (var->IsType()) { + var->GetMutable()->Resize(dim); + } else if (var->IsType()) { + var->GetMutable()->set_height(dim[0]); + } else { + PADDLE_THROW(platform::errors::Unimplemented( + "Variable type error, expect LoDTensor or SelectedRows, but received " + "(%s).", + ToTypeName(var->Type()))); + } +} + +void InterpretercoreInferShapeContext::SetDims( + const std::vector& vars, const std::vector& dims) { + size_t length = vars.size(); + PADDLE_ENFORCE_EQ(length, dims.size(), + platform::errors::InvalidArgument( + "The number of input variables do not match the " + "number of input dimensions, the number of variables " + "is %zu, the number of dimensions is %zu.", + length, dims.size())); + for (size_t i = 0; i < length; ++i) { + if (vars[i] == nullptr) { + continue; + } + SetDim(vars[i], dims[i]); + } +} + +void InterpretercoreInferShapeContext::SetRepeatedDims( + const std::string& name, const std::vector& dims) { + PADDLE_THROW(platform::errors::PreconditionNotMet( + "SetRepeatedDims method only can be used in compile time.")); +} + +std::vector InterpretercoreInferShapeContext::GetVarTypes( + const std::vector& vars) const { + std::vector retv; + retv.resize(vars.size()); + std::transform( + vars.begin(), vars.end(), retv.begin(), + std::bind(std::mem_fn(&InterpretercoreInferShapeContext::GetVarType), + this, std::placeholders::_1)); + return retv; +} + +proto::VarType::Type InterpretercoreInferShapeContext::GetVarType( + Variable* var) const { + return ToVarType(var->Type()); +} + +const std::vector& InterpretercoreInferShapeContext::InputVars( + const std::string& name) const { + auto it = ctx_.inputs.find(name); + PADDLE_ENFORCE_NE( + it, ctx_.inputs.end(), + platform::errors::NotFound("Operator (%s) does not have the input (%s).", + op_.Type(), name)); + return it->second; +} + +const std::vector& InterpretercoreInferShapeContext::OutputVars( + const std::string& name) const { + auto it = ctx_.outputs.find(name); + PADDLE_ENFORCE_NE( + it, ctx_.outputs.end(), + platform::errors::NotFound( + "Operator (%s) does not have the outputs (%s).", op_.Type(), name)); + return it->second; +} + +VariableScope::VariableScope(Scope* scope) { + // for @EMPTY@ variable + var_list_.push_back(nullptr); + name2id_[kEmptyVarName] = 0; + vec_meta_info_.emplace_back(0, nullptr); + scope_ = scope; + PADDLE_ENFORCE_NE( + scope, nullptr, + platform::errors::PreconditionNotMet( + "You have passed a nullptr to construct VariableScope.")); + listener_ = std::make_shared(this); + scope->AddListener(listener_); +} + +VariableScope::~VariableScope() { + if (scope_ && listener_) { + scope_->DelListener(listener_); + } +} + +const Scope* VariableScope::GetScope() const { return scope_; } + +Variable* VariableScope::FindVar(const std::string& name) const { + auto it = name2id_.find(name); + if (it != name2id_.end()) { + PADDLE_ENFORCE_LT(it->second, var_list_.size(), + platform::errors::NotFound( + "The id(%d) of variable(%s) should not be larger " + "than the size of variable list(%d).", + it->second, name, var_list_.size())); + return var_list_[it->second]; + } + return nullptr; +} + +// Get variable id by name, return -1 if not found +int VariableScope::GetIdByName(const std::string& name) const { + auto it = name2id_.find(name); + if (it != name2id_.end()) { + return it->second; + } + return -1; +} + +// Get variable name by id, return "" if not found +std::string VariableScope::GetNameById(int id) const { + // NOTE(zhiqiu): do not use vec_meta_info_[id].vardesc_->Name() since + // 
vec_meta_info_[id] may be nullptr, + // typically when the target variable is not existed in the original program + // desc, but created by interpretercore. + // For example, created and used by d2h_copy or h2d_copy operator. + auto it = std::find_if(name2id_.begin(), name2id_.end(), + [id](const auto& pair) { return pair.second == id; }); + if (it != name2id_.end()) { + return it->first; + } + return ""; +} + +bool VariableScope::HasVar(const std::string& name) const { + return name2id_.find(name) != name2id_.end(); +} + +int VariableScope::VarId(const std::string& name) const { + CheckExist(name); + return name2id_.at(name); +} + +Variable* VariableScope::Var(int id) const { return var_list_.at(id); } + +Variable* VariableScope::Var(const std::string& name) const { + return var_list_.at(VarId(name)); +} + +size_t VariableScope::VarSize() const { return var_list_.size(); } + +void VariableScope::AddVar(const std::string& name, + framework::VarDesc* var_desc) { // NOLINT + auto v = scope_->Var(name); + if (nullptr == var_desc) { + v->GetMutable(); + } else { + InitializeVariable( + v, + var_desc + ->GetType()); // Scope don't initialize variable recently created + } + SetVarDesc(name, var_desc); +} + +void VariableScope::AddVar(const std::string& name, + const Variable& var) { // NOLINT + // Though name existed in outer_scope_, we need + // add again to create name2id map. + scope_->Var(name); +} + +void VariableScope::SetVarDesc(const std::string& name, + framework::VarDesc* var_desc) { + CheckExist(name); + vec_meta_info_[VarId(name)].var_desc_ = var_desc; +} + +paddle::framework::VarDesc* VariableScope::VarDesc( + const std::string& name) const { + return VarDesc(VarId(name)); +} + +paddle::framework::VarDesc* VariableScope::VarDesc(int id) const { + CheckExist(id); + return vec_meta_info_[id].var_desc_; +} + +void VariableScope::CheckExist(int id) const { + PADDLE_ENFORCE_LT(id, var_list_.size(), + platform::errors::PreconditionNotMet( + "Required var_id < %d, but received var_id = %d.", + var_list_.size(), id)); +} + +void VariableScope::CheckExist(const std::string& name) const { + PADDLE_ENFORCE_EQ(HasVar(name), true, platform::errors::NotFound( + "%s not in VariableScope.", name)); +} + +VariableScopeListener::VariableScopeListener(VariableScope* var_scope) { + var_scope_ = var_scope; +} + +void VariableScopeListener::onCreateVariable(const std::string& name) { + auto v = var_scope_->scope_->GetVar(name); // must exsit in outer_scope_ + if (!var_scope_->HasVar(name)) { // may exist in variable scope. 
+ VLOG(4) << "Calling VariableScope::onCreateVariable with var_name: " + << name; + var_scope_->name2id_[name] = var_scope_->VarSize(); + var_scope_->var_list_.emplace_back(v); + var_scope_->vec_meta_info_.emplace_back(0, nullptr); + } +} + +void VariableScopeListener::onDeleteVariable(const std::string& name) { + if (var_scope_->HasVar(name)) { + VLOG(4) << "Calling VariableScope::onDeleteVariable with var_name: " + << name; + } +} +void VariableScopeListener::onRenameVariable(const std::string& old_name, + const std::string& new_name) {} +void VariableScopeListener::onCreateScope(Scope* Scope) {} +void VariableScopeListener::onDeleteScope(Scope* Scope) {} +void VariableScopeListener::onClear() {} + +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/new_executor/new_executor_defs.h b/paddle/fluid/framework/new_executor/new_executor_defs.h index c765b7fe4d..4206f2733a 100644 --- a/paddle/fluid/framework/new_executor/new_executor_defs.h +++ b/paddle/fluid/framework/new_executor/new_executor_defs.h @@ -19,10 +19,23 @@ #include #include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/framework/rw_lock.h" #include "paddle/fluid/framework/variable_helper.h" #include "paddle/fluid/platform/device_event_base.h" #include "paddle/fluid/platform/event.h" +// When in inference scenario, the scopes will not be written by two threads in +// a mean time, but a scope may be read by multiple threads concurrently, and +// the mutex will cause serious performance issue. +// So the mutex is disabled when `ON_INFER`. +#ifdef PADDLE_ON_INFERENCE +#define SCOPE_VARS_READER_LOCK +#define SCOPE_VARS_WRITER_LOCK +#else +#define SCOPE_VARS_READER_LOCK AutoRDLock auto_lock(&vars_lock_); +#define SCOPE_VARS_WRITER_LOCK AutoWRLock auto_lock(&vars_lock_); +#endif + namespace paddle { namespace framework { @@ -33,429 +46,90 @@ using OpKernelMap = class InterpretercoreInferShapeContext : public InferShapeContext { public: InterpretercoreInferShapeContext(const OperatorBase& op, - const RuntimeContext& ctx) - : op_(op), ctx_(ctx), can_skip_lod_(false) {} - - bool HasInput(const std::string& name) const override { - // has only one input - const auto& ins = ctx_.inputs; - auto it = ins.find(name); - if (it == ins.end()) { - return false; - } - const auto& in = it->second; - if (in.size() == 0) return false; - PADDLE_ENFORCE_EQ( - in.size(), 1UL, - platform::errors::InvalidArgument( - "Input %s should not contain more than one inputs.", name)); - return in[0] != nullptr; - } + const RuntimeContext& ctx); - bool HasOutput(const std::string& name) const override { - // has only one output - const auto& outs = ctx_.outputs; - auto it = outs.find(name); - if (it == outs.end()) { - return false; - } - const auto& out = it->second; - if (out.size() == 0) { - return false; - } - PADDLE_ENFORCE_EQ( - out.size(), 1UL, - platform::errors::InvalidArgument( - "Output %s should not contain more than one outputs.", name)); - return out[0] != nullptr; - } + bool HasInput(const std::string& name) const override; - bool HasInputs(const std::string& name) const override { - const auto& ins = ctx_.inputs; - auto it = ins.find(name); - if (it == ins.end() || it->second.empty()) { - return false; - } - for (auto& input : it->second) { - if (input == nullptr) { - return false; - } - } - return true; - } + bool HasOutput(const std::string& name) const override; - bool HasOutputs(const std::string& name) const override { - const auto& outs = ctx_.outputs; - auto it = outs.find(name); - if (it == 
outs.end() || it->second.empty()) { - return false; - } - for (auto& output : it->second) { - if (output == nullptr) { - return false; - } - } - return true; - } + bool HasInputs(const std::string& name) const override; - AttrReader Attrs() const override { return AttrReader(op_.Attrs()); } + bool HasOutputs(const std::string& name) const override; - std::vector Inputs(const std::string& name) const override { - return op_.Inputs(name); - } + AttrReader Attrs() const override; - std::vector Outputs(const std::string& name) const override { - return op_.Outputs(name); - } + std::vector Inputs(const std::string& name) const override; - std::string GetInputNameByIdx(size_t idx) const override { - auto& op_proto = - paddle::framework::OpInfoMap::Instance().Get(op_.Type()).proto_; - PADDLE_ENFORCE_LT(idx, op_proto->inputs().size(), - platform::errors::OutOfRange( - "The index should be less than the size of inputs of " - "operator %s, but got index is %d and size is %d", - op_.Type(), idx, op_proto->inputs().size())); - return op_proto->inputs()[idx].name(); - } + std::vector Outputs(const std::string& name) const override; - std::string GetOutputNameByIdx(size_t idx) const override { - auto& op_proto = - paddle::framework::OpInfoMap::Instance().Get(op_.Type()).proto_; - PADDLE_ENFORCE_LT( - idx, op_proto->outputs().size(), - platform::errors::OutOfRange( - "The index should be less than the size of outputs of " - "operator %s, but got index is %d and size is %d", - op_.Type(), idx, op_proto->outputs().size())); - return op_proto->outputs()[idx].name(); - } + std::string GetInputNameByIdx(size_t idx) const override; + + std::string GetOutputNameByIdx(size_t idx) const override; void ShareDim(const std::string& in, const std::string& out, size_t i = 0, - size_t j = 0) override { - auto in_it = ctx_.inputs.find(in); - auto out_it = ctx_.outputs.find(out); - PADDLE_ENFORCE_NE( - in_it, ctx_.inputs.end(), - platform::errors::NotFound("Input %s does not exist.", in)); - PADDLE_ENFORCE_NE( - out_it, ctx_.outputs.end(), - platform::errors::NotFound("Output %s does not exist.", out)); - PADDLE_ENFORCE_LT(i, in_it->second.size(), - platform::errors::InvalidArgument( - "The index of input dimension is out of range, " - "excepted index less than %zu, but received %zu.", - in_it->second.size(), i)); - PADDLE_ENFORCE_LT(j, out_it->second.size(), - platform::errors::InvalidArgument( - "The index of output dimension is out of range, " - "excepted index less than %zu, but received %zu.", - out_it->second.size(), j)); - - Variable* in_var = in_it->second[i]; - Variable* out_var = out_it->second[j]; - - PADDLE_ENFORCE_EQ( - in_var->Type(), out_var->Type(), - platform::errors::InvalidArgument( - "The type of input (%s) and output (%s) are inconsistent.", in, - out)); - - if (in_var->IsType()) { - auto& in_sele_rows = in_var->Get(); - auto out_sele_rows = out_var->GetMutable(); - out_sele_rows->mutable_value()->Resize(in_sele_rows.value().dims()); - out_sele_rows->set_rows(in_sele_rows.rows()); - out_sele_rows->set_height(in_sele_rows.height()); - } else if (in_var->IsType()) { - auto& in_lod_tensor = in_var->Get(); - auto* out_lod_tensor = out_var->GetMutable(); - out_lod_tensor->Resize(in_lod_tensor.dims()); - } else { - PADDLE_THROW(platform::errors::Unimplemented( - "Currently, the input type of ShareDim only can be LoDTensor " - "or SelectedRows.")); - } - } + size_t j = 0) override; void ShareAllLoD(const std::string& in, - const std::string& out) const override { - auto in_it = ctx_.inputs.find(in); - auto 
out_it = ctx_.outputs.find(out); - PADDLE_ENFORCE_NE(in_it, ctx_.inputs.end(), - platform::errors::NotFound( - "Input [%s] found error in Op [%s]", in, op_.Type())); - PADDLE_ENFORCE_NE( - out_it, ctx_.outputs.end(), - platform::errors::NotFound("Output [%s] found error in Op [%s]", out, - op_.Type())); - - auto& in_var_list = in_it->second; - auto& out_var_list = out_it->second; - - PADDLE_ENFORCE_EQ( - in_var_list.size(), out_var_list.size(), - platform::errors::PreconditionNotMet( - "Op [%s]: Input var size should be equal with output var size", - op_.Type())); - - auto& out_var_names = op_.Outputs(out); - - for (size_t i = 0; i < in_var_list.size(); ++i) { - if (out_var_names[i] == framework::kEmptyVarName) { - continue; - } - - Variable* in_var = in_var_list[i]; - if (!in_var->IsType()) return; - Variable* out_var = out_var_list[i]; - PADDLE_ENFORCE_EQ(out_var->IsType(), true, - platform::errors::PreconditionNotMet( - "The %d-th output of Output(%s) must be LoDTensor.", - i, out_var_names[i])); - auto& in_tensor = in_var->Get(); - auto* out_tensor = out_var->GetMutable(); - out_tensor->set_lod(in_tensor.lod()); -#ifdef PADDLE_WITH_MKLDNN - if (in_tensor.layout() != DataLayout::kMKLDNN) -#endif - out_tensor->set_layout(in_tensor.layout()); - } - } + const std::string& out) const override; void ShareLoD(const std::string& in, const std::string& out, size_t i = 0, - size_t j = 0) const override { - if (can_skip_lod_) { - return; - } - auto in_it = ctx_.inputs.find(in); - auto out_it = ctx_.outputs.find(out); - PADDLE_ENFORCE_NE( - in_it, ctx_.inputs.end(), - platform::errors::NotFound("Input %s does not exist.", in)); - PADDLE_ENFORCE_NE( - out_it, ctx_.outputs.end(), - platform::errors::NotFound("Output %s does not exist.", out)); - PADDLE_ENFORCE_LT(i, in_it->second.size(), - platform::errors::InvalidArgument( - "The index of input dimension is out of range, " - "excepted index less than %zu, but received %zu.", - in_it->second.size(), i)); - PADDLE_ENFORCE_LT(j, out_it->second.size(), - platform::errors::InvalidArgument( - "The index of output dimension is out of range, " - "excepted index less than %zu, but received %zu.", - out_it->second.size(), j)); - - Variable* in_var = in_it->second.at(i); - if (!in_var->IsType()) return; - Variable* out_var = out_it->second.at(j); - PADDLE_ENFORCE_EQ( - out_var->IsType(), true, - platform::errors::InvalidArgument( - "The %zu-th output of Output(%s) must be LoDTensor.", j, out)); - auto& in_tensor = in_var->Get(); - auto* out_tensor = out_var->GetMutable(); - out_tensor->set_lod(in_tensor.lod()); - -// TODO(dzhwinter) : reuse ShareLoD in most operators. -// Need to call ShareLayout explicitly in sequence related ops. -// Shall we have a better method to shared info between in/out Tensor? -#ifdef PADDLE_WITH_MKLDNN - // Fix me: ugly workaround below - // Correct solution: - // set_layout() should NOT be called here (i.e. ShareLoD). Instead, - // layout of output tensor should be set "manually" in Compute() - // of each OPKernel. The reason layout should NOT be shared between - // input and output "automatically" (now by InferShape()->ShareLoD()) - // is that layout transform may occur after InferShape(). - // Workaround: - // Skip set_layout() when input layout is kMKLDNN - // This is to avoid kMKLDNN is populated wrongly into a non-MKLDNN - // OPKernel. 
In all MKLDNN OPkernel, set_layout(kMKLDNN) should be called - // in Compute() - if (in_tensor.layout() != DataLayout::kMKLDNN) -#endif - out_tensor->set_layout(in_tensor.layout()); - } + size_t j = 0) const override; - int32_t GetLoDLevel(const std::string& in, size_t i = 0) const override { - PADDLE_THROW(platform::errors::PreconditionNotMet( - "GetLoDLevel is only used in compile time. The calculation of " - "output's actual lod is different among operators so that should be " - "set in the runtime kernel.")); - } + int32_t GetLoDLevel(const std::string& in, size_t i = 0) const override; void SetLoDLevel(const std::string& out, int32_t lod_level, - size_t j = 0) const override { - PADDLE_THROW(platform::errors::PreconditionNotMet( - "SetLoDLevel is only used in compile time. The calculation of " - "output's actual lod is different among operators so that should be " - "set in the runtime kernel.")); - } + size_t j = 0) const override; - bool IsRuntime() const override { return true; } + bool IsRuntime() const override; // TODO(paddle-dev): Can this be template? std::vector GetInputVarPtrs( - const std::string& name) override { - const std::vector& vars = InputVars(name); - std::vector res; - res.reserve(vars.size()); - res.insert(res.begin(), vars.begin(), vars.end()); - return res; - } + const std::string& name) override; std::vector GetOutputVarPtrs( - const std::string& name) override { - const std::vector& vars = OutputVars(name); - std::vector res; - res.reserve(vars.size()); - res.insert(res.begin(), vars.begin(), vars.end()); - return res; - } + const std::string& name) override; - DDim GetInputDim(const std::string& name) const override { - const std::vector& vars = InputVars(name); - PADDLE_ENFORCE_EQ( - vars.size(), 1UL, - platform::errors::InvalidArgument( - "Input(%s) should hold one element, but now it holds %zu elements.", - name, vars.size())); - return this->GetDim(vars[0]); - } + DDim GetInputDim(const std::string& name) const override; - std::vector GetInputsDim(const std::string& name) const override { - const std::vector& vars = InputVars(name); - return GetDims(vars); - } + std::vector GetInputsDim(const std::string& name) const override; std::vector GetInputsVarType( - const std::string& name) const override { - return GetVarTypes(InputVars(name)); - } + const std::string& name) const override; std::vector GetOutputsVarType( - const std::string& name) const override { - return GetVarTypes(OutputVars(name)); - } + const std::string& name) const override; - void SetOutputDim(const std::string& name, const DDim& dim) override { - auto& vars = OutputVars(name); - PADDLE_ENFORCE_EQ( - vars.size(), 1UL, - platform::errors::InvalidArgument("Output(%s) should hold one element, " - "but now it holds %zu elements.", - name, vars.size())); - SetDim(vars[0], dim); - } + void SetOutputDim(const std::string& name, const DDim& dim) override; void SetOutputsDim(const std::string& name, - const std::vector& dims) override { - auto& vars = OutputVars(name); - SetDims(vars, dims); - } + const std::vector& dims) override; - void SetSkipLoD(bool skip) { can_skip_lod_ = skip; } + void SetSkipLoD(bool skip); protected: - DDim GetDim(Variable* var) const { - PADDLE_ENFORCE_NOT_NULL( - var, platform::errors::InvalidArgument("Input variable is nullptr.")); - if (var->IsType()) { - return var->Get().dims(); - } else if (var->IsType()) { - return var->Get().GetCompleteDims(); - } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "Only LoDTensor or SelectedRows support 'GetDim', but 
input " - "Variable's type is %s.", - ToTypeName(var->Type()))); - } - } + DDim GetDim(Variable* var) const; - std::vector GetDims(const std::vector& vars) const { - std::vector ret; - ret.reserve(vars.size()); - std::transform(vars.begin(), vars.end(), std::back_inserter(ret), - [this](Variable* var) { return this->GetDim(var); }); - return ret; - } + std::vector GetDims(const std::vector& vars) const; - std::vector GetRepeatedDims(const std::string& name) const override { - PADDLE_THROW(platform::errors::PreconditionNotMet( - "GetRepeatedDims method only ban be used in compile time.")); - } + std::vector GetRepeatedDims(const std::string& name) const override; - void SetDim(Variable* var, const DDim& dim) { - if (var->IsType()) { - var->GetMutable()->Resize(dim); - } else if (var->IsType()) { - var->GetMutable()->set_height(dim[0]); - } else { - PADDLE_THROW(platform::errors::Unimplemented( - "Variable type error, expect LoDTensor or SelectedRows, but received " - "(%s).", - ToTypeName(var->Type()))); - } - } + void SetDim(Variable* var, const DDim& dim); void SetDims(const std::vector& vars, - const std::vector& dims) { - size_t length = vars.size(); - PADDLE_ENFORCE_EQ(length, dims.size(), - platform::errors::InvalidArgument( - "The number of input variables do not match the " - "number of input dimensions, the number of variables " - "is %zu, the number of dimensions is %zu.", - length, dims.size())); - for (size_t i = 0; i < length; ++i) { - if (vars[i] == nullptr) { - continue; - } - SetDim(vars[i], dims[i]); - } - } + const std::vector& dims); void SetRepeatedDims(const std::string& name, - const std::vector& dims) override { - PADDLE_THROW(platform::errors::PreconditionNotMet( - "SetRepeatedDims method only can be used in compile time.")); - } + const std::vector& dims) override; std::vector GetVarTypes( - const std::vector& vars) const { - std::vector retv; - retv.resize(vars.size()); - std::transform( - vars.begin(), vars.end(), retv.begin(), - std::bind(std::mem_fn(&InterpretercoreInferShapeContext::GetVarType), - this, std::placeholders::_1)); - return retv; - } + const std::vector& vars) const; - proto::VarType::Type GetVarType(Variable* var) const { - return ToVarType(var->Type()); - } + proto::VarType::Type GetVarType(Variable* var) const; private: - const std::vector& InputVars(const std::string& name) const { - auto it = ctx_.inputs.find(name); - PADDLE_ENFORCE_NE( - it, ctx_.inputs.end(), - platform::errors::NotFound( - "Operator (%s) does not have the input (%s).", op_.Type(), name)); - return it->second; - } + const std::vector& InputVars(const std::string& name) const; - const std::vector& OutputVars(const std::string& name) const { - auto it = ctx_.outputs.find(name); - PADDLE_ENFORCE_NE( - it, ctx_.outputs.end(), - platform::errors::NotFound( - "Operator (%s) does not have the outputs (%s).", op_.Type(), name)); - return it->second; - } + const std::vector& OutputVars(const std::string& name) const; const OperatorBase& op_; const RuntimeContext& ctx_; @@ -467,8 +141,28 @@ struct OpKernelFunc { }; struct VariableMetaInfo { - int var_ref_count_; - paddle::framework::VarDesc* vardesc_; + int var_ref_count_{0}; + framework::VarDesc* var_desc_{nullptr}; + + VariableMetaInfo() {} + VariableMetaInfo(int var_ref_count, framework::VarDesc* var_desc) + : var_ref_count_(var_ref_count), var_desc_(var_desc) {} +}; + +class VariableScope; +class VariableScopeListener : public ScopeListener { + public: + explicit VariableScopeListener(VariableScope* var_scope_); + void 
onCreateVariable(const std::string& name) override; + void onDeleteVariable(const std::string& name) override; + void onRenameVariable(const std::string& old_name, + const std::string& new_name) override; + void onCreateScope(Scope* Scope) override; + void onDeleteScope(Scope* Scope) override; + void onClear() override; + + private: + VariableScope* var_scope_; // not owned }; // TODO(zhiqiu): Maybe we need to add rwlock for VariableScope? @@ -477,171 +171,61 @@ struct VariableMetaInfo { // ScopeBase. Scope manager the variables and VariableScope is just a quick // access machanism. ScopeListener is the callback to sync changes in Original // Scope. We can make it a membership of VariableScope. Here we use inherent. -class VariableScope : public ScopeBase, public ScopeListener { +class VariableScope : public ScopeBase { public: - VariableScope(Scope* outer_scope) { - // for @EMPTY@ variable - var_list_.push_back(nullptr); - name2id_[kEmptyVarName] = 0; - VariableMetaInfo info; - info.var_ref_count_ = 0; - info.vardesc_ = nullptr; - vec_meta_info_.push_back(info); - outer_scope_ = outer_scope; - - PADDLE_ENFORCE_NE( - outer_scope_, nullptr, - platform::errors::PreconditionNotMet( - "You have passed a nullptr to construct VariableScope.")); - outer_scope->AddListener(this); - } + explicit VariableScope(Scope* scope); - ~VariableScope() { - if (outer_scope_ != nullptr) outer_scope_->DelListener(this); - } + const Scope* GetScope() const; - const Scope* GetScope() const { return outer_scope_; } - - Variable* FindVar(const std::string& name) const { - auto it = name2id_.find(name); - if (it != name2id_.end()) { - PADDLE_ENFORCE_LT(it->second, var_list_.size(), - platform::errors::NotFound( - "The id(%d) of variable(%s) should not be larger " - "than the size of variable list(%d).", - it->second, name, var_list_.size())); - return var_list_[it->second]; - } - return nullptr; - } + Variable* FindVar(const std::string& name) const; + + ~VariableScope(); // Get variable id by name, return -1 if not found - int GetIdByName(const std::string& name) const { - auto it = name2id_.find(name); - if (it != name2id_.end()) { - return it->second; - } - return -1; - } + int GetIdByName(const std::string& name) const; // Get variable name by id, return "" if not found - std::string GetNameById(int id) const { - // NOTE(zhiqiu): do not use vec_meta_info_[id].vardesc_->Name() since - // vec_meta_info_[id] may be nullptr, - // typically when the target variable is not existed in the original program - // desc, but created by interpretercore. - // For example, created and used by d2h_copy or h2d_copy operator. - auto it = - std::find_if(name2id_.begin(), name2id_.end(), - [id](const auto& pair) { return pair.second == id; }); - if (it != name2id_.end()) { - return it->first; - } - return ""; - } + std::string GetNameById(int id) const; - bool HasVar(const std::string& name) const { - return name2id_.find(name) != name2id_.end(); - } + bool HasVar(const std::string& name) const; - int VarId(const std::string& name) const { - CheckExist(name); - return name2id_.at(name); - } + int VarId(const std::string& name) const; - Variable* Var(int id) const { return var_list_.at(id); } + Variable* Var(int id) const; - Variable* Var(const std::string& name) const { - return var_list_.at(VarId(name)); - } + Variable* Var(const std::string& name) const; - size_t VarSize() const { return var_list_.size(); } - - void AddVar(const std::string& name, VarDesc* var_desc) { // NOLINT - // AddVar -> Scope::Var -> onCreateVariable. 
- VLOG(4) << "Add variable: " << name << " through AddVar()"; - auto v = outer_scope_->Var(name); - if (nullptr == var_desc) { - v->GetMutable(); - } else { - InitializeVariable( - v, - var_desc - ->GetType()); // Scope don't initialize variable recently created - } - SetVarDesc(name, var_desc); - } + size_t VarSize() const; - void AddVar(const std::string& name, Variable& var) { // NOLINT - // Though name existed in outer_scope_, we need - // add again to create name2id map. - outer_scope_->Var(name); - } + void AddVar(const std::string& name, VarDesc* var_desc); - void SetVarDesc(const std::string& name, framework::VarDesc* var_desc) { - CheckExist(name); - vec_meta_info_[VarId(name)].vardesc_ = var_desc; - } + void AddVar(const std::string& name, const Variable& var); - paddle::framework::VarDesc* VarDesc(const std::string& name) const { - return VarDesc(VarId(name)); - } + void SetVarDesc(const std::string& name, framework::VarDesc* var_desc); - paddle::framework::VarDesc* VarDesc(int id) const { - CheckExist(id); - return vec_meta_info_[id].vardesc_; - } + paddle::framework::VarDesc* VarDesc(const std::string& name) const; - void CheckExist(int id) const { - PADDLE_ENFORCE_LT(id, var_list_.size(), - platform::errors::PreconditionNotMet( - "Required var_id < %d, but received var_id = %d.", - var_list_.size(), id)); - } + paddle::framework::VarDesc* VarDesc(int id) const; - void CheckExist(const std::string& name) const { - PADDLE_ENFORCE_EQ( - HasVar(name), true, - platform::errors::NotFound("%s not in VariableScope.", name)); - } + void CheckExist(int id) const; + + void CheckExist(const std::string& name) const; - public: // callbacks from ScopeListener class - void onCreateVariable(const std::string& name) override { - auto v = outer_scope_->GetVar(name); // must exsit in outer_scope_ - if (!HasVar(name)) { // may exist in variable scope. 
- VLOG(4) << "Calling VariableScope::onCreateVariable with var_name: " - << name; - name2id_[name] = VarSize(); - var_list_.push_back(v); - - VariableMetaInfo info; - info.var_ref_count_ = 0; - info.vardesc_ = nullptr; // set nullptr, then modifty it in AddVar() - vec_meta_info_.push_back(info); - } - } - void onDeleteVariable(const std::string& name) override { - if (HasVar(name)) { - VLOG(4) << "Calling VariableScope::onDeleteVariable with var_name: " - << name; - } - } - void onRenameVariable(const std::string& old_name, - const std::string& new_name) override {} - void onCreateScope(Scope* Scope) override {} - void onDeleteScope(Scope* Scope) override {} - void onClear() override {} std::vector& MutableVecMetaInfo() { return vec_meta_info_; } const std::vector& VecMetaInfo() const { return vec_meta_info_; } + friend class VariableScopeListener; + private: std::vector var_list_; std::map name2id_; std::vector vec_meta_info_; - Scope* outer_scope_ = nullptr; + Scope* scope_ = nullptr; + // mutable RWLock vars_lock_; + std::shared_ptr listener_; }; class NextInstruction { diff --git a/paddle/fluid/framework/new_executor/standalone_executor.cc b/paddle/fluid/framework/new_executor/standalone_executor.cc index 1c9f6b3d90..d1c8871485 100644 --- a/paddle/fluid/framework/new_executor/standalone_executor.cc +++ b/paddle/fluid/framework/new_executor/standalone_executor.cc @@ -23,16 +23,14 @@ StandaloneExecutor::StandaloneExecutor(const platform::Place& place, : place_(place), startup_prog_(startup_prog), main_prog_(main_prog), - outer_scope_(scope), - global_scope_(scope) { - paddle::framework::InitDevices(); + global_scope_(VariableScope(scope)) { // init scope - BuildVariableOuterScope(startup_prog, &global_scope_, scope); + BuildVariableScope(startup_prog, &global_scope_); - if (outer_scope_ != nullptr) { - auto name_list = outer_scope_->LocalVarNames(); + if (scope != nullptr) { + auto name_list = scope->LocalVarNames(); for (auto name : name_list) { - auto v = outer_scope_->Var(name); + auto v = scope->Var(name); if (!global_scope_.HasVar(name)) { global_scope_.AddVar(name, *v); } @@ -62,9 +60,8 @@ framework::interpreter::CostInfo StandaloneExecutor::DryRun( return core->DryRun(feed_names, feed_tensors); } -void StandaloneExecutor::BuildVariableOuterScope( - const framework::ProgramDesc& pdesc, VariableScope* var_scope, - Scope* outer_scope) { +void StandaloneExecutor::BuildVariableScope(const framework::ProgramDesc& pdesc, + VariableScope* var_scope) { auto& global_block = pdesc.Block(0); for (auto& var : global_block.AllVars()) { diff --git a/paddle/fluid/framework/new_executor/standalone_executor.h b/paddle/fluid/framework/new_executor/standalone_executor.h index 1fbdf7b4b0..9b535c9b63 100644 --- a/paddle/fluid/framework/new_executor/standalone_executor.h +++ b/paddle/fluid/framework/new_executor/standalone_executor.h @@ -50,8 +50,8 @@ class StandaloneExecutor : public ExecutorBase { const std::vector& feed_tensors); private: - void BuildVariableOuterScope(const framework::ProgramDesc& pdesc, - VariableScope* var_scope, Scope* outer_scope); + void BuildVariableScope(const framework::ProgramDesc& pdesc, + VariableScope* var_scope); std::shared_ptr GetInterpreterCore( const std::vector& feed_names, @@ -60,7 +60,6 @@ class StandaloneExecutor : public ExecutorBase { const platform::Place& place_; const ProgramDesc& startup_prog_; const ProgramDesc& main_prog_; - Scope* outer_scope_; VariableScope global_scope_; std::unordered_map> programs_; diff --git 
a/paddle/fluid/framework/new_executor/standalone_executor_test.cc b/paddle/fluid/framework/new_executor/standalone_executor_test.cc index 2aad8d245a..20bc5c7789 100644 --- a/paddle/fluid/framework/new_executor/standalone_executor_test.cc +++ b/paddle/fluid/framework/new_executor/standalone_executor_test.cc @@ -75,6 +75,7 @@ paddle::framework::ProgramDesc load_from_file(const std::string& file_name) { } int main(int argc, char* argv[]) { + paddle::framework::InitDevices(); std::cout << "main" << std::endl; int64_t batch_size = std::stoi(argv[1]); paddle::framework::InitDevices(); diff --git a/paddle/fluid/framework/scope.cc b/paddle/fluid/framework/scope.cc index 7a36354d7e..4bb94a4e7e 100644 --- a/paddle/fluid/framework/scope.cc +++ b/paddle/fluid/framework/scope.cc @@ -266,14 +266,14 @@ Variable* Scope::FindVarLocally(const std::string& name) const { return nullptr; } -void Scope::AddListener(ScopeListener* listener) { +void Scope::AddListener(const std::shared_ptr& listener) { auto it = std::find(listeners_.begin(), listeners_.end(), listener); if (it == listeners_.end()) { listeners_.push_back(listener); } } -void Scope::DelListener(ScopeListener* listener) { +void Scope::DelListener(const std::shared_ptr& listener) { listeners_.remove(listener); } diff --git a/paddle/fluid/framework/scope.h b/paddle/fluid/framework/scope.h index ca486aec8c..892618b7e6 100644 --- a/paddle/fluid/framework/scope.h +++ b/paddle/fluid/framework/scope.h @@ -144,9 +144,9 @@ class Scope : public ScopeBase { // Rename variable to a new name and return the new name std::string Rename(const std::string& origin_name) const; - void AddListener(ScopeListener* listener); + void AddListener(const std::shared_ptr& listener); - void DelListener(ScopeListener* listener); + void DelListener(const std::shared_ptr& listener); protected: struct KeyHasher { @@ -184,7 +184,7 @@ class Scope : public ScopeBase { // Scope in `kids_` are owned by this class. 
  mutable std::list<Scope*> kids_;
  const Scope* parent_{nullptr};
 
-  std::list<ScopeListener*> listeners_;
+  std::list<std::shared_ptr<ScopeListener>> listeners_;
 
  DISABLE_COPY_AND_ASSIGN(Scope);
diff --git a/python/paddle/fluid/tests/unittests/interpreter/test_standalone_executor.py b/python/paddle/fluid/tests/unittests/interpreter/test_standalone_executor.py
index 03062ab8e2..325d74bb5d 100644
--- a/python/paddle/fluid/tests/unittests/interpreter/test_standalone_executor.py
+++ b/python/paddle/fluid/tests/unittests/interpreter/test_standalone_executor.py
@@ -275,7 +275,7 @@ class TestException(unittest.TestCase):
         for feed in feeds:
             out = exe.run(main_program, feed=feed, fetch_list=fetch_vars)
-            print(out)
+            print(main_program)
         return out
 
     def run_new_executor(self, feed):
@@ -287,10 +287,10 @@ class TestException(unittest.TestCase):
     def test_exception(self):
         feed = [{
             'id': np.array([1, 2, 3, 4, 5]).astype(np.int64),
-            'data': np.array([1, 2, 3, 4]).astype(np.float32),
+            'data': np.array([1, 2, 3]).astype(np.float32),
         }, {
             'id': np.array([1, 2, 3, 4, 11]).astype(np.int64),
-            'data': np.array([1, 2, 3, 4]).astype(np.float32),
+            'data': np.array([1, 2, 3]).astype(np.float32),
         }]
         self.assertRaises(ValueError, self.run_new_executor, feed)
 
@@ -307,6 +307,18 @@ class TestException(unittest.TestCase):
         feed[1]['data'][0] = np.nan
         self.assertRaises(RuntimeError, self.run_new_executor, feed)
 
+    def test_scope(self):
+        feed = [{
+            'id': np.array([1, 2, 3, 4, 5]).astype(np.int64),
+            'data': np.array([1, 2, 3]).astype(np.float32),
+        }, {
+            'id': np.array([1, 2, 3, 4, 5]).astype(np.int64),
+            'data': np.array([2, 2, 2]).astype(np.float32),
+        }]
+        self.run_new_executor(feed)
+        self.assertIsNotNone(paddle.static.global_scope().find_var(
+            'embedding.tmp_2'))
+
 
 if __name__ == "__main__":
     unittest.main()
-- 
GitLab
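
The executor_gc_helper hunks at the top of the patch only move OpInOutInfo's definitions out of the header; the behaviour stays the same: an input argument's buffer may be released early only if its slot is declared "no need buffer" and the same variable does not also appear in a buffer-needing input slot or in any output slot. Below is a compact, self-contained sketch of that decision; FakeOpDesc and the variable names are made up for illustration and are not Paddle types.

// Sketch of the "no need buffer" check from OpInOutInfo; FakeOpDesc is made up.
#include <iostream>
#include <map>
#include <string>
#include <unordered_set>
#include <vector>

struct FakeOpDesc {
  std::map<std::string, std::vector<std::string>> inputs;   // slot -> var names
  std::map<std::string, std::vector<std::string>> outputs;  // slot -> var names
  std::unordered_set<std::string> no_need_buffer_slots;     // input slot names
};

class OpInOutInfoSketch {
 public:
  void Build(const FakeOpDesc& op) {
    no_need_buffer_ins_ = op.no_need_buffer_slots;
    if (no_need_buffer_ins_.empty()) return;
    // Every argument of a buffer-needing input slot, and every output
    // argument, still needs its buffer.
    for (auto& kv : op.inputs) {
      if (no_need_buffer_ins_.count(kv.first) != 0) continue;
      for (auto& arg : kv.second) other_args_set_.insert(arg);
    }
    for (auto& kv : op.outputs)
      for (auto& arg : kv.second) other_args_set_.insert(arg);
  }

  bool IsInArgBufferNeeded(const std::string& arg) const {
    return no_need_buffer_ins_.empty() || other_args_set_.count(arg) != 0;
  }

 private:
  std::unordered_set<std::string> no_need_buffer_ins_;
  std::unordered_set<std::string> other_args_set_;
};

int main() {
  // e.g. an op whose "X" slot only needs shape/meta data, not the tensor buffer.
  FakeOpDesc op{{{"X", {"a"}}, {"Y", {"b"}}}, {{"Out", {"c"}}}, {"X"}};
  OpInOutInfoSketch info;
  info.Build(op);
  std::cout << std::boolalpha << info.IsInArgBufferNeeded("a") << "\n";  // false
  std::cout << info.IsInArgBufferNeeded("b") << "\n";                    // true
  return 0;
}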
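
The interpretercore.cc hunk feeds the same information into a reference-counting GC: Convert() bumps var_ref_count_ once per instruction that lists a variable as a GC-check var, and CheckGC() (now run inside the try block right after RunInstruction) decrements the counter with fetch_sub; the instruction that drops it to zero may free the tensor. A minimal stand-alone sketch of that pattern follows; MiniVar, MiniInstruction and the op names are illustrative, not Paddle classes.

// Stand-alone sketch of the new executor's reference-counting GC.
#include <atomic>
#include <cstdio>
#include <memory>
#include <string>
#include <vector>

struct MiniVar {
  std::string name;
  std::vector<float> buffer;  // stands in for the tensor allocation
};

struct MiniInstruction {
  std::string op_type;
  std::vector<size_t> gc_check_vars;  // ids that may become dead after this op
};

int main() {
  std::vector<std::unique_ptr<MiniVar>> vars;
  vars.emplace_back(new MiniVar{"x", std::vector<float>(4, 1.f)});
  vars.emplace_back(new MiniVar{"y", std::vector<float>(4, 2.f)});

  // "Convert" step: x is read by both ops, y only by the second one.
  std::vector<MiniInstruction> instructions = {{"scale", {0}},
                                               {"reduce_sum", {0, 1}}};
  std::vector<std::atomic<int>> ref_count(vars.size());
  for (auto& instr : instructions)
    for (size_t id : instr.gc_check_vars) ref_count[id].fetch_add(1);

  // "Run" step: the instruction that drops a count to zero frees the buffer.
  for (auto& instr : instructions) {
    // ... the kernel would run here ...
    for (size_t id : instr.gc_check_vars) {
      if (ref_count[id].fetch_sub(1, std::memory_order_relaxed) == 1) {
        std::printf("clear %s after %s\n", vars[id]->name.c_str(),
                    instr.op_type.c_str());
        // Free the allocation but keep the Variable object itself.
        std::vector<float>().swap(vars[id]->buffer);
      }
    }
  }
  return 0;
}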
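
The Scope changes switch listener ownership from raw ScopeListener* to std::shared_ptr, and VariableScope now installs a VariableScopeListener on the underlying Scope so that variables created directly in the Scope (for example by data-transfer ops the interpreter inserts) are mirrored into its name-to-id table. A toy version of that observer wiring, assuming a simplified scope with create-only notifications (ToyScope and NameIdMirror are stand-ins, not Paddle's Scope or VariableScopeListener):

// Toy version of the Scope/ScopeListener wiring.
#include <iostream>
#include <list>
#include <map>
#include <memory>
#include <set>
#include <string>

class Listener {
 public:
  virtual ~Listener() = default;
  virtual void OnCreateVariable(const std::string& name) = 0;
};

class ToyScope {
 public:
  void AddListener(const std::shared_ptr<Listener>& l) {
    listeners_.push_back(l);
  }
  void DelListener(const std::shared_ptr<Listener>& l) { listeners_.remove(l); }
  void Var(const std::string& name) {
    if (vars_.insert(name).second) {  // notify only on first creation
      for (auto& l : listeners_) l->OnCreateVariable(name);
    }
  }

 private:
  std::set<std::string> vars_;
  std::list<std::shared_ptr<Listener>> listeners_;
};

// Mirrors newly created scope variables into a flat name -> id table, the way
// VariableScopeListener keeps VariableScope::name2id_ in sync.
class NameIdMirror : public Listener {
 public:
  void OnCreateVariable(const std::string& name) override {
    if (name2id_.count(name) == 0) {
      name2id_[name] = next_id_++;
      std::cout << "mirrored " << name << " -> id " << name2id_[name] << "\n";
    }
  }

 private:
  std::map<std::string, int> name2id_;
  int next_id_ = 0;
};

int main() {
  ToyScope scope;
  auto mirror = std::make_shared<NameIdMirror>();
  scope.AddListener(mirror);
  scope.Var("embedding.tmp_2");  // mirror assigns it an id automatically
  scope.DelListener(mirror);     // mirrors VariableScope::~VariableScope()
  return 0;
}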
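
new_executor_defs.h/.cc also copy Scope's trick of compiling the reader/writer lock macros away under PADDLE_ON_INFERENCE, since inference only reads scopes concurrently and the mutex cost is measurable; note the RWLock member itself is still commented out in this patch. A generic sketch of the same macro pattern using std::shared_mutex instead of Paddle's AutoRDLock/AutoWRLock, where BUILD_READ_ONLY stands in for PADDLE_ON_INFERENCE and VarTable is illustrative:

// Sketch of lock macros that disappear in a read-only (inference) build.
// Needs C++17 for std::shared_mutex.
#include <map>
#include <mutex>
#include <shared_mutex>
#include <string>

#ifdef BUILD_READ_ONLY
#define VARS_READER_LOCK
#define VARS_WRITER_LOCK
#else
#define VARS_READER_LOCK std::shared_lock<std::shared_mutex> lock(vars_mutex_);
#define VARS_WRITER_LOCK std::unique_lock<std::shared_mutex> lock(vars_mutex_);
#endif

class VarTable {
 public:
  void Set(const std::string& name, int id) {
    VARS_WRITER_LOCK  // exclusive lock, or nothing when BUILD_READ_ONLY is set
    name2id_[name] = id;
  }
  int Get(const std::string& name) const {
    VARS_READER_LOCK  // shared lock, or nothing when BUILD_READ_ONLY is set
    auto it = name2id_.find(name);
    return it == name2id_.end() ? -1 : it->second;
  }

 private:
  std::map<std::string, int> name2id_;
  mutable std::shared_mutex vars_mutex_;
};

int main() {
  VarTable table;
  table.Set("fc_0.w_0", 3);
  return table.Get("fc_0.w_0") == 3 ? 0 : 1;
}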