Unverified commit 1fe4513c, authored by Leo Chen, committed by GitHub

Refine new executor (#37074)

* split declaration and implementation

* remove initdevices

* refine VariableMetaInfo

* add ut

* fix compile
Parent commit: 0a92c857
...@@ -31,6 +31,36 @@
namespace paddle {
namespace framework {
void OpInOutInfo::Build(const OperatorBase *op) {
is_built_ = true;
auto &inferer = op->Info().NoNeedBufferVarsInferer();
if (inferer) {
no_need_buffer_ins_ = inferer(op->Inputs(), op->Outputs(), op->Attrs());
if (no_need_buffer_ins_.empty()) return;
for (auto &in_name_pair : op->Inputs()) {
if (no_need_buffer_ins_.count(in_name_pair.first) != 0) {
continue;
}
for (auto &in_arg_name : in_name_pair.second) {
other_args_set_.insert(in_arg_name);
}
}
for (auto &out_name_pair : op->Outputs()) {
for (auto &out_arg_name : out_name_pair.second) {
other_args_set_.insert(out_arg_name);
}
}
}
}
bool OpInOutInfo::IsInArgBufferNeeded(const std::string &in_arg_name) const {
return no_need_buffer_ins_.empty() || other_args_set_.count(in_arg_name) != 0;
}
static bool VarCanBeDeleted(const std::string &name, const BlockDesc &block,
                            const std::unordered_set<std::string> &skip_vars) {
  if (skip_vars.count(name) != 0) {
......
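OpInOutInfo::Build above records, per operator, which input arguments do not need their underlying buffer (only metadata such as shape and lod), based on the operator's NoNeedBufferVarsInferer. A minimal sketch of how a GC pass could query it, assuming only the three member functions shown here and that OpInOutInfo lives in executor_gc_helper.h (the helper function and variable names are illustrative, not part of this commit):

#include <string>
#include <vector>
#include "paddle/fluid/framework/executor_gc_helper.h"  // assumed location of OpInOutInfo

// Sketch: decide which input buffers of `op` may be released early.
void CollectFreeableInputs(const paddle::framework::OperatorBase *op,
                           std::vector<std::string> *freeable) {
  paddle::framework::OpInOutInfo info;
  if (!info.IsBuilt()) {
    info.Build(op);
  }
  for (auto &name_pair : op->Inputs()) {
    for (auto &arg_name : name_pair.second) {
      if (!info.IsInArgBufferNeeded(arg_name)) {
        // Only the shape/lod of this argument is needed by `op`; its buffer
        // could be reclaimed once no other op references it.
        freeable->push_back(arg_name);
      }
    }
  }
}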
...@@ -33,38 +33,11 @@ class Scope;

 struct OpInOutInfo {
  public:
-  void Build(const OperatorBase *op) {
-    is_built_ = true;
-    auto &inferer = op->Info().NoNeedBufferVarsInferer();
-    if (inferer) {
-      no_need_buffer_ins_ = inferer(op->Inputs(), op->Outputs(), op->Attrs());
-      if (no_need_buffer_ins_.empty()) return;
-      for (auto &in_name_pair : op->Inputs()) {
-        if (no_need_buffer_ins_.count(in_name_pair.first) != 0) {
-          continue;
-        }
-        for (auto &in_arg_name : in_name_pair.second) {
-          other_args_set_.insert(in_arg_name);
-        }
-      }
-      for (auto &out_name_pair : op->Outputs()) {
-        for (auto &out_arg_name : out_name_pair.second) {
-          other_args_set_.insert(out_arg_name);
-        }
-      }
-    }
-  }
+  void Build(const OperatorBase *op);

   bool IsBuilt() const { return is_built_; }

-  bool IsInArgBufferNeeded(const std::string &in_arg_name) const {
-    return no_need_buffer_ins_.empty() ||
-           other_args_set_.count(in_arg_name) != 0;
-  }
+  bool IsInArgBufferNeeded(const std::string &in_arg_name) const;

  private:
   // A set to record unused buffer input vars of op
......
...@@ -3,10 +3,11 @@ lod_rank_table fs shell fleet_wrapper heter_wrapper ps_gpu_wrapper box_wrapper l
 graph_to_program_pass variable_helper timer monitor nan_inf_utils)
 cc_library(workqueue SRCS workqueue.cc workqueue_utils.cc DEPS enforce)
-cc_library(interpretercore_garbage_collector SRCS interpretercore_garbage_collector.cc DEPS workqueue ${DEVICE_EVENT_LIBS})
-cc_library(interpretercore_util SRCS interpretercore_util.cc DEPS ${INTERPRETERCORE_DEPS} workqueue)
-cc_library(event_manager SRCS event_manager.cc DEPS ${DEVICE_EVENT_LIBS} glog)
-cc_library(stream_analyzer SRCS stream_analyzer.cc DEPS ${DEVICE_EVENT_LIBS} glog device_context)
+cc_library(new_executor_defs SRCS new_executor_defs.cc DEPS enforce glog scope)
+cc_library(interpretercore_garbage_collector SRCS interpretercore_garbage_collector.cc DEPS workqueue ${DEVICE_EVENT_LIBS} executor_gc_helper)
+cc_library(interpretercore_util SRCS interpretercore_util.cc DEPS ${INTERPRETERCORE_DEPS} workqueue new_executor_defs)
+cc_library(event_manager SRCS event_manager.cc DEPS ${DEVICE_EVENT_LIBS} glog new_executor_defs)
+cc_library(stream_analyzer SRCS stream_analyzer.cc DEPS ${DEVICE_EVENT_LIBS} glog device_context new_executor_defs)
 cc_library(interpretercore SRCS interpretercore.cc DEPS workqueue ${DEVICE_EVENT_LIBS} interpretercore_util interpretercore_garbage_collector stream_analyzer event_manager)
 cc_library(standalone_executor SRCS standalone_executor.cc DEPS interpretercore)
 cc_test(workqueue_test SRCS workqueue_test.cc DEPS workqueue)
......
...@@ -121,6 +121,8 @@ void InterpreterCore::Convert() {
     for (auto var_id : gc_check_input_list) {
       vec_meta_info[var_id].var_ref_count_++;
       instr.AddGCCheckVar(var_id);
+      VLOG(4) << "clear " << global_scope_->GetNameById(var_id) << " after "
+              << instr.OpBase()->Type();
     }
   }
...@@ -131,6 +133,8 @@ void InterpreterCore::Convert() {
     if (input_var2op_info_.at(id).size() == 0) {
       // output var is not used by any kernel
       vec_instruction_[i].AddGCCheckVar(id);
+      VLOG(4) << "clear " << global_scope_->GetNameById(id) << " after "
+              << vec_instruction_[i].OpBase()->Type();
       vec_meta_info[id].var_ref_count_++;
     }
   }
...@@ -437,6 +441,8 @@ void InterpreterCore::RunInstructionAsync(size_t instr_id) {
     try {
       RunInstruction(instr_node);
+      // GC information
+      CheckGC(instr_node);
     } catch (platform::EnforceNotMet& ex) {
       framework::InsertCallStackInfo(op->Type(), op->Attrs(), &ex);
       exception_holder_.Catch(std::make_exception_ptr(std::move(ex)));
...@@ -463,9 +469,6 @@ void InterpreterCore::RunInstructionAsync(size_t instr_id) {
     interpreter::RecordEvent(instr_node, place_);
     op_run_number_.fetch_add(1, std::memory_order_relaxed);

-    // GC information
-    CheckGC(instr_node);
-
     RunNextInstructions(instr_node, &ready_ops);
   }
 }
...@@ -476,6 +479,9 @@ void InterpreterCore::CheckGC(const Instruction& instr) {
   auto& atomic_var_ref = async_work_queue_->AtomicVarRef();

   for (auto var_id : instr.GCCheckVars()) {
+    VLOG(4) << "GC " << global_scope_->GetNameById(var_id) << " "
+            << var_scope.VarDesc(var_id);
     bool is_ready =
         atomic_var_ref[var_id]->fetch_sub(1, std::memory_order_relaxed) == 1;
     // ignore all persistable var while GC
......
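The change above moves CheckGC to run right after RunInstruction and logs which variable is cleared after which op. The underlying mechanism, visible in the hunks above, is a per-variable atomic reference count that every consuming instruction decrements; the variable becomes collectible when the count reaches zero. A standalone sketch of that idea, with toy types standing in for Instruction/VariableScope (everything here is illustrative, not Paddle API):

#include <atomic>
#include <iostream>
#include <memory>
#include <string>
#include <unordered_map>

// Toy model of the interpreter core's ref-count GC: each variable carries an
// atomic counter equal to the number of instructions that must still observe
// it; the last decrement frees the variable.
struct ToyVar {
  std::string name;
};

int main() {
  std::unordered_map<int, std::unique_ptr<ToyVar>> vars;
  std::unordered_map<int, std::atomic<int>> ref_count;

  vars[0] = std::make_unique<ToyVar>(ToyVar{"x"});
  ref_count[0] = 2;  // two instructions list var 0 in their GC-check set

  auto check_gc = [&](int var_id) {
    // fetch_sub returns the previous value; 1 means we were the last user.
    if (ref_count[var_id].fetch_sub(1, std::memory_order_relaxed) == 1) {
      std::cout << "GC " << vars[var_id]->name << "\n";
      vars[var_id].reset();  // the real code hands the tensor to a GC queue
    }
  };

  check_gc(0);  // first consumer finishes: count 2 -> 1, nothing freed
  check_gc(0);  // second consumer finishes: count 1 -> 0, variable collected
  return 0;
}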
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <map>
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/framework/new_executor/new_executor_defs.h"
#include "paddle/fluid/framework/rw_lock.h"
// In inference scenarios the scopes will not be written by two threads at the
// same time, but a scope may be read by multiple threads concurrently, and
// the mutex would cause a serious performance issue.
// So the mutex is disabled when `ON_INFER`.
#ifdef PADDLE_ON_INFERENCE
#define SCOPE_VARS_READER_LOCK
#define SCOPE_VARS_WRITER_LOCK
#else
#define SCOPE_VARS_READER_LOCK AutoRDLock auto_lock(&vars_lock_);
#define SCOPE_VARS_WRITER_LOCK AutoWRLock auto_lock(&vars_lock_);
#endif
namespace paddle {
namespace framework {
InterpretercoreInferShapeContext::InterpretercoreInferShapeContext(
const OperatorBase& op, const RuntimeContext& ctx)
: op_(op), ctx_(ctx), can_skip_lod_(false) {}
bool InterpretercoreInferShapeContext::HasInput(const std::string& name) const {
// has only one input
const auto& ins = ctx_.inputs;
auto it = ins.find(name);
if (it == ins.end()) {
return false;
}
const auto& in = it->second;
if (in.size() == 0) return false;
PADDLE_ENFORCE_EQ(
in.size(), 1UL,
platform::errors::InvalidArgument(
"Input %s should not contain more than one inputs.", name));
return in[0] != nullptr;
}
bool InterpretercoreInferShapeContext::HasOutput(
const std::string& name) const {
// has only one output
const auto& outs = ctx_.outputs;
auto it = outs.find(name);
if (it == outs.end()) {
return false;
}
const auto& out = it->second;
if (out.size() == 0) {
return false;
}
PADDLE_ENFORCE_EQ(
out.size(), 1UL,
platform::errors::InvalidArgument(
"Output %s should not contain more than one outputs.", name));
return out[0] != nullptr;
}
bool InterpretercoreInferShapeContext::HasInputs(
const std::string& name) const {
const auto& ins = ctx_.inputs;
auto it = ins.find(name);
if (it == ins.end() || it->second.empty()) {
return false;
}
for (auto& input : it->second) {
if (input == nullptr) {
return false;
}
}
return true;
}
bool InterpretercoreInferShapeContext::HasOutputs(
const std::string& name) const {
const auto& outs = ctx_.outputs;
auto it = outs.find(name);
if (it == outs.end() || it->second.empty()) {
return false;
}
for (auto& output : it->second) {
if (output == nullptr) {
return false;
}
}
return true;
}
AttrReader InterpretercoreInferShapeContext::Attrs() const {
return AttrReader(op_.Attrs());
}
std::vector<std::string> InterpretercoreInferShapeContext::Inputs(
const std::string& name) const {
return op_.Inputs(name);
}
std::vector<std::string> InterpretercoreInferShapeContext::Outputs(
const std::string& name) const {
return op_.Outputs(name);
}
std::string InterpretercoreInferShapeContext::GetInputNameByIdx(
size_t idx) const {
auto& op_proto =
paddle::framework::OpInfoMap::Instance().Get(op_.Type()).proto_;
PADDLE_ENFORCE_LT(idx, op_proto->inputs().size(),
platform::errors::OutOfRange(
"The index should be less than the size of inputs of "
"operator %s, but got index is %d and size is %d",
op_.Type(), idx, op_proto->inputs().size()));
return op_proto->inputs()[idx].name();
}
std::string InterpretercoreInferShapeContext::GetOutputNameByIdx(
size_t idx) const {
auto& op_proto =
paddle::framework::OpInfoMap::Instance().Get(op_.Type()).proto_;
PADDLE_ENFORCE_LT(idx, op_proto->outputs().size(),
platform::errors::OutOfRange(
"The index should be less than the size of outputs of "
"operator %s, but got index is %d and size is %d",
op_.Type(), idx, op_proto->outputs().size()));
return op_proto->outputs()[idx].name();
}
void InterpretercoreInferShapeContext::ShareDim(const std::string& in,
const std::string& out,
size_t i, size_t j) {
auto in_it = ctx_.inputs.find(in);
auto out_it = ctx_.outputs.find(out);
PADDLE_ENFORCE_NE(in_it, ctx_.inputs.end(),
platform::errors::NotFound("Input %s does not exist.", in));
PADDLE_ENFORCE_NE(
out_it, ctx_.outputs.end(),
platform::errors::NotFound("Output %s does not exist.", out));
PADDLE_ENFORCE_LT(i, in_it->second.size(),
platform::errors::InvalidArgument(
"The index of input dimension is out of range, "
"excepted index less than %zu, but received %zu.",
in_it->second.size(), i));
PADDLE_ENFORCE_LT(j, out_it->second.size(),
platform::errors::InvalidArgument(
"The index of output dimension is out of range, "
"excepted index less than %zu, but received %zu.",
out_it->second.size(), j));
Variable* in_var = in_it->second[i];
Variable* out_var = out_it->second[j];
PADDLE_ENFORCE_EQ(
in_var->Type(), out_var->Type(),
platform::errors::InvalidArgument(
"The type of input (%s) and output (%s) are inconsistent.", in, out));
if (in_var->IsType<framework::SelectedRows>()) {
auto& in_sele_rows = in_var->Get<framework::SelectedRows>();
auto out_sele_rows = out_var->GetMutable<framework::SelectedRows>();
out_sele_rows->mutable_value()->Resize(in_sele_rows.value().dims());
out_sele_rows->set_rows(in_sele_rows.rows());
out_sele_rows->set_height(in_sele_rows.height());
} else if (in_var->IsType<framework::LoDTensor>()) {
auto& in_lod_tensor = in_var->Get<framework::LoDTensor>();
auto* out_lod_tensor = out_var->GetMutable<framework::LoDTensor>();
out_lod_tensor->Resize(in_lod_tensor.dims());
} else {
PADDLE_THROW(platform::errors::Unimplemented(
"Currently, the input type of ShareDim only can be LoDTensor "
"or SelectedRows."));
}
}
void InterpretercoreInferShapeContext::ShareAllLoD(
const std::string& in, const std::string& out) const {
auto in_it = ctx_.inputs.find(in);
auto out_it = ctx_.outputs.find(out);
PADDLE_ENFORCE_NE(in_it, ctx_.inputs.end(),
platform::errors::NotFound(
"Input [%s] found error in Op [%s]", in, op_.Type()));
PADDLE_ENFORCE_NE(out_it, ctx_.outputs.end(),
platform::errors::NotFound(
"Output [%s] found error in Op [%s]", out, op_.Type()));
auto& in_var_list = in_it->second;
auto& out_var_list = out_it->second;
PADDLE_ENFORCE_EQ(
in_var_list.size(), out_var_list.size(),
platform::errors::PreconditionNotMet(
"Op [%s]: Input var size should be equal with output var size",
op_.Type()));
auto& out_var_names = op_.Outputs(out);
for (size_t i = 0; i < in_var_list.size(); ++i) {
if (out_var_names[i] == framework::kEmptyVarName) {
continue;
}
Variable* in_var = in_var_list[i];
if (!in_var->IsType<LoDTensor>()) return;
Variable* out_var = out_var_list[i];
PADDLE_ENFORCE_EQ(out_var->IsType<LoDTensor>(), true,
platform::errors::PreconditionNotMet(
"The %d-th output of Output(%s) must be LoDTensor.",
i, out_var_names[i]));
auto& in_tensor = in_var->Get<LoDTensor>();
auto* out_tensor = out_var->GetMutable<LoDTensor>();
out_tensor->set_lod(in_tensor.lod());
#ifdef PADDLE_WITH_MKLDNN
if (in_tensor.layout() != DataLayout::kMKLDNN)
#endif
out_tensor->set_layout(in_tensor.layout());
}
}
void InterpretercoreInferShapeContext::ShareLoD(const std::string& in,
const std::string& out,
size_t i, size_t j) const {
if (can_skip_lod_) {
return;
}
auto in_it = ctx_.inputs.find(in);
auto out_it = ctx_.outputs.find(out);
PADDLE_ENFORCE_NE(in_it, ctx_.inputs.end(),
platform::errors::NotFound("Input %s does not exist.", in));
PADDLE_ENFORCE_NE(
out_it, ctx_.outputs.end(),
platform::errors::NotFound("Output %s does not exist.", out));
PADDLE_ENFORCE_LT(i, in_it->second.size(),
platform::errors::InvalidArgument(
"The index of input dimension is out of range, "
"excepted index less than %zu, but received %zu.",
in_it->second.size(), i));
PADDLE_ENFORCE_LT(j, out_it->second.size(),
platform::errors::InvalidArgument(
"The index of output dimension is out of range, "
"excepted index less than %zu, but received %zu.",
out_it->second.size(), j));
Variable* in_var = in_it->second.at(i);
if (!in_var->IsType<LoDTensor>()) return;
Variable* out_var = out_it->second.at(j);
PADDLE_ENFORCE_EQ(
out_var->IsType<LoDTensor>(), true,
platform::errors::InvalidArgument(
"The %zu-th output of Output(%s) must be LoDTensor.", j, out));
auto& in_tensor = in_var->Get<LoDTensor>();
auto* out_tensor = out_var->GetMutable<LoDTensor>();
out_tensor->set_lod(in_tensor.lod());
// TODO(dzhwinter) : reuse ShareLoD in most operators.
// Need to call ShareLayout explicitly in sequence related ops.
// Shall we have a better method to shared info between in/out Tensor?
#ifdef PADDLE_WITH_MKLDNN
// Fix me: ugly workaround below
// Correct solution:
// set_layout() should NOT be called here (i.e. ShareLoD). Instead,
// layout of output tensor should be set "manually" in Compute()
// of each OPKernel. The reason layout should NOT be shared between
// input and output "automatically" (now by InferShape()->ShareLoD())
// is that layout transform may occur after InferShape().
// Workaround:
// Skip set_layout() when input layout is kMKLDNN
// This is to avoid kMKLDNN is populated wrongly into a non-MKLDNN
// OPKernel. In all MKLDNN OPkernel, set_layout(kMKLDNN) should be called
// in Compute()
if (in_tensor.layout() != DataLayout::kMKLDNN)
#endif
out_tensor->set_layout(in_tensor.layout());
}
int32_t InterpretercoreInferShapeContext::GetLoDLevel(const std::string& in,
size_t i) const {
PADDLE_THROW(platform::errors::PreconditionNotMet(
"GetLoDLevel is only used in compile time. The calculation of "
"output's actual lod is different among operators so that should be "
"set in the runtime kernel."));
}
void InterpretercoreInferShapeContext::SetLoDLevel(const std::string& out,
int32_t lod_level,
size_t j) const {
PADDLE_THROW(platform::errors::PreconditionNotMet(
"SetLoDLevel is only used in compile time. The calculation of "
"output's actual lod is different among operators so that should be "
"set in the runtime kernel."));
}
bool InterpretercoreInferShapeContext::IsRuntime() const { return true; }
// TODO(paddle-dev): Can this be template?
std::vector<InferShapeVarPtr> InterpretercoreInferShapeContext::GetInputVarPtrs(
const std::string& name) {
const std::vector<Variable*>& vars = InputVars(name);
std::vector<InferShapeVarPtr> res;
res.reserve(vars.size());
res.insert(res.begin(), vars.begin(), vars.end());
return res;
}
std::vector<InferShapeVarPtr>
InterpretercoreInferShapeContext::GetOutputVarPtrs(const std::string& name) {
const std::vector<Variable*>& vars = OutputVars(name);
std::vector<InferShapeVarPtr> res;
res.reserve(vars.size());
res.insert(res.begin(), vars.begin(), vars.end());
return res;
}
DDim InterpretercoreInferShapeContext::GetInputDim(
const std::string& name) const {
const std::vector<Variable*>& vars = InputVars(name);
PADDLE_ENFORCE_EQ(
vars.size(), 1UL,
platform::errors::InvalidArgument(
"Input(%s) should hold one element, but now it holds %zu elements.",
name, vars.size()));
return this->GetDim(vars[0]);
}
std::vector<DDim> InterpretercoreInferShapeContext::GetInputsDim(
const std::string& name) const {
const std::vector<Variable*>& vars = InputVars(name);
return GetDims(vars);
}
std::vector<proto::VarType::Type>
InterpretercoreInferShapeContext::GetInputsVarType(
const std::string& name) const {
return GetVarTypes(InputVars(name));
}
std::vector<proto::VarType::Type>
InterpretercoreInferShapeContext::GetOutputsVarType(
const std::string& name) const {
return GetVarTypes(OutputVars(name));
}
void InterpretercoreInferShapeContext::SetOutputDim(const std::string& name,
const DDim& dim) {
auto& vars = OutputVars(name);
PADDLE_ENFORCE_EQ(vars.size(), 1UL, platform::errors::InvalidArgument(
"Output(%s) should hold one element, "
"but now it holds %zu elements.",
name, vars.size()));
SetDim(vars[0], dim);
}
void InterpretercoreInferShapeContext::SetOutputsDim(
const std::string& name, const std::vector<DDim>& dims) {
auto& vars = OutputVars(name);
SetDims(vars, dims);
}
void InterpretercoreInferShapeContext::SetSkipLoD(bool skip) {
can_skip_lod_ = skip;
}
DDim InterpretercoreInferShapeContext::GetDim(Variable* var) const {
PADDLE_ENFORCE_NOT_NULL(
var, platform::errors::InvalidArgument("Input variable is nullptr."));
if (var->IsType<LoDTensor>()) {
return var->Get<LoDTensor>().dims();
} else if (var->IsType<SelectedRows>()) {
return var->Get<SelectedRows>().GetCompleteDims();
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
"Only LoDTensor or SelectedRows support 'GetDim', but input "
"Variable's type is %s.",
ToTypeName(var->Type())));
}
}
std::vector<DDim> InterpretercoreInferShapeContext::GetDims(
const std::vector<Variable*>& vars) const {
std::vector<DDim> ret;
ret.reserve(vars.size());
std::transform(vars.begin(), vars.end(), std::back_inserter(ret),
[this](Variable* var) { return this->GetDim(var); });
return ret;
}
std::vector<DDim> InterpretercoreInferShapeContext::GetRepeatedDims(
const std::string& name) const {
PADDLE_THROW(platform::errors::PreconditionNotMet(
"GetRepeatedDims method only ban be used in compile time."));
}
void InterpretercoreInferShapeContext::SetDim(Variable* var, const DDim& dim) {
if (var->IsType<LoDTensor>()) {
var->GetMutable<LoDTensor>()->Resize(dim);
} else if (var->IsType<SelectedRows>()) {
var->GetMutable<SelectedRows>()->set_height(dim[0]);
} else {
PADDLE_THROW(platform::errors::Unimplemented(
"Variable type error, expect LoDTensor or SelectedRows, but received "
"(%s).",
ToTypeName(var->Type())));
}
}
void InterpretercoreInferShapeContext::SetDims(
const std::vector<Variable*>& vars, const std::vector<DDim>& dims) {
size_t length = vars.size();
PADDLE_ENFORCE_EQ(length, dims.size(),
platform::errors::InvalidArgument(
"The number of input variables do not match the "
"number of input dimensions, the number of variables "
"is %zu, the number of dimensions is %zu.",
length, dims.size()));
for (size_t i = 0; i < length; ++i) {
if (vars[i] == nullptr) {
continue;
}
SetDim(vars[i], dims[i]);
}
}
void InterpretercoreInferShapeContext::SetRepeatedDims(
const std::string& name, const std::vector<DDim>& dims) {
PADDLE_THROW(platform::errors::PreconditionNotMet(
"SetRepeatedDims method only can be used in compile time."));
}
std::vector<proto::VarType::Type> InterpretercoreInferShapeContext::GetVarTypes(
const std::vector<Variable*>& vars) const {
std::vector<proto::VarType::Type> retv;
retv.resize(vars.size());
std::transform(
vars.begin(), vars.end(), retv.begin(),
std::bind(std::mem_fn(&InterpretercoreInferShapeContext::GetVarType),
this, std::placeholders::_1));
return retv;
}
proto::VarType::Type InterpretercoreInferShapeContext::GetVarType(
Variable* var) const {
return ToVarType(var->Type());
}
const std::vector<Variable*>& InterpretercoreInferShapeContext::InputVars(
const std::string& name) const {
auto it = ctx_.inputs.find(name);
PADDLE_ENFORCE_NE(
it, ctx_.inputs.end(),
platform::errors::NotFound("Operator (%s) does not have the input (%s).",
op_.Type(), name));
return it->second;
}
const std::vector<Variable*>& InterpretercoreInferShapeContext::OutputVars(
const std::string& name) const {
auto it = ctx_.outputs.find(name);
PADDLE_ENFORCE_NE(
it, ctx_.outputs.end(),
platform::errors::NotFound(
"Operator (%s) does not have the outputs (%s).", op_.Type(), name));
return it->second;
}
VariableScope::VariableScope(Scope* scope) {
// for @EMPTY@ variable
var_list_.push_back(nullptr);
name2id_[kEmptyVarName] = 0;
vec_meta_info_.emplace_back(0, nullptr);
scope_ = scope;
PADDLE_ENFORCE_NE(
scope, nullptr,
platform::errors::PreconditionNotMet(
"You have passed a nullptr to construct VariableScope."));
listener_ = std::make_shared<VariableScopeListener>(this);
scope->AddListener(listener_);
}
VariableScope::~VariableScope() {
if (scope_ && listener_) {
scope_->DelListener(listener_);
}
}
const Scope* VariableScope::GetScope() const { return scope_; }
Variable* VariableScope::FindVar(const std::string& name) const {
auto it = name2id_.find(name);
if (it != name2id_.end()) {
PADDLE_ENFORCE_LT(it->second, var_list_.size(),
platform::errors::NotFound(
"The id(%d) of variable(%s) should not be larger "
"than the size of variable list(%d).",
it->second, name, var_list_.size()));
return var_list_[it->second];
}
return nullptr;
}
// Get variable id by name, return -1 if not found
int VariableScope::GetIdByName(const std::string& name) const {
auto it = name2id_.find(name);
if (it != name2id_.end()) {
return it->second;
}
return -1;
}
// Get variable name by id, return "" if not found
std::string VariableScope::GetNameById(int id) const {
// NOTE(zhiqiu): do not use vec_meta_info_[id].var_desc_->Name() since
// vec_meta_info_[id].var_desc_ may be nullptr; this typically happens when
// the target variable does not exist in the original program desc but is
// created by the interpretercore, e.g. by a d2h_copy or h2d_copy operator.
auto it = std::find_if(name2id_.begin(), name2id_.end(),
[id](const auto& pair) { return pair.second == id; });
if (it != name2id_.end()) {
return it->first;
}
return "";
}
bool VariableScope::HasVar(const std::string& name) const {
return name2id_.find(name) != name2id_.end();
}
int VariableScope::VarId(const std::string& name) const {
CheckExist(name);
return name2id_.at(name);
}
Variable* VariableScope::Var(int id) const { return var_list_.at(id); }
Variable* VariableScope::Var(const std::string& name) const {
return var_list_.at(VarId(name));
}
size_t VariableScope::VarSize() const { return var_list_.size(); }
void VariableScope::AddVar(const std::string& name,
framework::VarDesc* var_desc) { // NOLINT
auto v = scope_->Var(name);
if (nullptr == var_desc) {
v->GetMutable<LoDTensor>();
} else {
InitializeVariable(
v,
var_desc
->GetType());  // Scope doesn't initialize newly created variables
}
SetVarDesc(name, var_desc);
}
void VariableScope::AddVar(const std::string& name,
const Variable& var) { // NOLINT
// Though name existed in outer_scope_, we need
// add again to create name2id map.
scope_->Var(name);
}
void VariableScope::SetVarDesc(const std::string& name,
framework::VarDesc* var_desc) {
CheckExist(name);
vec_meta_info_[VarId(name)].var_desc_ = var_desc;
}
paddle::framework::VarDesc* VariableScope::VarDesc(
const std::string& name) const {
return VarDesc(VarId(name));
}
paddle::framework::VarDesc* VariableScope::VarDesc(int id) const {
CheckExist(id);
return vec_meta_info_[id].var_desc_;
}
void VariableScope::CheckExist(int id) const {
PADDLE_ENFORCE_LT(id, var_list_.size(),
platform::errors::PreconditionNotMet(
"Required var_id < %d, but received var_id = %d.",
var_list_.size(), id));
}
void VariableScope::CheckExist(const std::string& name) const {
PADDLE_ENFORCE_EQ(HasVar(name), true, platform::errors::NotFound(
"%s not in VariableScope.", name));
}
VariableScopeListener::VariableScopeListener(VariableScope* var_scope) {
var_scope_ = var_scope;
}
void VariableScopeListener::onCreateVariable(const std::string& name) {
auto v = var_scope_->scope_->GetVar(name);  // must exist in scope_
if (!var_scope_->HasVar(name)) { // may exist in variable scope.
VLOG(4) << "Calling VariableScope::onCreateVariable with var_name: "
<< name;
var_scope_->name2id_[name] = var_scope_->VarSize();
var_scope_->var_list_.emplace_back(v);
var_scope_->vec_meta_info_.emplace_back(0, nullptr);
}
}
void VariableScopeListener::onDeleteVariable(const std::string& name) {
if (var_scope_->HasVar(name)) {
VLOG(4) << "Calling VariableScope::onDeleteVariable with var_name: "
<< name;
}
}
void VariableScopeListener::onRenameVariable(const std::string& old_name,
const std::string& new_name) {}
void VariableScopeListener::onCreateScope(Scope* Scope) {}
void VariableScopeListener::onDeleteScope(Scope* Scope) {}
void VariableScopeListener::onClear() {}
} // namespace framework
} // namespace paddle
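The SCOPE_VARS_READER_LOCK / SCOPE_VARS_WRITER_LOCK macros defined near the top of this file expand to RAII read/write guards on a vars_lock_ member, and to nothing under PADDLE_ON_INFERENCE (the member itself is still commented out in VariableScope in this commit). The pattern is a conditional reader/writer guard; below is a self-contained sketch of the same idea using std::shared_mutex as a stand-in for Paddle's RWLock/AutoRDLock/AutoWRLock. All names in the sketch are illustrative, not Paddle's:

#include <map>
#include <shared_mutex>
#include <string>

// Stand-in for the SCOPE_VARS_*_LOCK idea: on inference builds the macros
// compile away; otherwise they take a shared (reader) or exclusive (writer)
// lock for the duration of the enclosing scope.
#ifdef ON_INFER_DEMO
#define VARS_READER_LOCK
#define VARS_WRITER_LOCK
#else
#define VARS_READER_LOCK std::shared_lock<std::shared_mutex> lk(vars_mutex_);
#define VARS_WRITER_LOCK std::unique_lock<std::shared_mutex> lk(vars_mutex_);
#endif

class ToyVariableScope {
 public:
  int GetIdByName(const std::string& name) const {
    VARS_READER_LOCK  // many readers may hold this concurrently
    auto it = name2id_.find(name);
    return it == name2id_.end() ? -1 : it->second;
  }

  void AddVar(const std::string& name) {
    VARS_WRITER_LOCK  // writers are exclusive
    name2id_.emplace(name, static_cast<int>(name2id_.size()));
  }

 private:
  mutable std::shared_mutex vars_mutex_;
  std::map<std::string, int> name2id_;
};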
...@@ -19,10 +19,23 @@
 #include <vector>

 #include "paddle/fluid/framework/operator.h"
+#include "paddle/fluid/framework/rw_lock.h"
 #include "paddle/fluid/framework/variable_helper.h"
 #include "paddle/fluid/platform/device_event_base.h"
 #include "paddle/fluid/platform/event.h"

+// In inference scenarios the scopes will not be written by two threads at the
+// same time, but a scope may be read by multiple threads concurrently, and
+// the mutex would cause a serious performance issue.
+// So the mutex is disabled when `ON_INFER`.
+#ifdef PADDLE_ON_INFERENCE
+#define SCOPE_VARS_READER_LOCK
+#define SCOPE_VARS_WRITER_LOCK
+#else
+#define SCOPE_VARS_READER_LOCK AutoRDLock auto_lock(&vars_lock_);
+#define SCOPE_VARS_WRITER_LOCK AutoWRLock auto_lock(&vars_lock_);
+#endif
+
 namespace paddle {
 namespace framework {
...@@ -33,429 +46,90 @@ using OpKernelMap =
(This hunk replaces the inline method bodies that used to live in the header with plain declarations; the implementations were moved to new_executor_defs.cc, shown earlier in this commit.)

class InterpretercoreInferShapeContext : public InferShapeContext {
 public:
  InterpretercoreInferShapeContext(const OperatorBase& op,
                                   const RuntimeContext& ctx);

  bool HasInput(const std::string& name) const override;

  bool HasOutput(const std::string& name) const override;

  bool HasInputs(const std::string& name) const override;

  bool HasOutputs(const std::string& name) const override;

  AttrReader Attrs() const override;

  std::vector<std::string> Inputs(const std::string& name) const override;

  std::vector<std::string> Outputs(const std::string& name) const override;

  std::string GetInputNameByIdx(size_t idx) const override;

  std::string GetOutputNameByIdx(size_t idx) const override;

  void ShareDim(const std::string& in, const std::string& out, size_t i = 0,
                size_t j = 0) override;

  void ShareAllLoD(const std::string& in,
                   const std::string& out) const override;

  void ShareLoD(const std::string& in, const std::string& out, size_t i = 0,
                size_t j = 0) const override;

  int32_t GetLoDLevel(const std::string& in, size_t i = 0) const override;

  void SetLoDLevel(const std::string& out, int32_t lod_level,
                   size_t j = 0) const override;

  bool IsRuntime() const override;

  // TODO(paddle-dev): Can this be template?
  std::vector<InferShapeVarPtr> GetInputVarPtrs(
      const std::string& name) override;

  std::vector<InferShapeVarPtr> GetOutputVarPtrs(
      const std::string& name) override;

  DDim GetInputDim(const std::string& name) const override;

  std::vector<DDim> GetInputsDim(const std::string& name) const override;

  std::vector<proto::VarType::Type> GetInputsVarType(
      const std::string& name) const override;

  std::vector<proto::VarType::Type> GetOutputsVarType(
      const std::string& name) const override;

  void SetOutputDim(const std::string& name, const DDim& dim) override;

  void SetOutputsDim(const std::string& name,
                     const std::vector<DDim>& dims) override;

  void SetSkipLoD(bool skip);

 protected:
  DDim GetDim(Variable* var) const;

  std::vector<DDim> GetDims(const std::vector<Variable*>& vars) const;

  std::vector<DDim> GetRepeatedDims(const std::string& name) const override;

  void SetDim(Variable* var, const DDim& dim);

  void SetDims(const std::vector<Variable*>& vars,
               const std::vector<DDim>& dims);

  void SetRepeatedDims(const std::string& name,
                       const std::vector<DDim>& dims) override;

  std::vector<proto::VarType::Type> GetVarTypes(
      const std::vector<Variable*>& vars) const;

  proto::VarType::Type GetVarType(Variable* var) const;

 private:
  const std::vector<Variable*>& InputVars(const std::string& name) const;

  const std::vector<Variable*>& OutputVars(const std::string& name) const;

  const OperatorBase& op_;
  const RuntimeContext& ctx_;
...@@ -467,8 +141,28 @@ struct OpKernelFunc {
 };

 struct VariableMetaInfo {
-  int var_ref_count_;
-  paddle::framework::VarDesc* vardesc_;
+  int var_ref_count_{0};
+  framework::VarDesc* var_desc_{nullptr};
+
+  VariableMetaInfo() {}
+  VariableMetaInfo(int var_ref_count, framework::VarDesc* var_desc)
+      : var_ref_count_(var_ref_count), var_desc_(var_desc) {}
 };
class VariableScope;
class VariableScopeListener : public ScopeListener {
public:
explicit VariableScopeListener(VariableScope* var_scope_);
void onCreateVariable(const std::string& name) override;
void onDeleteVariable(const std::string& name) override;
void onRenameVariable(const std::string& old_name,
const std::string& new_name) override;
void onCreateScope(Scope* Scope) override;
void onDeleteScope(Scope* Scope) override;
void onClear() override;
private:
VariableScope* var_scope_; // not owned
};

// TODO(zhiqiu): Maybe we need to add rwlock for VariableScope?

...@@ -477,171 +171,61 @@ struct VariableMetaInfo {
// ScopeBase. Scope manages the variables and VariableScope is just a quick
// access mechanism. ScopeListener is the callback used to sync changes in the
// original Scope. We could make it a member of VariableScope; here we use
// inheritance.
(This hunk likewise turns VariableScope's inline definitions into declarations; the bodies were moved to new_executor_defs.cc above, and the ScopeListener callbacks that VariableScope used to override are now handled by the separate VariableScopeListener class declared earlier.)
class VariableScope : public ScopeBase {
 public:
  explicit VariableScope(Scope* scope);

  const Scope* GetScope() const;

  Variable* FindVar(const std::string& name) const;

  ~VariableScope();

  // Get variable id by name, return -1 if not found
  int GetIdByName(const std::string& name) const;

  // Get variable name by id, return "" if not found
  std::string GetNameById(int id) const;

  bool HasVar(const std::string& name) const;

  int VarId(const std::string& name) const;

  Variable* Var(int id) const;

  Variable* Var(const std::string& name) const;

  size_t VarSize() const;

  void AddVar(const std::string& name, VarDesc* var_desc);

  void AddVar(const std::string& name, const Variable& var);

  void SetVarDesc(const std::string& name, framework::VarDesc* var_desc);

  paddle::framework::VarDesc* VarDesc(const std::string& name) const;

  paddle::framework::VarDesc* VarDesc(int id) const;

  void CheckExist(int id) const;

  void CheckExist(const std::string& name) const;

  std::vector<VariableMetaInfo>& MutableVecMetaInfo() { return vec_meta_info_; }

  const std::vector<VariableMetaInfo>& VecMetaInfo() const {
    return vec_meta_info_;
  }

  friend class VariableScopeListener;

 private:
  std::vector<Variable*> var_list_;
  std::map<std::string, int> name2id_;
  std::vector<VariableMetaInfo> vec_meta_info_;
  Scope* scope_ = nullptr;
  // mutable RWLock vars_lock_;
  std::shared_ptr<VariableScopeListener> listener_;
};

class NextInstruction {
......
...@@ -23,16 +23,14 @@ StandaloneExecutor::StandaloneExecutor(const platform::Place& place,
     : place_(place),
       startup_prog_(startup_prog),
       main_prog_(main_prog),
-      outer_scope_(scope),
-      global_scope_(scope) {
-  paddle::framework::InitDevices();
+      global_scope_(VariableScope(scope)) {
   // init scope
-  BuildVariableOuterScope(startup_prog, &global_scope_, scope);
+  BuildVariableScope(startup_prog, &global_scope_);

-  if (outer_scope_ != nullptr) {
-    auto name_list = outer_scope_->LocalVarNames();
+  if (scope != nullptr) {
+    auto name_list = scope->LocalVarNames();
     for (auto name : name_list) {
-      auto v = outer_scope_->Var(name);
+      auto v = scope->Var(name);
       if (!global_scope_.HasVar(name)) {
         global_scope_.AddVar(name, *v);
       }
...@@ -62,9 +60,8 @@ framework::interpreter::CostInfo StandaloneExecutor::DryRun(
   return core->DryRun(feed_names, feed_tensors);
 }

-void StandaloneExecutor::BuildVariableOuterScope(
-    const framework::ProgramDesc& pdesc, VariableScope* var_scope,
-    Scope* outer_scope) {
+void StandaloneExecutor::BuildVariableScope(const framework::ProgramDesc& pdesc,
+                                            VariableScope* var_scope) {
   auto& global_block = pdesc.Block(0);
   for (auto& var : global_block.AllVars()) {
......
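Note that the constructor no longer calls paddle::framework::InitDevices(); per the commit message ("remove initdevices"), device initialization is now the caller's responsibility, which is why the test main() further below gains an explicit InitDevices() call. A minimal sketch of the resulting call order when embedding the executor; the header paths and the RunOnce wrapper are assumptions for illustration, and the program/scope setup is elided:

#include "paddle/fluid/framework/new_executor/standalone_executor.h"  // assumed path
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/platform/init.h"

int RunOnce(const paddle::framework::ProgramDesc& startup_prog,
            const paddle::framework::ProgramDesc& main_prog) {
  // Must happen before the executor is constructed, since the constructor
  // no longer initializes devices itself.
  paddle::framework::InitDevices();

  paddle::framework::Scope scope;
  paddle::platform::CPUPlace place;
  paddle::framework::StandaloneExecutor exec(place, startup_prog, main_prog,
                                             &scope);
  // ... feed / run / fetch as in the test main() below ...
  return 0;
}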
...@@ -50,8 +50,8 @@ class StandaloneExecutor : public ExecutorBase {
       const std::vector<framework::LoDTensor>& feed_tensors);

  private:
-  void BuildVariableOuterScope(const framework::ProgramDesc& pdesc,
-                               VariableScope* var_scope, Scope* outer_scope);
+  void BuildVariableScope(const framework::ProgramDesc& pdesc,
+                          VariableScope* var_scope);

   std::shared_ptr<InterpreterCore> GetInterpreterCore(
       const std::vector<std::string>& feed_names,
...@@ -60,7 +60,6 @@ class StandaloneExecutor : public ExecutorBase {
   const platform::Place& place_;
   const ProgramDesc& startup_prog_;
   const ProgramDesc& main_prog_;
-  Scope* outer_scope_;
   VariableScope global_scope_;
   std::unordered_map<std::string, std::shared_ptr<ProgramDesc>> programs_;
......
...@@ -75,6 +75,7 @@ paddle::framework::ProgramDesc load_from_file(const std::string& file_name) {
 }

 int main(int argc, char* argv[]) {
+  paddle::framework::InitDevices();
   std::cout << "main" << std::endl;
   int64_t batch_size = std::stoi(argv[1]);
   paddle::framework::InitDevices();
......
...@@ -266,14 +266,14 @@ Variable* Scope::FindVarLocally(const std::string& name) const {
   return nullptr;
 }

-void Scope::AddListener(ScopeListener* listener) {
+void Scope::AddListener(const std::shared_ptr<ScopeListener>& listener) {
   auto it = std::find(listeners_.begin(), listeners_.end(), listener);
   if (it == listeners_.end()) {
     listeners_.push_back(listener);
   }
 }

-void Scope::DelListener(ScopeListener* listener) {
+void Scope::DelListener(const std::shared_ptr<ScopeListener>& listener) {
   listeners_.remove(listener);
 }
......
...@@ -144,9 +144,9 @@ class Scope : public ScopeBase {
   // Rename variable to a new name and return the new name
   std::string Rename(const std::string& origin_name) const;

-  void AddListener(ScopeListener* listener);
+  void AddListener(const std::shared_ptr<ScopeListener>& listener);

-  void DelListener(ScopeListener* listener);
+  void DelListener(const std::shared_ptr<ScopeListener>& listener);

  protected:
   struct KeyHasher {
...@@ -184,7 +184,7 @@ class Scope : public ScopeBase {
   // Scope in `kids_` are owned by this class.
   mutable std::list<Scope*> kids_;
   const Scope* parent_{nullptr};
-  std::list<ScopeListener*> listeners_;
+  std::list<std::shared_ptr<ScopeListener>> listeners_;

   DISABLE_COPY_AND_ASSIGN(Scope);
......
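With Scope now holding listeners as std::shared_ptr (the hunk above), VariableScope registers its VariableScopeListener in its constructor and removes it in its destructor, as shown in new_executor_defs.cc earlier. A minimal sketch of wiring up a custom listener the same way, assuming ScopeListener is visible through scope.h and that its callbacks match the overrides listed for VariableScopeListener above (the LoggingListener/AttachLogger names are illustrative, not part of this commit):

#include <memory>
#include <string>

#include "paddle/fluid/framework/scope.h"

// Illustrative listener: reacts to variable creation in a Scope, registered
// and unregistered via shared_ptr just like VariableScope's own listener.
class LoggingListener : public paddle::framework::ScopeListener {
 public:
  void onCreateVariable(const std::string& name) override { /* e.g. record name */ }
  void onDeleteVariable(const std::string& name) override {}
  void onRenameVariable(const std::string& old_name,
                        const std::string& new_name) override {}
  void onCreateScope(paddle::framework::Scope* scope) override {}
  void onDeleteScope(paddle::framework::Scope* scope) override {}
  void onClear() override {}
};

void AttachLogger(paddle::framework::Scope* scope) {
  auto listener = std::make_shared<LoggingListener>();
  scope->AddListener(listener);  // Scope keeps the shared_ptr in listeners_
  // ... work with the scope; callbacks fire as variables are created/deleted ...
  scope->DelListener(listener);
}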
...@@ -275,7 +275,7 @@ class TestException(unittest.TestCase):
         for feed in feeds:
             out = exe.run(main_program, feed=feed, fetch_list=fetch_vars)
-            print(out)
+            print(main_program)
         return out

     def run_new_executor(self, feed):
...@@ -287,10 +287,10 @@ class TestException(unittest.TestCase):
     def test_exception(self):
         feed = [{
             'id': np.array([1, 2, 3, 4, 5]).astype(np.int64),
-            'data': np.array([1, 2, 3, 4]).astype(np.float32),
+            'data': np.array([1, 2, 3]).astype(np.float32),
         }, {
             'id': np.array([1, 2, 3, 4, 11]).astype(np.int64),
-            'data': np.array([1, 2, 3, 4]).astype(np.float32),
+            'data': np.array([1, 2, 3]).astype(np.float32),
         }]
         self.assertRaises(ValueError, self.run_new_executor, feed)
...@@ -307,6 +307,18 @@ class TestException(unittest.TestCase):
         feed[1]['data'][0] = np.nan
         self.assertRaises(RuntimeError, self.run_new_executor, feed)

+    def test_scope(self):
+        feed = [{
+            'id': np.array([1, 2, 3, 4, 5]).astype(np.int64),
+            'data': np.array([1, 2, 3]).astype(np.float32),
+        }, {
+            'id': np.array([1, 2, 3, 4, 5]).astype(np.int64),
+            'data': np.array([2, 2, 2]).astype(np.float32),
+        }]
+        self.run_new_executor(feed)
+        self.assertIsNotNone(paddle.static.global_scope().find_var(
+            'embedding.tmp_2'))

 if __name__ == "__main__":
     unittest.main()