Unverified · Commit fd192303 authored by zhangbo9674, committed by GitHub

[IR] Add Dependency build for new ir interpretercore (#55468)

* add interface

* add code

* add code

* add code

* add code

* fix bug

* fix bug
Parent 4a55f5e7
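At a high level, this patch makes each new-IR instruction expose ir::Value-keyed input/output index maps, adds an IrDependencyBuilder that turns those maps into a downstream-op graph, and lets NewIRInterpreter derive a per-instruction dependency count from that graph. As a minimal, self-contained sketch of how such a downstream map is consumed (simplified names, not the actual Paddle API; the real counting happens in BuildInstructionDependences further down):

#include <cstddef>
#include <map>
#include <set>
#include <vector>

// Given "prior op index -> set of ops that must run after it", compute how
// many unfinished predecessors each op has. Ops whose count is zero are ready
// to be scheduled first; the interpreter keeps an analogous dependecy_count_.
std::vector<size_t> CountDependencies(
    const std::map<size_t, std::set<size_t>>& downstream_map, size_t op_num) {
  std::vector<size_t> dependency_count(op_num, 0);
  for (const auto& prior_and_nexts : downstream_map) {
    for (size_t next_op : prior_and_nexts.second) {
      ++dependency_count[next_op];  // one more predecessor for next_op
    }
  }
  return dependency_count;
}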
......@@ -84,12 +84,12 @@ void InstructionBase::AddInplace(Variable* in, Variable* out) {
void InstructionBase::ClearInplace() { vec_inplace_in_to_out_.clear(); }
void InstructionBase::SetInputs(
const std::map<std::string, std::vector<int>>& inputs) {
const std::unordered_map<ir::Value, std::vector<int>>& inputs) {
input_index_ = inputs;
}
void InstructionBase::SetOutputs(
const std::map<std::string, std::vector<int>>& outputs) {
const std::unordered_map<ir::Value, std::vector<int>>& outputs) {
output_index_ = outputs;
}
......
......@@ -22,6 +22,10 @@
#include "paddle/fluid/framework/new_executor/new_executor_defs.h"
#include "paddle/fluid/platform/event.h"
namespace ir {
class Value;
} // namespace ir
namespace paddle {
namespace framework {
......@@ -103,24 +107,27 @@ class InstructionBase {
std::map<int, int>& GetMutableInplaceBackMap() { return inplace_back_map_; }
const std::map<int, int>& GetInplaceBackMap() { return inplace_back_map_; }
const std::map<std::string, std::vector<int>>& Inputs() const {
const std::unordered_map<ir::Value, std::vector<int>>& Inputs() const {
return input_index_;
}
std::map<std::string, std::vector<int>>& GetMutableInputs() {
std::unordered_map<ir::Value, std::vector<int>>& GetMutableInputs() {
return input_index_;
}
void SetInputs(const std::map<std::string, std::vector<int>>& inputs);
void SetInputs(const std::unordered_map<ir::Value, std::vector<int>>& inputs);
const std::map<std::string, std::vector<int>>& Outputs() const {
const std::unordered_map<ir::Value, std::vector<int>>& Outputs() const {
return output_index_;
}
std::map<std::string, std::vector<int>>& GetMutableOutputs() {
std::unordered_map<ir::Value, std::vector<int>>& GetMutableOutputs() {
return output_index_;
}
void SetOutputs(const std::map<std::string, std::vector<int>>& outputs);
void SetOutputs(
const std::unordered_map<ir::Value, std::vector<int>>& outputs);
virtual void Run() = 0;
virtual const std::string& Name() const = 0;
private:
size_t id_;
......@@ -130,25 +137,31 @@ class InstructionBase {
// dist attrs:lower value, higher priority
int stream_priority_{0};
SchedulingPriority scheduling_priority_{0};
std::string execution_stream_{kDefaultStream};
platform::DeviceContext* dev_ctx_; // not owned
std::vector<size_t> next_instrs_in_different_thread_;
std::vector<size_t> next_instrs_in_same_thread_;
std::shared_ptr<EventInter> event_to_record_;
std::vector<EventInter> events_to_wait_;
std::vector<size_t> gc_check_vars_;
std::vector<std::pair<Variable*, Variable*>>
vec_inplace_in_to_out_; // If shared data is not used, is this still needed?
std::map<int, int> inplace_back_map_;
std::map<std::string, std::vector<int>> input_index_;
std::map<std::string, std::vector<int>> output_index_;
std::unordered_map<ir::Value, std::vector<int>> input_index_;
std::unordered_map<ir::Value, std::vector<int>> output_index_;
};
} // namespace framework
......
......@@ -73,7 +73,10 @@ PhiKernelInstruction::PhiKernelInstruction(
ir::Operation* op,
Scope* scope,
Scope* local_scope,
const std::unordered_map<::ir::Value, std::string>& value_2_name_map)
const std::unordered_map<::ir::Value, std::string>& value_2_var_name,
const std::map<std::string, int>& var_name_2_id,
const std::unordered_map<const paddle::framework::Variable*, std::string>&
variable_2_var_name)
: InstructionBase(id, place) {
auto op_attributes = op->attributes();
auto op_name =
......@@ -81,14 +84,7 @@ PhiKernelInstruction::PhiKernelInstruction(
ir::OpInfo op_info = ir::IrContext::Instance()->GetRegisteredOpInfo(op_name);
phi_op_name_ = op_name;
if (op_name == "builtin.combine" || op_name == "pd.feed" ||
op_name == "builtin.set_parameter" ||
op_name == "builtin.get_parameter") {
VLOG(6) << "skip process " << op_name;
SetArtificial(true);
return;
}
VLOG(6) << "construct phi kernel instruction for: " << phi_op_name_;
// Todo: support paddle::dialect::DistAttribute
// if (op_attributes.count("dist_attr") != 0) {
......@@ -117,15 +113,24 @@ PhiKernelInstruction::PhiKernelInstruction(
// op_func_node.scheduling_priority_ = 1;
// }
// }
VLOG(6) << "finish process dist attributes";
SetKernelType(AnalyseOpFuncType(op, place));
VLOG(6) << "finish process analyse kernel type";
infer_meta_interface_ =
op_info.GetInterfaceImpl<paddle::dialect::InferMetaInterface>();
VLOG(6) << "finish process infer_meta_interface_";
auto yaml_interface =
op_info.GetInterfaceImpl<paddle::dialect::OpYamlInfoInterface>();
PADDLE_ENFORCE_NOT_NULL(
yaml_interface,
phi::errors::PreconditionNotMet(
"can not find OpYamlInfoInterface from [%s]", phi_op_name_));
paddle::dialect::OpYamlInfoParser yaml_info_parser(
yaml_interface->get_op_info_());
VLOG(6) << "finish process yaml_info_parser";
::ir::BuildPhiContext<
phi::InferMetaContext,
......@@ -134,7 +139,7 @@ PhiKernelInstruction::PhiKernelInstruction(
paddle::small_vector<phi::MetaTensor, phi::kInputSmallVectorSize>,
paddle::small_vector<phi::MetaTensor, phi::kInputSmallVectorSize>,
false>(op,
value_2_name_map,
value_2_var_name,
scope,
local_scope,
yaml_info_parser,
......@@ -159,13 +164,11 @@ PhiKernelInstruction::PhiKernelInstruction(
paddle::small_vector<const phi::TensorBase*>,
paddle::small_vector<phi::TensorBase*>,
true>(op,
value_2_name_map,
value_2_var_name,
scope,
local_scope,
yaml_info_parser,
&kernel_context_,
&(GetMutableInputs()),
&(GetMutableOutputs()));
&kernel_context_);
kernel_context_.SetDeviceContext(phi::DeviceContextPool::Instance().Get(
phi::TransToPhiPlace(kernel_key.backend())));
VLOG(6) << "finish process kernel context";
......@@ -173,13 +176,90 @@ PhiKernelInstruction::PhiKernelInstruction(
SetDeviceContext(phi::DeviceContextPool::Instance().Get(
phi::TransToPhiPlace(kernel_key.backend())));
VLOG(6) << "finish process device context";
Scope* inner_scope = local_scope == nullptr ? scope : local_scope;
InitInputsOutputsIds(
op, inner_scope, value_2_var_name, var_name_2_id, variable_2_var_name);
VLOG(6) << "finish process inputs outputs index";
}
std::vector<int> GetValueIds(
ir::Value value,
Scope* inner_scope,
const std::unordered_map<::ir::Value, std::string>& value_2_var_name,
const std::map<std::string, int>& var_name_2_id,
const std::unordered_map<const paddle::framework::Variable*, std::string>&
variable_2_var_name) {
std::vector<int> ids;
std::string var_name = value_2_var_name.at(value);
ids.push_back(var_name_2_id.at(var_name));
// NOTE(zhangbo): Value may be a VariableRefArray
auto var = inner_scope->FindVar(var_name);
if (var->IsType<paddle::framework::VariableRefArray>()) {
auto& var_array = var->Get<paddle::framework::VariableRefArray>();
for (size_t i = 0; i < var_array.size(); ++i) {
ids.push_back(var_name_2_id.at(variable_2_var_name.at(var_array[i])));
}
}
return ids;
}
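The helper above flattens one ir::Value into variable ids: the id of the variable the Value maps to and, when that variable is a VariableRefArray (for example the result of builtin.combine), the id of every element it references. A hedged, standalone sketch of that flattening with stand-in types (VarStub is hypothetical, not a Paddle class):

#include <map>
#include <string>
#include <vector>

// Stand-in for a scope variable: its name plus the names it references when
// it acts as a ref-array (empty for an ordinary dense tensor).
struct VarStub {
  std::string name;
  std::vector<std::string> ref_array_elements;
};

std::vector<int> GetValueIdsSketch(const VarStub& var,
                                   const std::map<std::string, int>& name2id) {
  std::vector<int> ids{name2id.at(var.name)};           // the variable itself
  for (const auto& element : var.ref_array_elements) {  // plus every element
    ids.push_back(name2id.at(element));                 // the ref-array holds
  }
  return ids;
}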
void PhiKernelInstruction::InitInputsOutputsIds(
::ir::Operation* op,
Scope* inner_scope,
const std::unordered_map<::ir::Value, std::string>& value_2_var_name,
const std::map<std::string, int>& var_name_2_id,
const std::unordered_map<const paddle::framework::Variable*, std::string>&
variable_2_var_name) {
std::unordered_map<ir::Value, std::vector<int>> inputs;
for (size_t i = 0; i < op->num_operands(); i++) {
ir::Value value = op->operand(i);
if (value) {
PADDLE_ENFORCE_NE(
value_2_var_name.find(value),
value_2_var_name.end(),
phi::errors::PreconditionNotMet(
"input should in name map, [%d] 'th input of [%s] op",
i,
phi_op_name_));
std::vector<int> inputs_id = GetValueIds(value,
inner_scope,
value_2_var_name,
var_name_2_id,
variable_2_var_name);
inputs.emplace(value, inputs_id);
}
}
SetInputs(inputs);
VLOG(8) << "finish process inputs_index";
std::unordered_map<ir::Value, std::vector<int>> outputs;
for (size_t i = 0; i < op->num_results(); i++) {
ir::Value value = op->result(i);
if (value) {
PADDLE_ENFORCE_NE(
value_2_var_name.find(value),
value_2_var_name.end(),
phi::errors::PreconditionNotMet(
"input should in name map, [%d] 'th input of [%s] op",
i,
phi_op_name_));
std::vector<int> outputs_id = GetValueIds(value,
inner_scope,
value_2_var_name,
var_name_2_id,
variable_2_var_name);
outputs.emplace(value, outputs_id);
}
}
SetOutputs(outputs);
VLOG(8) << "finish process outputs_index";
}
void PhiKernelInstruction::Run() {
VLOG(5) << "Run op " << phi_op_name_ << " infer meta.";
infer_meta_interface_->infer_meta_(&(infer_meta_context_));
VLOG(5) << "Run op " << phi_op_name_ << " kernel.";
VLOG(6) << "Run op " << phi_op_name_ << " infer meta.";
(*(phi_kernel_))(&(kernel_context_));
VLOG(6) << "Run op " << phi_op_name_ << " kernel.";
}
} // namespace framework
......
......@@ -33,9 +33,10 @@ class PhiKernelInstruction : public InstructionBase {
::ir::Operation* op,
Scope* scope,
Scope* local_scope,
const std::unordered_map<::ir::Value, std::string>& value_2_name_map);
const std::string& PhiOpName() const { return phi_op_name_; }
const std::unordered_map<::ir::Value, std::string>& value_2_var_name,
const std::map<std::string, int>& var_name_2_id,
const std::unordered_map<const paddle::framework::Variable*, std::string>&
variable_2_var_name);
phi::Kernel* PhiKernel() const { return phi_kernel_; }
......@@ -51,7 +52,17 @@ class PhiKernelInstruction : public InstructionBase {
void Run() override;
const std::string& Name() const override { return phi_op_name_; }
private:
void InitInputsOutputsIds(
::ir::Operation* op,
Scope* inner_scope,
const std::unordered_map<::ir::Value, std::string>& value_2_var_name,
const std::map<std::string, int>& var_name_2_id,
const std::unordered_map<const paddle::framework::Variable*, std::string>&
variable_2_var_name);
std::string phi_op_name_;
paddle::dialect::InferMetaInterface::Concept* infer_meta_interface_{
......
......@@ -15,6 +15,7 @@
#include "paddle/fluid/framework/new_executor/interpreter/dependency_builder.h"
#include <queue>
#include "paddle/fluid/framework/new_executor/instruction/instruction_base.h"
#include "paddle/fluid/framework/new_executor/interpreter/interpreter_util.h"
#include "paddle/fluid/platform/flags.h"
PADDLE_DEFINE_EXPORTED_bool(
......@@ -527,6 +528,247 @@ void DependencyBuilder::ShrinkDownstreamMap() {
<< StringizeDownstreamMap(op_downstream_map_);
}
/// ======================== ///
/// For new ir ///
/// ======================== ///
const std::map<size_t, std::set<size_t>>& IrDependencyBuilder::Build(
const std::vector<std::unique_ptr<paddle::framework::InstructionBase>>&
instructions) {
if (is_build_) {
return op_downstream_map_;
}
instructions_ = &instructions;
op_num_ = instructions_->size();
ops_before_.assign(op_num_, {});
ops_behind_.assign(op_num_, {});
op_happens_before_.assign(op_num_, std::vector<bool>(op_num_, false));
BuildDownstreamMap();
VLOG(6) << "Finish BuildDownstreamMap";
ShrinkDownstreamMap();
VLOG(6) << "Finish ShrinkDownstreamMap";
if (FLAGS_new_executor_sequential_run) {
AddDependencyForSequentialRun();
}
// TODO(zhangbo): Add dependency for special op ?
VLOG(6) << "Finish build dependency";
VLOG(8) << "downstream count: " << CountDownstreamMap(op_downstream_map_);
VLOG(8) << "downstream_map: " << std::endl
<< StringizeDownstreamMap(op_downstream_map_);
is_build_ = true;
return op_downstream_map_;
}
void IrDependencyBuilder::BuildDownstreamMap() {
auto var2min_rw_op =
std::map<size_t, std::list<size_t>>(); // # map from variable id to read
// write op id.
auto var2recent_write_op =
std::map<size_t, size_t>(); // # map from variable to recent write op.
auto op2dependences =
std::map<size_t,
std::set<size_t>>(); //# map from op to the dependence list,
// op must run after the dependence.
std::set<size_t>
remove_duplicate; // remove the duplicate between inputs and outputs
// reserve
for (size_t op_idx = 0; op_idx < op_num_; ++op_idx) {
op2dependences[op_idx] = std::set<size_t>();
}
auto update_var_min_rw_op =
[](const std::map<size_t, std::set<size_t>>& op2dependences,
std::map<size_t, std::list<size_t>>* var2min_rw_op,
size_t cur_op,
size_t rw_var) {
// rw_var is an input or output of cur_op;
// this function updates the var2min_rw_op set.
if (var2min_rw_op->find(rw_var) == var2min_rw_op->end()) {
(*var2min_rw_op)[rw_var] = std::list<size_t>();
}
for (auto dep_op : op2dependences.at(cur_op)) {
var2min_rw_op->at(rw_var).remove(dep_op);
}
var2min_rw_op->at(rw_var).push_back(cur_op);
};
for (size_t op_idx = 0; op_idx < op_num_; ++op_idx) {
remove_duplicate.clear();
// step1: update the op2dependences structure
for (auto& item :
instructions_->at(op_idx)->Inputs()) { // for all inputs(read only)
for (auto var : item.second) {
if (var2recent_write_op.count(var))
op2dependences[op_idx].insert(var2recent_write_op[var]);
}
}
for (auto& item :
instructions_->at(op_idx)->Outputs()) { // for all write vars
for (auto var : item.second) {
if (var2min_rw_op.count(var)) {
for (auto dep_op : var2min_rw_op[var]) {
op2dependences[op_idx].insert(dep_op);
}
}
}
}
// step2: update 2 var2xxxx data structure
for (auto& item :
instructions_->at(op_idx)->Outputs()) { // for all write vars
for (auto var : item.second) {
var2recent_write_op[var] = op_idx;
var2min_rw_op[var] = {static_cast<size_t>(op_idx)};
remove_duplicate.insert(var);
}
}
for (auto& item :
instructions_->at(op_idx)->Inputs()) { // for all inputs(read only)
for (auto var : item.second) {
if (remove_duplicate.count(var) ==
0) { // var in input list and in output list, so remove it.
update_var_min_rw_op(op2dependences, &var2min_rw_op, op_idx, var);
}
}
}
}
// Convert op2dependences to downstream_map directly. op2dependences maps an
// op to its dependences; we want the op -> [next ops] map, where [next ops]
// are the instructions that must run after op. The size of downstream_map !=
// size of op2dependences since some ops have no downstream op.
for (auto& item : op2dependences) {
size_t op = item.first;
for (auto dep_op : item.second) {
AddDownstreamOp(dep_op, op);
}
}
}
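To make the read/write rules above concrete, here is a small hypothetical trace (the ops and variable ids are made up): op0 writes var 1, op1 reads var 1 and writes var 2, op2 overwrites var 1. The snippet below only checks that inverting the resulting op2dependences gives the expected downstream map, which is what the final loop does via AddDownstreamOp:

#include <cassert>
#include <cstddef>
#include <map>
#include <set>

int main() {
  // op0: writes var 1              -> no dependence
  // op1: reads var 1, writes var 2 -> depends on op0 (read-after-write on 1)
  // op2: writes var 1              -> depends on op1, the minimal
  //                                   reader/writer of var 1 at that point
  std::map<size_t, std::set<size_t>> op2dependences{
      {0, {}}, {1, {0}}, {2, {1}}};

  // Invert "op -> ops it depends on" into "op -> ops that run after it".
  std::map<size_t, std::set<size_t>> downstream;
  for (const auto& op_and_deps : op2dependences) {
    for (size_t dep : op_and_deps.second) {
      downstream[dep].insert(op_and_deps.first);
    }
  }
  assert((downstream ==
          std::map<size_t, std::set<size_t>>{{0, {1}}, {1, {2}}}));
  return 0;
}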
void IrDependencyBuilder::AddDownstreamOp(size_t prior_op_idx,
size_t posterior_op_idx) {
PADDLE_ENFORCE_EQ(
OpHappensBefore(posterior_op_idx, prior_op_idx),
false,
phi::errors::Unavailable(
"Can not add dependency %d->%d because %d is run before %d",
prior_op_idx,
posterior_op_idx,
posterior_op_idx,
prior_op_idx));
std::set<size_t>& downstream_ops = op_downstream_map_[prior_op_idx];
// NOTE(Ruibiao): Here the downstream map shrinking is best-effort, therefore
// ShrinkDownstreamMap after BuildDownstreamMap is still helpful. For example,
// a->c will not be shrunk in the following case: AddDownstreamOp(a, b) ->
// AddDownstreamOp(a, c) -> AddDownstreamOp(b, c); it is later shrunk by
// ShrinkDownstreamMap.
for (size_t op_idx : downstream_ops) {
if (OpHappensBefore(op_idx, posterior_op_idx)) {
VLOG(7) << "Find dependencies " << prior_op_idx << "->" << op_idx << "->"
<< posterior_op_idx << ", skip adding " << prior_op_idx << "->"
<< posterior_op_idx;
return;
}
}
downstream_ops.insert(posterior_op_idx);
std::vector<size_t> prior_of_prior = ops_before_[prior_op_idx];
std::vector<size_t> posterior_of_posterior = ops_behind_[posterior_op_idx];
auto update_op_happen_before = [this](size_t prior_op_idx,
size_t posterior_op_idx) {
if (!op_happens_before_[prior_op_idx][posterior_op_idx]) {
op_happens_before_[prior_op_idx][posterior_op_idx] = true;
ops_before_[posterior_op_idx].push_back(prior_op_idx);
ops_behind_[prior_op_idx].push_back(posterior_op_idx);
}
};
update_op_happen_before(prior_op_idx, posterior_op_idx);
// All ops before prior-op are also before posterior-op
for (size_t op_idx : prior_of_prior) {
update_op_happen_before(op_idx, posterior_op_idx);
}
// All ops after posterior-op are also after prior-op
for (size_t op_idx : posterior_of_posterior) {
update_op_happen_before(prior_op_idx, op_idx);
}
VLOG(8) << prior_op_idx << "->" << posterior_op_idx;
VLOG(8) << "Add dependency from " << instructions_->at(prior_op_idx)->Name()
<< "(" << prior_op_idx << ") to "
<< instructions_->at(posterior_op_idx)->Name() << "("
<< posterior_op_idx << ")";
}
void IrDependencyBuilder::ShrinkDownstreamMap() {
// remove unnecessary downstream ops
// for example, a->b->c
// a: b, c
// b: c
// =>
// a: b
// b: c
// Shrink: keep only the downstream ops that no other op in the downstream
// list happens before.
for (size_t i = 0; i < op_num_; ++i) {
if (op_downstream_map_.find(i) == op_downstream_map_.end()) {
continue;
}
std::set<size_t> minumum_nexts;
for (size_t item : op_downstream_map_.at(i)) {
bool not_after_any = true;
// find the ops that are not executed after any other downstream op
for (size_t other_item : op_downstream_map_.at(i)) {
if (OpHappensBefore(other_item, item)) {
VLOG(8) << "happens_before: " << other_item << "->" << item
<< ", so skip " << item;
not_after_any = false;
break;
}
}
if (not_after_any) {
VLOG(8) << "downstream op of " << i << ": " << item;
minumum_nexts.insert(item);
}
}
// NOTE(Ruibiao): op_happens_before will not be changed when shrinking the
// downstream map
op_downstream_map_.at(i) = minumum_nexts;
}
VLOG(8) << "Finish shrink downstream map";
VLOG(8) << "downstream count: " << CountDownstreamMap(op_downstream_map_);
VLOG(8) << "downstream_map: " << std::endl
<< StringizeDownstreamMap(op_downstream_map_);
}
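Following the NOTE in AddDownstreamOp, a quick standalone illustration of why this pass is still needed: calling AddDownstreamOp(0, 1), AddDownstreamOp(0, 2) and then AddDownstreamOp(1, 2) leaves the redundant edge 0->2 in the map, because 1->2 was not known yet when 0->2 was added. The sketch below uses a map-of-sets for happens-before instead of the real boolean matrix; the names are illustrative only:

#include <cassert>
#include <cstddef>
#include <map>
#include <set>

// Keep only the downstream ops that no other downstream op happens before.
std::set<size_t> Shrink(
    const std::set<size_t>& nexts,
    const std::map<size_t, std::set<size_t>>& happens_before) {
  std::set<size_t> minimum_nexts;
  for (size_t item : nexts) {
    bool not_after_any = true;
    for (size_t other : nexts) {
      if (happens_before.count(other) && happens_before.at(other).count(item)) {
        not_after_any = false;  // some other downstream op precedes item
        break;
      }
    }
    if (not_after_any) minimum_nexts.insert(item);
  }
  return minimum_nexts;
}

int main() {
  // After the three AddDownstreamOp calls: 0 happens before 1 and 2, and
  // 1 happens before 2, but downstream(0) still contains both 1 and 2.
  std::map<size_t, std::set<size_t>> happens_before{{0, {1, 2}}, {1, {2}}};
  std::set<size_t> downstream_of_0{1, 2};
  assert((Shrink(downstream_of_0, happens_before) == std::set<size_t>{1}));
  return 0;
}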
void IrDependencyBuilder::AddDependencyForSequentialRun() {
size_t dependence_op_idx = ULLONG_MAX;
for (size_t op_idx = 0; op_idx < op_num_; ++op_idx) {
if (dependence_op_idx != ULLONG_MAX) {
AddDownstreamOp(dependence_op_idx, op_idx);
}
dependence_op_idx = op_idx;
}
}
} // namespace interpreter
} // namespace framework
} // namespace paddle
......@@ -23,6 +23,7 @@ DECLARE_bool(new_executor_sequential_run);
namespace paddle {
namespace framework {
class InstructionBase;
namespace interpreter {
// DependencyBuilder provides some dependency adding function to handle the
......@@ -84,6 +85,64 @@ class DependencyBuilder {
std::vector<std::vector<bool>> op_happens_before_;
};
// /// ======================== ///
// /// For new ir ///
// /// ======================== ///
class IrDependencyBuilder {
public:
IrDependencyBuilder() : is_build_(false), instructions_(nullptr) {}
// build op dependencies and return the mapping from op to its downstream-op
// set
const std::map<size_t, std::set<size_t>>& Build(
const std::vector<std::unique_ptr<paddle::framework::InstructionBase>>&
instructions);
const std::map<size_t, std::set<size_t>>& OpDownstreamMap() const;
bool OpHappensBefore(size_t prior_op_idx, size_t posterior_op_idx) const {
PADDLE_ENFORCE_GE(
op_happens_before_.size(),
0,
phi::errors::Unavailable("op_happen_before is not yet built"));
return op_happens_before_.at(prior_op_idx).at(posterior_op_idx);
}
private:
void AddDependencyForCoalesceTensorOp();
void AddDependencyForCommunicationOp();
void AddDependencyForRandomOp();
void AddDependencyForReadOp();
void AddDependencyForSequentialRun();
void AddDownstreamOp(size_t prior_op_idx, size_t posterior_op_idx);
void BuildDownstreamMap();
void ShrinkDownstreamMap();
bool is_build_;
const std::vector<std::unique_ptr<paddle::framework::InstructionBase>>*
instructions_; // not_own
size_t op_num_;
// ops_behind_ is the adjacency list from an op to its posterior-ops, that is
// to say, ops_behind_[i] == {a, b, c} means op[a], op[b] and op[c] depend on
// op[i] directly or indirectly. ops_before_ is the reversed adjacency list of
// ops_behind_.
std::vector<std::vector<size_t>> ops_before_;
std::vector<std::vector<size_t>> ops_behind_;
// op_downstream_map_ is the mapping from op to its downstream-op set, that is
// to say, op_downstream_map_[i] == {a, b, c} means op[a], op[b] and op[c]
// depend on op[i] directly.
std::map<size_t, std::set<size_t>> op_downstream_map_;
// op_happens_before_ is a matrix form of ops_before_ and ops_behind_, it is
// used to speed up the query.
std::vector<std::vector<bool>> op_happens_before_;
};
} // namespace interpreter
} // namespace framework
} // namespace paddle
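For the bookkeeping members declared above, a tiny hand-written state for the chain op0 -> op1 -> op2 shows how the three structures relate; the values below are written out by hand for illustration, not produced by the real builder:

#include <cassert>
#include <cstddef>
#include <vector>

int main() {
  // Direct edges are only 0->1 and 1->2, but the closure is recorded too.
  std::vector<std::vector<size_t>> ops_before{{}, {0}, {0, 1}};  // predecessors
  std::vector<std::vector<size_t>> ops_behind{{1, 2}, {2}, {}};  // successors
  std::vector<std::vector<bool>> happens_before{
      {false, true, true}, {false, false, true}, {false, false, false}};
  // The matrix is the transitive closure: op0 happens before op2 even though
  // the downstream map would only keep the direct edges 0->1 and 1->2.
  assert(happens_before[0][2]);
  assert(ops_before[2].size() == 2 && ops_behind[0].size() == 2);
  return 0;
}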
......@@ -1011,9 +1011,7 @@ void BuildOpFuncList(
scope,
local_scope,
op_yaml_info_parser,
&(op_func_node.kernel_context_),
&(op_func_node.input_index),
&(op_func_node.output_index));
&(op_func_node.kernel_context_));
VLOG(6) << "finish process kernel context";
op_func_node.kernel_context_.SetDeviceContext(
......
......@@ -185,12 +185,21 @@ FetchList NewIRInterpreter::Run(const std::vector<std::string>& feed_names,
if (!is_build_) {
LOG_FIRST_N(INFO, 1) << "New Executor is Running.";
std::stringstream ss;
ss << this;
::ir::BuildScope(*ir_program_->block(),
InnerScope(),
ss.str(),
&value_2_var_name_,
&variable_2_var_name_,
&var_name_2_id_,
&variable_list_);
VLOG(4) << DebugValueInfo();
// NOTE(zhangbo): Iterative version, gradually replacing BuildOpFuncList()
// and Convert()
// BuildInstruction();
// BuildInstructionDependences();
std::vector<paddle::framework::OpFuncNode> op_func_nodes;
interpreter::BuildOpFuncList(place_,
......@@ -241,13 +250,18 @@ FetchList NewIRInterpreter::BetaRun(const std::vector<std::string>& feed_names,
SetDeviceId(place_);
if (!is_build_) {
LOG_FIRST_N(INFO, 1) << "New Executor is BetaRunning.";
std::stringstream ss;
ss << this;
::ir::BuildScope(*ir_program_->block(),
InnerScope(),
ss.str(),
&value_2_var_name_,
&variable_2_var_name_,
&var_name_2_id_,
&variable_list_);
VLOG(4) << DebugValueInfo();
BuildInstruction();
BuildInstructionDependences();
for (size_t instr_id = 0; instr_id < vec_instruction_base_.size();
++instr_id) {
vec_instruction_base_[instr_id]->Run();
......@@ -1534,8 +1548,27 @@ void NewIRInterpreter::BuildInstruction() {
++it) {
VLOG(0) << "Build Instruction for op: " << op_idx;
if ((*it)->dialect()->name() == "pd_kernel") {
vec_instruction_base_.emplace_back(std::make_unique<PhiKernelInstruction>(
op_idx++, place_, (*it), scope_, local_scope_, value_2_var_name_));
auto op_name = (*it)
->attributes()
.at("op_name")
.dyn_cast<::ir::StrAttribute>()
.AsString();
if (op_name == "builtin.combine" || op_name == "builtin.slice" ||
op_name == "pd.feed" || op_name == "pd.fetch" ||
op_name == "builtin.set_parameter" ||
op_name == "builtin.get_parameter") {
VLOG(6) << "skip process " << op_name;
continue;
}
vec_instruction_base_.emplace_back(
std::make_unique<PhiKernelInstruction>(op_idx++,
place_,
(*it),
scope_,
local_scope_,
value_2_var_name_,
var_name_2_id_,
variable_2_var_name_));
} else {
PADDLE_THROW(platform::errors::Unimplemented(
"Now only support pd_kernel dialect."));
......@@ -1543,5 +1576,64 @@ void NewIRInterpreter::BuildInstruction() {
}
}
std::string NewIRInterpreter::DebugValueInfo() {
std::stringstream os;
os << "value info of interpretercore " << this << "\n"
<< "value -> var_name -> id -> variable*"
<< "\n";
for (auto kv : value_2_var_name_) {
os << kv.first.impl() << " -> " << kv.second << " -> "
<< var_name_2_id_.at(kv.second) << " -> "
<< InnerScope()->FindVar(kv.second) << "\n";
}
return os.str();
}
void NewIRInterpreter::BuildInstructionDependences() {
// analyse the dependences between instructions, add next_instr_list to each
// instr, and set the dependecy_count_
size_t instr_num = vec_instruction_base_.size();
dependecy_count_ = std::vector<size_t>(instr_num, 0);
auto downstream_map = ir_dependency_builder_.Build(vec_instruction_base_);
for (size_t instr_id = 0; instr_id < instr_num; ++instr_id) {
InstructionBase* cur_instr = vec_instruction_base_[instr_id].get();
const std::set<size_t>& next_instr_ids = downstream_map[instr_id];
if (FLAGS_new_executor_serial_run) {
for (size_t next_instr_id : next_instr_ids) {
cur_instr->AddNextInstrInSameThread(next_instr_id);
}
} else {
if (cur_instr->KernelType() == OpFuncType::kGpuAsync) {
for (size_t next_instr_id : next_instr_ids) {
if (vec_instruction_base_[next_instr_id]->KernelType() ==
OpFuncType::kGpuAsync) {
cur_instr->AddNextInstrInSameThread(next_instr_id);
} else {
cur_instr->AddNextInstrInDifferentThread(next_instr_id);
}
}
} else {
bool has_instr_in_same_thread = false;
for (size_t next_instr_id : next_instr_ids) {
if (!has_instr_in_same_thread &&
vec_instruction_base_[next_instr_id]->KernelType() !=
OpFuncType::kGpuAsync) {
cur_instr->AddNextInstrInSameThread(next_instr_id);
has_instr_in_same_thread = true;
} else {
cur_instr->AddNextInstrInDifferentThread(next_instr_id);
}
}
}
}
for (size_t next_instr_id : next_instr_ids) {
++dependecy_count_[next_instr_id];
}
}
}
} // namespace framework
} // namespace paddle
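BuildInstructionDependences also decides which successor stays on the current instruction's thread: a GPU-async instruction keeps its async successors, while a sync instruction keeps exactly one sync successor inline and pushes the rest to other threads. A hedged restatement of just that policy with made-up names (the FLAGS_new_executor_serial_run branch, which keeps every successor on the same thread, is omitted):

#include <cstddef>
#include <vector>

enum class Kind { kSync, kGpuAsync };

// Returns the successor ids kept on the current instruction's thread.
std::vector<size_t> SameThreadSuccessors(Kind cur,
                                         const std::vector<size_t>& next_ids,
                                         const std::vector<Kind>& next_kinds) {
  std::vector<size_t> same_thread;
  bool picked_sync_successor = false;
  for (size_t i = 0; i < next_ids.size(); ++i) {
    if (cur == Kind::kGpuAsync) {
      // async instructions keep async successors on the same stream/thread
      if (next_kinds[i] == Kind::kGpuAsync) same_thread.push_back(next_ids[i]);
    } else if (!picked_sync_successor && next_kinds[i] != Kind::kGpuAsync) {
      // a sync instruction keeps exactly one sync successor to run next
      same_thread.push_back(next_ids[i]);
      picked_sync_successor = true;
    }
  }
  return same_thread;
}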
......@@ -186,17 +186,26 @@ class NewIRInterpreter : public InterpreterBaseImpl {
/// ======================== ///
/// For new ir ///
/// ======================== ///
std::string DebugValueInfo();
void BuildInstruction();
void BuildInstructionDependences();
std::unique_ptr<::ir::Program> ir_program_{nullptr};
std::vector<std::unique_ptr<InstructionBase>> vec_instruction_base_;
std::unordered_map<::ir::Value, std::string> value_2_var_name_;
std::unordered_map<const paddle::framework::Variable*, std::string>
variable_2_var_name_;
std::map<std::string, int> var_name_2_id_;
std::vector<Variable*> variable_list_;
interpreter::IrDependencyBuilder ir_dependency_builder_;
};
} // namespace framework
......
......@@ -88,6 +88,10 @@ const std::map<std::string, int>& OpYamlInfoParser::InputName2Id() const {
return input_name2id_;
}
const std::map<std::string, int>& OpYamlInfoParser::OutputName2Id() const {
return output_name2id_;
}
bool OpYamlInfoParser::HasInplace(const std::string& out_name) const {
auto& inplace_info = std::get<3>(op_info_tuple_).inplace;
for (size_t i = 0; i < inplace_info.size(); i++) {
......@@ -113,10 +117,9 @@ const std::string& OpYamlInfoParser::InplaceName(
void OpYamlInfoParser::parse() {
auto input_info = std::get<0>(op_info_tuple_);
int start_index = 0;
int input_start_index = 0;
for (size_t i = 0; i < input_info.size(); ++i) {
input_name2id_[input_info[i].name] = start_index++;
input_name2id_[input_info[i].name] = input_start_index++;
input_name_list_.push_back(input_info[i].name);
input_info_[input_info[i].name] = input_info[i];
if (!input_info[i].is_mutable_attribute) {
......@@ -130,8 +133,10 @@ void OpYamlInfoParser::parse() {
attr_info_[attribute_info[i].name] = attribute_info[i];
}
int output_start_index = 0;
auto output_info = std::get<2>(op_info_tuple_);
for (size_t i = 0; i < output_info.size(); ++i) {
output_name2id_[output_info[i].name] = output_start_index++;
output_name_list_.push_back(output_info[i].name);
output_info_[output_info[i].name] = output_info[i];
}
......
......@@ -35,6 +35,7 @@ class OpYamlInfoParser {
const std::vector<std::string>& AttrParams(bool is_kernel = false) const;
const OpRunTimeInfo& OpRuntimeInfo() const;
const std::map<std::string, int>& InputName2Id() const;
const std::map<std::string, int>& OutputName2Id() const;
const std::vector<std::string>& InputNames() const {
return input_name_list_;
......@@ -69,6 +70,7 @@ class OpYamlInfoParser {
std::map<std::string, OpAttributeInfo> attr_info_;
// output info
std::map<std::string, int> output_name2id_;
std::vector<std::string> output_name_list_;
std::map<std::string, OpOutputInfo> output_info_;
......
......@@ -60,9 +60,12 @@ class PhiKernelAdaptor {
variable_2_var_name;
std::map<std::string, int> var_name_2_id;
std::vector<paddle::framework::Variable*> variable_list;
std::stringstream ss;
ss << this;
BuildScope(*block,
scope_,
ss.str(),
&value_2_var_name,
&variable_2_var_name,
&var_name_2_id,
......
......@@ -49,6 +49,7 @@ using VariableNameMap =
paddle::framework::Variable* CreateVar(
ir::Value value,
paddle::framework::Scope* inner_scope,
const std::string& var_name_prefix,
bool force_persisable,
std::unordered_map<ir::Value, std::string>* value_2_var_name,
std::unordered_map<const paddle::framework::Variable*, std::string>*
......@@ -65,6 +66,7 @@ paddle::framework::Variable* CreateVar(
}
paddle::framework::Variable* var = nullptr;
VLOG(6) << "var_name_prefix is: " << var_name_prefix;
std::string name = "inner_var_" + std::to_string(variable_2_var_name->size());
if (force_persisable || is_persisable) {
VLOG(6) << "Create var: " << name << " in scope " << inner_scope->root();
......@@ -109,6 +111,7 @@ void CheckInputVars(
void BuildValue(ir::Value value,
paddle::framework::Scope* inner_scope,
const std::string& var_name_prefix,
std::unordered_map<ir::Value, std::string>* value_2_var_name,
std::unordered_map<const paddle::framework::Variable*,
std::string>* variable_2_var_name,
......@@ -120,6 +123,7 @@ void BuildValue(ir::Value value,
} else {
var = CreateVar(value,
inner_scope,
var_name_prefix,
false,
value_2_var_name,
variable_2_var_name,
......@@ -146,6 +150,7 @@ void BuildValue(ir::Value value,
"DenseTensorType"));
auto var_i = CreateVar(value,
inner_scope,
var_name_prefix,
false,
value_2_var_name,
variable_2_var_name,
......@@ -163,6 +168,7 @@ void BuildValue(ir::Value value,
void HandleForSpecialOp(
ir::Operation* op,
paddle::framework::Scope* inner_scope,
const std::string& var_name_prefix,
std::unordered_map<ir::Value, std::string>* value_2_var_name,
std::unordered_map<const paddle::framework::Variable*, std::string>*
variable_2_var_name,
......@@ -189,6 +195,7 @@ void HandleForSpecialOp(
auto value = op->result(0);
auto var = CreateVar(value,
inner_scope,
var_name_prefix,
false,
value_2_var_name,
variable_2_var_name,
......@@ -217,6 +224,7 @@ void HandleForSpecialOp(
} else {
var = CreateVar(out_value,
inner_scope,
var_name_prefix,
false,
value_2_var_name,
variable_2_var_name,
......@@ -296,6 +304,7 @@ void HandleForSpecialOp(
void HandleForInplaceOp(
ir::Operation* op,
paddle::framework::Scope* inner_scope,
const std::string& var_name_prefix,
std::unordered_map<ir::Value, std::string>* value_2_var_name,
std::unordered_map<const paddle::framework::Variable*, std::string>*
variable_2_var_name,
......@@ -328,6 +337,7 @@ void HandleForInplaceOp(
} else {
BuildValue(value,
inner_scope,
var_name_prefix,
value_2_var_name,
variable_2_var_name,
var_name_2_id,
......@@ -340,6 +350,7 @@ void HandleForInplaceOp(
// created in inner_scope.
void BuildScope(const ir::Block& block,
paddle::framework::Scope* inner_scope,
const std::string& var_name_prefix,
std::unordered_map<ir::Value, std::string>* value_2_var_name,
std::unordered_map<const paddle::framework::Variable*,
std::string>* variable_2_var_name,
......@@ -367,6 +378,7 @@ void BuildScope(const ir::Block& block,
op_name == "builtin.get_parameter" || op_name == "builtin.slice") {
HandleForSpecialOp(op,
inner_scope,
var_name_prefix,
value_2_var_name,
variable_2_var_name,
var_name_2_id,
......@@ -384,6 +396,7 @@ void BuildScope(const ir::Block& block,
.data()) {
HandleForInplaceOp(op,
inner_scope,
var_name_prefix,
value_2_var_name,
variable_2_var_name,
var_name_2_id,
......@@ -393,6 +406,7 @@ void BuildScope(const ir::Block& block,
for (size_t i = 0; i < op->num_results(); ++i) {
BuildValue(op->result(i),
inner_scope,
var_name_prefix,
value_2_var_name,
variable_2_var_name,
var_name_2_id,
......
......@@ -43,6 +43,7 @@
namespace ir {
void BuildScope(const ir::Block& block,
paddle::framework::Scope* inner_scope,
const std::string& var_name_prefix,
std::unordered_map<ir::Value, std::string>* value_2_var_name,
std::unordered_map<const paddle::framework::Variable*,
std::string>* variable_2_var_name,
......@@ -55,15 +56,12 @@ template <typename Context,
typename InListType,
typename OutListType,
bool is_kernel>
void BuildPhiContext(
ir::Operation* op,
const std::unordered_map<ir::Value, std::string>& name_map,
paddle::framework::Scope* scope,
paddle::framework::Scope* local_scope,
const paddle::dialect::OpYamlInfoParser& op_yaml_info,
Context* ctx,
std::map<std::string, std::vector<int>>* input_map = nullptr,
std::map<std::string, std::vector<int>>* output_map = nullptr) {
void BuildPhiContext(ir::Operation* op,
const std::unordered_map<ir::Value, std::string>& name_map,
paddle::framework::Scope* scope,
paddle::framework::Scope* local_scope,
const paddle::dialect::OpYamlInfoParser& op_yaml_info,
Context* ctx) {
paddle::framework::Scope* inner_scope =
local_scope != nullptr ? local_scope : scope;
VLOG(6) << "BuildPhiContext in scope[" << scope << "] inner_scope["
......@@ -120,17 +118,6 @@ void BuildPhiContext(
ir::Value ptr = op->operand(name2id.at(t));
auto in_var_name = name_map.at(ptr);
if (input_map != nullptr) {
// only deal with single input for now, [todo] need support multi input
// like concat
// TODO(phlrain): OpFuncNode need input_index and output_index,
// construct input_index and output_here, should remove input_index and
// output_index from OpFuncNode Each in_var_name named "inner_var_" +
// index, len("inner_var_") = 10
size_t tmp_id = std::atol(in_var_name.substr(4, 100).c_str());
(*input_map)[std::to_string(name2id.at(t))].push_back(tmp_id);
}
auto& tensor_attr_type = op_yaml_info.TensorAttrTypeName(t);
VLOG(6) << "ctx->EmplaceBack mutable attr: " << t << "\t" << in_var_name;
......@@ -324,18 +311,6 @@ void BuildPhiContext(
PADDLE_THROW(
phi::errors::Unimplemented("only support DenseTensor and vector "));
}
if (output_map != nullptr) {
// only deal with single input for now, [todo] need support multi input
// like concat
// TODO(phlrain): OpFuncNode need input_index and output_index,
// construct input_index and output_here, should remove input_index and
// output_index from OpFuncNode Each in_var_name named "inner_var_" +
// index, len("inner_var_") = 10
size_t tmp_id = std::atol(name.substr(4, 100).c_str());
(*output_map)["out"].push_back(tmp_id);
}
}
}
VLOG(6) << "Done build phi context";
......