From fd1923033ed07b9525b1e229703a244d333f49e3 Mon Sep 17 00:00:00 2001 From: zhangbo9674 <82555433+zhangbo9674@users.noreply.github.com> Date: Wed, 19 Jul 2023 18:45:38 +0800 Subject: [PATCH] [IR] Add Dependency build for new ir interpretercore (#55468) * add interface * add code * add code * add code * add code * fix bug * fix bug --- .../instruction/instruction_base.cc | 4 +- .../instruction/instruction_base.h | 29 ++- .../instruction/phi_kernel_instruction.cc | 112 ++++++-- .../instruction/phi_kernel_instruction.h | 17 +- .../interpreter/dependency_builder.cc | 242 ++++++++++++++++++ .../interpreter/dependency_builder.h | 59 +++++ .../interpreter/interpreter_util.cc | 4 +- .../new_executor/new_ir_interpreter.cc | 96 ++++++- .../new_executor/new_ir_interpreter.h | 9 + .../fluid/ir/interface/op_yaml_info_parser.cc | 11 +- .../fluid/ir/interface/op_yaml_info_parser.h | 2 + .../phi_kernel_adaptor/phi_kernel_adaptor.h | 3 + .../ir/phi_kernel_adaptor/phi_kernel_util.cc | 14 + .../ir/phi_kernel_adaptor/phi_kernel_util.h | 39 +-- 14 files changed, 572 insertions(+), 69 deletions(-) diff --git a/paddle/fluid/framework/new_executor/instruction/instruction_base.cc b/paddle/fluid/framework/new_executor/instruction/instruction_base.cc index eb6394f9794..6c09d7aa2a1 100644 --- a/paddle/fluid/framework/new_executor/instruction/instruction_base.cc +++ b/paddle/fluid/framework/new_executor/instruction/instruction_base.cc @@ -84,12 +84,12 @@ void InstructionBase::AddInplace(Variable* in, Variable* out) { void InstructionBase::ClearInplace() { vec_inplace_in_to_out_.clear(); } void InstructionBase::SetInputs( - const std::map>& inputs) { + const std::unordered_map>& inputs) { input_index_ = inputs; } void InstructionBase::SetOutputs( - const std::map>& outputs) { + const std::unordered_map>& outputs) { output_index_ = outputs; } diff --git a/paddle/fluid/framework/new_executor/instruction/instruction_base.h b/paddle/fluid/framework/new_executor/instruction/instruction_base.h index cd9531660af..a31b65c1039 100644 --- a/paddle/fluid/framework/new_executor/instruction/instruction_base.h +++ b/paddle/fluid/framework/new_executor/instruction/instruction_base.h @@ -22,6 +22,10 @@ #include "paddle/fluid/framework/new_executor/new_executor_defs.h" #include "paddle/fluid/platform/event.h" +namespace ir { +class Value; +} // namespace ir + namespace paddle { namespace framework { @@ -103,24 +107,27 @@ class InstructionBase { std::map& GetMutableInplaceBackMap() { return inplace_back_map_; } const std::map& GetInplaceBackMap() { return inplace_back_map_; } - const std::map>& Inputs() const { + const std::unordered_map>& Inputs() const { return input_index_; } - std::map>& GetMutableInputs() { + std::unordered_map>& GetMutableInputs() { return input_index_; } - void SetInputs(const std::map>& inputs); + void SetInputs(const std::unordered_map>& inputs); - const std::map>& Outputs() const { + const std::unordered_map>& Outputs() const { return output_index_; } - std::map>& GetMutableOutputs() { + std::unordered_map>& GetMutableOutputs() { return output_index_; } - void SetOutputs(const std::map>& outputs); + void SetOutputs( + const std::unordered_map>& outputs); virtual void Run() = 0; + virtual const std::string& Name() const = 0; + private: size_t id_; @@ -130,25 +137,31 @@ class InstructionBase { // dist attrs:lower value, higher priority int stream_priority_{0}; + SchedulingPriority scheduling_priority_{0}; + std::string execution_stream_{kDefaultStream}; platform::DeviceContext* dev_ctx_; // not owned 
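+ // The next_instrs_in_same_thread_ / next_instrs_in_different_thread_ members below hold the ids of downstream instructions, split by whether they may be scheduled on the current worker thread; they are filled from the dependency builder's downstream map in NewIRInterpreter::BuildInstructionDependences().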
std::vector next_instrs_in_different_thread_; + std::vector next_instrs_in_same_thread_; std::shared_ptr event_to_record_; + std::vector events_to_wait_; std::vector gc_check_vars_; std::vector> vec_inplace_in_to_out_; // If not use share data, need this ? + std::map inplace_back_map_; - std::map> input_index_; - std::map> output_index_; + std::unordered_map> input_index_; + + std::unordered_map> output_index_; }; } // namespace framework diff --git a/paddle/fluid/framework/new_executor/instruction/phi_kernel_instruction.cc b/paddle/fluid/framework/new_executor/instruction/phi_kernel_instruction.cc index 53cef292d9f..50a8161cd43 100644 --- a/paddle/fluid/framework/new_executor/instruction/phi_kernel_instruction.cc +++ b/paddle/fluid/framework/new_executor/instruction/phi_kernel_instruction.cc @@ -73,7 +73,10 @@ PhiKernelInstruction::PhiKernelInstruction( ir::Operation* op, Scope* scope, Scope* local_scope, - const std::unordered_map<::ir::Value, std::string>& value_2_name_map) + const std::unordered_map<::ir::Value, std::string>& value_2_var_name, + const std::map& var_name_2_id, + const std::unordered_map& + variable_2_var_name) : InstructionBase(id, place) { auto op_attributes = op->attributes(); auto op_name = @@ -81,14 +84,7 @@ PhiKernelInstruction::PhiKernelInstruction( ir::OpInfo op_info = ir::IrContext::Instance()->GetRegisteredOpInfo(op_name); phi_op_name_ = op_name; - - if (op_name == "builtin.combine" || op_name == "pd.feed" || - op_name == "builtin.set_parameter" || - op_name == "builtin.get_parameter") { - VLOG(6) << "skip process " << op_name; - SetArtificial(true); - return; - } + VLOG(6) << "construct phi kernel instruction for: " << phi_op_name_; // Todo: support paddle::dialect::DistAttribute // if (op_attributes.count("dist_attr") != 0) { @@ -117,15 +113,24 @@ PhiKernelInstruction::PhiKernelInstruction( // op_func_node.scheduling_priority_ = 1; // } // } + VLOG(6) << "finish process dist attributes"; SetKernelType(AnalyseOpFuncType(op, place)); + VLOG(6) << "finish process analyse kernel type"; infer_meta_interface_ = op_info.GetInterfaceImpl(); + VLOG(6) << "finish process infer_meta_interface_"; + auto yaml_interface = op_info.GetInterfaceImpl(); + PADDLE_ENFORCE_NOT_NULL( + yaml_interface, + phi::errors::PreconditionNotMet( + "can not find OpYamlInfoInterface from [%s]", phi_op_name_)); paddle::dialect::OpYamlInfoParser yaml_info_parser( yaml_interface->get_op_info_()); + VLOG(6) << "finish process yaml_info_parser"; ::ir::BuildPhiContext< phi::InferMetaContext, @@ -134,7 +139,7 @@ PhiKernelInstruction::PhiKernelInstruction( paddle::small_vector, paddle::small_vector, false>(op, - value_2_name_map, + value_2_var_name, scope, local_scope, yaml_info_parser, @@ -159,13 +164,11 @@ PhiKernelInstruction::PhiKernelInstruction( paddle::small_vector, paddle::small_vector, true>(op, - value_2_name_map, + value_2_var_name, scope, local_scope, yaml_info_parser, - &kernel_context_, - &(GetMutableInputs()), - &(GetMutableOutputs())); + &kernel_context_); kernel_context_.SetDeviceContext(phi::DeviceContextPool::Instance().Get( phi::TransToPhiPlace(kernel_key.backend()))); VLOG(6) << "finish process kernel context"; @@ -173,13 +176,90 @@ PhiKernelInstruction::PhiKernelInstruction( SetDeviceContext(phi::DeviceContextPool::Instance().Get( phi::TransToPhiPlace(kernel_key.backend()))); VLOG(6) << "finish process device context"; + + Scope* inner_scope = local_scope == nullptr ? 
scope : local_scope; + InitInputsOutputsIds( + op, inner_scope, value_2_var_name, var_name_2_id, variable_2_var_name); + VLOG(6) << "finish process inputs outputs index"; +} + +std::vector<int> GetValueIds( + ir::Value value, + Scope* inner_scope, + const std::unordered_map<::ir::Value, std::string>& value_2_var_name, + const std::map<std::string, int>& var_name_2_id, + const std::unordered_map<const Variable*, std::string>& + variable_2_var_name) { + std::vector<int> ids; + std::string var_name = value_2_var_name.at(value); + ids.push_back(var_name_2_id.at(var_name)); + // NOTE(zhangbo): Value may be a VariableRefArray + auto var = inner_scope->FindVar(var_name); + if (var->IsType<VariableRefArray>()) { + auto& var_array = var->Get<VariableRefArray>(); + for (size_t i = 0; i < var_array.size(); ++i) { + ids.push_back(var_name_2_id.at(variable_2_var_name.at(var_array[i]))); + } + } + return ids; +} + +void PhiKernelInstruction::InitInputsOutputsIds( + ::ir::Operation* op, + Scope* inner_scope, + const std::unordered_map<::ir::Value, std::string>& value_2_var_name, + const std::map<std::string, int>& var_name_2_id, + const std::unordered_map<const Variable*, std::string>& + variable_2_var_name) { + std::unordered_map<ir::Value, std::vector<int>> inputs; + for (size_t i = 0; i < op->num_operands(); i++) { + ir::Value value = op->operand(i); + if (value) { + PADDLE_ENFORCE_NE( + value_2_var_name.find(value), + value_2_var_name.end(), + phi::errors::PreconditionNotMet( + "input should be in name map, [%d]'th input of [%s] op", + i, + phi_op_name_)); + std::vector<int> inputs_id = GetValueIds(value, + inner_scope, + value_2_var_name, + var_name_2_id, + variable_2_var_name); + inputs.emplace(value, inputs_id); + } + } + SetInputs(inputs); + VLOG(8) << "finish process inputs_index"; + std::unordered_map<ir::Value, std::vector<int>> outputs; + for (size_t i = 0; i < op->num_results(); i++) { + ir::Value value = op->result(i); + if (value) { + PADDLE_ENFORCE_NE( + value_2_var_name.find(value), + value_2_var_name.end(), + phi::errors::PreconditionNotMet( + "output should be in name map, [%d]'th output of [%s] op", + i, + phi_op_name_)); + std::vector<int> outputs_id = GetValueIds(value, + inner_scope, + value_2_var_name, + var_name_2_id, + variable_2_var_name); + outputs.emplace(value, outputs_id); + } + } + SetOutputs(outputs); + VLOG(8) << "finish process outputs_index"; } void PhiKernelInstruction::Run() { - VLOG(5) << "Run op " << phi_op_name_ << " infer meta."; infer_meta_interface_->infer_meta_(&(infer_meta_context_)); - VLOG(5) << "Run op " << phi_op_name_ << " kernel."; + VLOG(6) << "Run op " << phi_op_name_ << " infer meta."; (*(phi_kernel_))(&(kernel_context_)); + VLOG(6) << "Run op " << phi_op_name_ << " kernel."; } } // namespace framework diff --git a/paddle/fluid/framework/new_executor/instruction/phi_kernel_instruction.h b/paddle/fluid/framework/new_executor/instruction/phi_kernel_instruction.h index 72a34f722ce..b30fa8bff75 100644 --- a/paddle/fluid/framework/new_executor/instruction/phi_kernel_instruction.h +++ b/paddle/fluid/framework/new_executor/instruction/phi_kernel_instruction.h @@ -33,9 +33,10 @@ class PhiKernelInstruction : public InstructionBase { ::ir::Operation* op, Scope* scope, Scope* local_scope, - const std::unordered_map<::ir::Value, std::string>& value_2_name_map); - - const std::string& PhiOpName() const { return phi_op_name_; } + const std::unordered_map<::ir::Value, std::string>& value_2_var_name, + const std::map<std::string, int>& var_name_2_id, + const std::unordered_map<const Variable*, std::string>& + variable_2_var_name); phi::Kernel* PhiKernel() const { return phi_kernel_; } @@ -51,7 +52,17 @@ class PhiKernelInstruction : public InstructionBase { void Run() override; + const std::string& Name() const override {
return phi_op_name_; } + private: + void InitInputsOutputsIds( + ::ir::Operation* op, + Scope* inner_scope, + const std::unordered_map<::ir::Value, std::string>& value_2_var_name, + const std::map<std::string, int>& var_name_2_id, + const std::unordered_map<const Variable*, std::string>& + variable_2_var_name); + std::string phi_op_name_; paddle::dialect::InferMetaInterface::Concept* infer_meta_interface_{ diff --git a/paddle/fluid/framework/new_executor/interpreter/dependency_builder.cc b/paddle/fluid/framework/new_executor/interpreter/dependency_builder.cc index 76fb08baca4..559e8d7afa6 100644 --- a/paddle/fluid/framework/new_executor/interpreter/dependency_builder.cc +++ b/paddle/fluid/framework/new_executor/interpreter/dependency_builder.cc @@ -15,6 +15,7 @@ #include "paddle/fluid/framework/new_executor/interpreter/dependency_builder.h" #include +#include "paddle/fluid/framework/new_executor/instruction/instruction_base.h" #include "paddle/fluid/framework/new_executor/interpreter/interpreter_util.h" #include "paddle/fluid/platform/flags.h" PADDLE_DEFINE_EXPORTED_bool( @@ -527,6 +528,247 @@ void DependencyBuilder::ShrinkDownstreamMap() { << StringizeDownstreamMap(op_downstream_map_); } +/// ======================== /// +/// For new ir /// +/// ======================== /// +const std::map<size_t, std::set<size_t>>& IrDependencyBuilder::Build( + const std::vector<std::unique_ptr<InstructionBase>>& + instructions) { + if (is_build_) { + return op_downstream_map_; + } + + instructions_ = &instructions; + op_num_ = instructions_->size(); + + ops_before_.assign(op_num_, {}); + ops_behind_.assign(op_num_, {}); + op_happens_before_.assign(op_num_, std::vector<bool>(op_num_, false)); + + BuildDownstreamMap(); + VLOG(6) << "Finish BuildDownstreamMap"; + + ShrinkDownstreamMap(); + VLOG(6) << "Finish ShrinkDownstreamMap"; + + if (FLAGS_new_executor_sequential_run) { + AddDependencyForSequentialRun(); + } + + // TODO(zhangbo): Add dependency for special op ? + + VLOG(6) << "Finish build dependency"; + VLOG(8) << "downstream count: " << CountDownstreamMap(op_downstream_map_); + VLOG(8) << "downstream_map: " << std::endl + << StringizeDownstreamMap(op_downstream_map_); + + is_build_ = true; + + return op_downstream_map_; +} + +void IrDependencyBuilder::BuildDownstreamMap() { + auto var2min_rw_op = + std::map<size_t, std::list<size_t>>(); // # map from variable id to read + // write op id. + auto var2recent_write_op = + std::map<size_t, size_t>(); // # map from variable to recent write op. + + auto op2dependences = + std::map<size_t, std::set<size_t>>(); //# map from op to the dependence list, + // op must run after the dependence. + std::set<size_t> + remove_duplicate; // remove duplicates between inputs and outputs + + // reserve + for (size_t op_idx = 0; op_idx < op_num_; ++op_idx) { + op2dependences[op_idx] = std::set<size_t>(); + } + + auto update_var_min_rw_op = + [](const std::map<size_t, std::set<size_t>>& op2dependences, + std::map<size_t, std::list<size_t>>* var2min_rw_op, + size_t cur_op, + size_t rw_var) { + // rw_var is an input or output of cur_op + // this function updates the var2min_rw_op set.
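+ // Any op already recorded as a dependency of cur_op is dropped from the list: a later op that touches rw_var will depend on cur_op and therefore, transitively, on those ops as well.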
+ if (var2min_rw_op->find(rw_var) == var2min_rw_op->end()) { + (*var2min_rw_op)[rw_var] = std::list<size_t>(); + } + for (auto dep_op : op2dependences.at(cur_op)) { + var2min_rw_op->at(rw_var).remove(dep_op); + } + var2min_rw_op->at(rw_var).push_back(cur_op); + }; + + for (size_t op_idx = 0; op_idx < op_num_; ++op_idx) { + remove_duplicate.clear(); + // step1: update the op2dependences structure + for (auto& item : + instructions_->at(op_idx)->Inputs()) { // for all inputs(read only) + for (auto var : item.second) { + if (var2recent_write_op.count(var)) + op2dependences[op_idx].insert(var2recent_write_op[var]); + } + } + + for (auto& item : + instructions_->at(op_idx)->Outputs()) { // for all write vars + for (auto var : item.second) { + if (var2min_rw_op.count(var)) { + for (auto dep_op : var2min_rw_op[var]) { + op2dependences[op_idx].insert(dep_op); + } + } + } + } + + // step2: update the two var2xxx data structures + for (auto& item : + instructions_->at(op_idx)->Outputs()) { // for all write vars + for (auto var : item.second) { + var2recent_write_op[var] = op_idx; + var2min_rw_op[var] = {static_cast<size_t>(op_idx)}; + remove_duplicate.insert(var); + } + } + + for (auto& item : + instructions_->at(op_idx)->Inputs()) { // for all inputs(read only) + for (auto var : item.second) { + if (remove_duplicate.count(var) == + 0) { // vars both read and written were already updated in step2, so skip them here. + update_var_min_rw_op(op2dependences, &var2min_rw_op, op_idx, var); + } + } + } + } + + // convert op2dependences to downstream_map directly. op2dependences is op -> + // its dependencies, we want to get op -> [next ops] map, where [next ops] are + // the next instructions of op. The size of downstream != size of op2dependences + // since there are some ops that have no downstream-op. + for (auto& item : op2dependences) { + size_t op = item.first; + for (auto dep_op : item.second) { + AddDownstreamOp(dep_op, op); + } + } +} + +void IrDependencyBuilder::AddDownstreamOp(size_t prior_op_idx, + size_t posterior_op_idx) { + PADDLE_ENFORCE_EQ( + OpHappensBefore(posterior_op_idx, prior_op_idx), + false, + phi::errors::Unavailable( + "Cannot add dependency %d->%d because %d is run before %d", + prior_op_idx, + posterior_op_idx, + posterior_op_idx, + prior_op_idx)); + + std::set<size_t>& downstream_ops = op_downstream_map_[prior_op_idx]; + // NOTE(Ruibiao): Here the downstream map shrinking is best-effort, therefore + // ShrinkDownstreamMap after BuildDownstreamMap is still helpful. For example, + // a->c will not be shrunk in the following case: AddDownstreamOp(a, b) -> + // AddDownstreamOp(a, c) -> AddDownstreamOp(b, c), it should be shrunk by + // ShrinkDownstreamMap.
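+ // If an op already in prior_op's downstream set happens before posterior_op, the edge prior_op->posterior_op is implied transitively and adding it can be skipped.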
+ for (size_t op_idx : downstream_ops) { + if (OpHappensBefore(op_idx, posterior_op_idx)) { + VLOG(7) << "Find dependencies " << prior_op_idx << "->" << op_idx << "->" + << posterior_op_idx << ", skip adding " << prior_op_idx << "->" + << posterior_op_idx; + return; + } + } + downstream_ops.insert(posterior_op_idx); + + std::vector<size_t> prior_of_prior = ops_before_[prior_op_idx]; + std::vector<size_t> posterior_of_posterior = ops_behind_[posterior_op_idx]; + + auto update_op_happen_before = [this](size_t prior_op_idx, + size_t posterior_op_idx) { + if (!op_happens_before_[prior_op_idx][posterior_op_idx]) { + op_happens_before_[prior_op_idx][posterior_op_idx] = true; + ops_before_[posterior_op_idx].push_back(prior_op_idx); + ops_behind_[prior_op_idx].push_back(posterior_op_idx); + } + }; + + update_op_happen_before(prior_op_idx, posterior_op_idx); + + // All ops before prior-op are also before posterior-op + for (size_t op_idx : prior_of_prior) { + update_op_happen_before(op_idx, posterior_op_idx); + } + + // All ops after posterior-op are also after prior-op + for (size_t op_idx : posterior_of_posterior) { + update_op_happen_before(prior_op_idx, op_idx); + } + + VLOG(8) << prior_op_idx << "->" << posterior_op_idx; + VLOG(8) << "Add dependency from " << instructions_->at(prior_op_idx)->Name() + << "(" << prior_op_idx << ") to " + << instructions_->at(posterior_op_idx)->Name() << "(" + << posterior_op_idx << ")"; +} + +void IrDependencyBuilder::ShrinkDownstreamMap() { + // remove unnecessary downstream ops + // for example, a->b->c + // a: b, c + // b: c + // => + // a: b + // b: c + + // shrink: keep only those downstream ops that no other op in the + // downstream list happens before + for (size_t i = 0; i < op_num_; ++i) { + if (op_downstream_map_.find(i) == op_downstream_map_.end()) { + continue; + } + + std::set<size_t> minumum_nexts; + for (size_t item : op_downstream_map_.at(i)) { + bool not_after_any = true; + // find the ops that are not executed after any other downstream op + for (size_t other_item : op_downstream_map_.at(i)) { + if (OpHappensBefore(other_item, item)) { + VLOG(8) << "happens_before: " << other_item << "->" << item + << ", so skip " << item; + not_after_any = false; + break; + } + } + if (not_after_any) { + VLOG(8) << "downstream op of " << i << ": " << item; + minumum_nexts.insert(item); + } + } + // NOTE(Ruibiao): op_happens_before will not be changed when shrinking the + // downstream map + op_downstream_map_.at(i) = minumum_nexts; + } + VLOG(8) << "Finish shrink downstream map"; + VLOG(8) << "downstream count: " << CountDownstreamMap(op_downstream_map_); + VLOG(8) << "downstream_map: " << std::endl + << StringizeDownstreamMap(op_downstream_map_); +} + +void IrDependencyBuilder::AddDependencyForSequentialRun() { + size_t dependence_op_idx = ULLONG_MAX; + for (size_t op_idx = 0; op_idx < op_num_; ++op_idx) { + if (dependence_op_idx != ULLONG_MAX) { + AddDownstreamOp(dependence_op_idx, op_idx); + } + dependence_op_idx = op_idx; + } +} + } // namespace interpreter } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/new_executor/interpreter/dependency_builder.h b/paddle/fluid/framework/new_executor/interpreter/dependency_builder.h index 4d427f01fd4..eb65c23f4a6 100644 --- a/paddle/fluid/framework/new_executor/interpreter/dependency_builder.h +++ b/paddle/fluid/framework/new_executor/interpreter/dependency_builder.h @@ -23,6 +23,7 @@ DECLARE_bool(new_executor_sequential_run); namespace paddle { namespace framework { +class InstructionBase; namespace interpreter { // DependencyBuilder
provides some dependency adding function to handle the @@ -84,6 +85,64 @@ class DependencyBuilder { std::vector<std::vector<bool>> op_happens_before_; }; + +// /// ======================== /// +// /// For new ir /// +// /// ======================== /// +class IrDependencyBuilder { + public: + IrDependencyBuilder() : is_build_(false), instructions_(nullptr) {} + + // build op dependencies and return the mapping from op to its downstream-op + // set + const std::map<size_t, std::set<size_t>>& Build( + const std::vector<std::unique_ptr<InstructionBase>>& + instructions); + + const std::map<size_t, std::set<size_t>>& OpDownstreamMap() const; + + bool OpHappensBefore(size_t prior_op_idx, size_t posterior_op_idx) const { + PADDLE_ENFORCE_GE( + op_happens_before_.size(), + 0, + phi::errors::Unavailable("op_happens_before_ is not yet built")); + return op_happens_before_.at(prior_op_idx).at(posterior_op_idx); + } + + private: + void AddDependencyForCoalesceTensorOp(); + void AddDependencyForCommunicationOp(); + void AddDependencyForRandomOp(); + void AddDependencyForReadOp(); + void AddDependencyForSequentialRun(); + + void AddDownstreamOp(size_t prior_op_idx, size_t posterior_op_idx); + + void BuildDownstreamMap(); + + void ShrinkDownstreamMap(); + + bool is_build_; + const std::vector<std::unique_ptr<InstructionBase>>* + instructions_; // not owned + size_t op_num_; + + // ops_behind_ is the adjacency list from op to its posterior-ops, that is to + // say, ops_behind_[i] == {a, b, c} means op[a], op[b] and op[c] depend on + // op[i] directly or indirectly. ops_before_ is the reversed adjacency list of + // ops_behind_. + std::vector<std::vector<size_t>> ops_before_; + std::vector<std::vector<size_t>> ops_behind_; + + // op_downstream_map_ is the mapping from op to its downstream-op set, that is + // to say, op_downstream_map_[i] == {a, b, c} means op[a], op[b] and op[c] + // depend on op[i] directly. + std::map<size_t, std::set<size_t>> op_downstream_map_; + + // op_happens_before_ is a matrix form of ops_before_ and ops_behind_, it is + // used to speed up the query.
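+ // op_happens_before_[i][j] == true means op[i] is guaranteed to run before op[j].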
+ std::vector> op_happens_before_; +}; + } // namespace interpreter } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc b/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc index 34899261dbd..70be3b9dd03 100644 --- a/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc +++ b/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc @@ -1011,9 +1011,7 @@ void BuildOpFuncList( scope, local_scope, op_yaml_info_parser, - &(op_func_node.kernel_context_), - &(op_func_node.input_index), - &(op_func_node.output_index)); + &(op_func_node.kernel_context_)); VLOG(6) << "finish process kernel context"; op_func_node.kernel_context_.SetDeviceContext( diff --git a/paddle/fluid/framework/new_executor/new_ir_interpreter.cc b/paddle/fluid/framework/new_executor/new_ir_interpreter.cc index 055c9ff2383..b55c58ba8cc 100644 --- a/paddle/fluid/framework/new_executor/new_ir_interpreter.cc +++ b/paddle/fluid/framework/new_executor/new_ir_interpreter.cc @@ -185,12 +185,21 @@ FetchList NewIRInterpreter::Run(const std::vector& feed_names, if (!is_build_) { LOG_FIRST_N(INFO, 1) << "New Executor is Running."; + std::stringstream ss; + ss << this; ::ir::BuildScope(*ir_program_->block(), InnerScope(), + ss.str(), &value_2_var_name_, &variable_2_var_name_, &var_name_2_id_, &variable_list_); + VLOG(4) << DebugValueInfo(); + + // NOTE(zhangbo): Iterative version, gradually replacing BuildOpFuncList() + // and Convert() + // BuildInstruction(); + // BuildInstructionDependences(); std::vector op_func_nodes; interpreter::BuildOpFuncList(place_, @@ -241,13 +250,18 @@ FetchList NewIRInterpreter::BetaRun(const std::vector& feed_names, SetDeviceId(place_); if (!is_build_) { LOG_FIRST_N(INFO, 1) << "New Executor is BetaRunning."; + std::stringstream ss; + ss << this; ::ir::BuildScope(*ir_program_->block(), InnerScope(), + ss.str(), &value_2_var_name_, &variable_2_var_name_, &var_name_2_id_, &variable_list_); + VLOG(4) << DebugValueInfo(); BuildInstruction(); + BuildInstructionDependences(); for (size_t instr_id = 0; instr_id < vec_instruction_base_.size(); ++instr_id) { vec_instruction_base_[instr_id]->Run(); @@ -1534,8 +1548,27 @@ void NewIRInterpreter::BuildInstruction() { ++it) { VLOG(0) << "Build Instruction for op: " << op_idx; if ((*it)->dialect()->name() == "pd_kernel") { - vec_instruction_base_.emplace_back(std::make_unique( - op_idx++, place_, (*it), scope_, local_scope_, value_2_var_name_)); + auto op_name = (*it) + ->attributes() + .at("op_name") + .dyn_cast<::ir::StrAttribute>() + .AsString(); + if (op_name == "builtin.combine" || op_name == "builtin.slice" || + op_name == "pd.feed" || op_name == "pd.fetch" || + op_name == "builtin.set_parameter" || + op_name == "builtin.get_parameter") { + VLOG(6) << "skip process " << op_name; + continue; + } + vec_instruction_base_.emplace_back( + std::make_unique(op_idx++, + place_, + (*it), + scope_, + local_scope_, + value_2_var_name_, + var_name_2_id_, + variable_2_var_name_)); } else { PADDLE_THROW(platform::errors::Unimplemented( "Now only support pd_kernel dialect.")); @@ -1543,5 +1576,64 @@ void NewIRInterpreter::BuildInstruction() { } } +std::string NewIRInterpreter::DebugValueInfo() { + std::stringstream os; + os << "value info of interpretercore " << this << "\n" + << "value -> var_name -> id -> variable*" + << "\n"; + for (auto kv : value_2_var_name_) { + os << kv.first.impl() << " -> " << kv.second << " -> " + << var_name_2_id_.at(kv.second) << " -> " + 
<< InnerScope()->FindVar(kv.second) << "\n"; } return os.str(); } + +void NewIRInterpreter::BuildInstructionDependences() { + // analyse the dependencies between instructions, add next_instr_list to each + // instr, and set the dependecy_count_ + size_t instr_num = vec_instruction_base_.size(); + dependecy_count_ = std::vector<size_t>(instr_num, 0); + auto downstream_map = ir_dependency_builder_.Build(vec_instruction_base_); + + for (size_t instr_id = 0; instr_id < instr_num; ++instr_id) { + InstructionBase* cur_instr = vec_instruction_base_[instr_id].get(); + const std::set<size_t>& next_instr_ids = downstream_map[instr_id]; + + if (FLAGS_new_executor_serial_run) { + for (size_t next_instr_id : next_instr_ids) { + cur_instr->AddNextInstrInSameThread(next_instr_id); + } + } else { + if (cur_instr->KernelType() == OpFuncType::kGpuAsync) { + for (size_t next_instr_id : next_instr_ids) { + if (vec_instruction_base_[next_instr_id]->KernelType() == + OpFuncType::kGpuAsync) { + cur_instr->AddNextInstrInSameThread(next_instr_id); + } else { + cur_instr->AddNextInstrInDifferentThread(next_instr_id); + } + } + } else { + bool has_instr_in_same_thread = false; + for (size_t next_instr_id : next_instr_ids) { + if (!has_instr_in_same_thread && + vec_instruction_base_[next_instr_id]->KernelType() != + OpFuncType::kGpuAsync) { + cur_instr->AddNextInstrInSameThread(next_instr_id); + has_instr_in_same_thread = true; + } else { + cur_instr->AddNextInstrInDifferentThread(next_instr_id); + } + } + } + } + + for (size_t next_instr_id : next_instr_ids) { + ++dependecy_count_[next_instr_id]; + } + } +} + } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/new_executor/new_ir_interpreter.h b/paddle/fluid/framework/new_executor/new_ir_interpreter.h index 7f84fdfcdb8..dfff64e3584 100644 --- a/paddle/fluid/framework/new_executor/new_ir_interpreter.h +++ b/paddle/fluid/framework/new_executor/new_ir_interpreter.h @@ -186,17 +186,26 @@ class NewIRInterpreter : public InterpreterBaseImpl { /// ======================== /// /// For new ir /// /// ======================== /// + std::string DebugValueInfo(); + void BuildInstruction(); + void BuildInstructionDependences(); + std::unique_ptr<::ir::Program> ir_program_{nullptr}; std::vector<std::unique_ptr<InstructionBase>> vec_instruction_base_; std::unordered_map<::ir::Value, std::string> value_2_var_name_; + std::unordered_map<const Variable*, std::string> variable_2_var_name_; + std::map<std::string, int> var_name_2_id_; + std::vector<Variable*> variable_list_; + + interpreter::IrDependencyBuilder ir_dependency_builder_; }; } // namespace framework diff --git a/paddle/fluid/ir/interface/op_yaml_info_parser.cc b/paddle/fluid/ir/interface/op_yaml_info_parser.cc index 58936631a9f..bf2c8329f17 100644 --- a/paddle/fluid/ir/interface/op_yaml_info_parser.cc +++ b/paddle/fluid/ir/interface/op_yaml_info_parser.cc @@ -88,6 +88,10 @@ const std::map<std::string, int>& OpYamlInfoParser::InputName2Id() const { return input_name2id_; } +const std::map<std::string, int>& OpYamlInfoParser::OutputName2Id() const { + return output_name2id_; +} + bool OpYamlInfoParser::HasInplace(const std::string& out_name) const { auto& inplace_info = std::get<3>(op_info_tuple_).inplace; for (size_t i = 0; i < inplace_info.size(); i++) { @@ -113,10 +117,9 @@ const std::string& OpYamlInfoParser::InplaceName( void OpYamlInfoParser::parse() { auto input_info = std::get<0>(op_info_tuple_); - int start_index = 0; - + int input_start_index = 0; for (size_t i = 0; i < input_info.size(); ++i) { - input_name2id_[input_info[i].name] = start_index++; + input_name2id_[input_info[i].name] = input_start_index++;
input_name_list_.push_back(input_info[i].name); input_info_[input_info[i].name] = input_info[i]; if (!input_info[i].is_mutable_attribute) { @@ -130,8 +133,10 @@ void OpYamlInfoParser::parse() { attr_info_[attribute_info[i].name] = attribute_info[i]; } + int output_start_index = 0; auto output_info = std::get<2>(op_info_tuple_); for (size_t i = 0; i < output_info.size(); ++i) { + output_name2id_[output_info[i].name] = output_start_index++; output_name_list_.push_back(output_info[i].name); output_info_[output_info[i].name] = output_info[i]; } diff --git a/paddle/fluid/ir/interface/op_yaml_info_parser.h b/paddle/fluid/ir/interface/op_yaml_info_parser.h index b2897b0fc2e..6b600a6d70e 100644 --- a/paddle/fluid/ir/interface/op_yaml_info_parser.h +++ b/paddle/fluid/ir/interface/op_yaml_info_parser.h @@ -35,6 +35,7 @@ class OpYamlInfoParser { const std::vector& AttrParams(bool is_kernel = false) const; const OpRunTimeInfo& OpRuntimeInfo() const; const std::map& InputName2Id() const; + const std::map& OutputName2Id() const; const std::vector& InputNames() const { return input_name_list_; @@ -69,6 +70,7 @@ class OpYamlInfoParser { std::map attr_info_; // output info + std::map output_name2id_; std::vector output_name_list_; std::map output_info_; diff --git a/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_adaptor.h b/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_adaptor.h index 1466a580ff0..24066abecc0 100644 --- a/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_adaptor.h +++ b/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_adaptor.h @@ -60,9 +60,12 @@ class PhiKernelAdaptor { variable_2_var_name; std::map var_name_2_id; std::vector variable_list; + std::stringstream ss; + ss << this; BuildScope(*block, scope_, + ss.str(), &value_2_var_name, &variable_2_var_name, &var_name_2_id, diff --git a/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.cc b/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.cc index f1ad5042cd6..62fffc28e18 100644 --- a/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.cc +++ b/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.cc @@ -49,6 +49,7 @@ using VariableNameMap = paddle::framework::Variable* CreateVar( ir::Value value, paddle::framework::Scope* inner_scope, + const std::string& var_name_prefix, bool force_persisable, std::unordered_map* value_2_var_name, std::unordered_map* @@ -65,6 +66,7 @@ paddle::framework::Variable* CreateVar( } paddle::framework::Variable* var = nullptr; + VLOG(6) << "var_name_prefix is: " << var_name_prefix; std::string name = "inner_var_" + std::to_string(variable_2_var_name->size()); if (force_persisable || is_persisable) { VLOG(6) << "Create var: " << name << " in scope " << inner_scope->root(); @@ -109,6 +111,7 @@ void CheckInputVars( void BuildValue(ir::Value value, paddle::framework::Scope* inner_scope, + const std::string& var_name_prefix, std::unordered_map* value_2_var_name, std::unordered_map* variable_2_var_name, @@ -120,6 +123,7 @@ void BuildValue(ir::Value value, } else { var = CreateVar(value, inner_scope, + var_name_prefix, false, value_2_var_name, variable_2_var_name, @@ -146,6 +150,7 @@ void BuildValue(ir::Value value, "DenseTensorType")); auto var_i = CreateVar(value, inner_scope, + var_name_prefix, false, value_2_var_name, variable_2_var_name, @@ -163,6 +168,7 @@ void BuildValue(ir::Value value, void HandleForSpecialOp( ir::Operation* op, paddle::framework::Scope* inner_scope, + const std::string& var_name_prefix, std::unordered_map* value_2_var_name, std::unordered_map* variable_2_var_name, @@ -189,6 +195,7 @@ void 
HandleForSpecialOp( auto value = op->result(0); auto var = CreateVar(value, inner_scope, + var_name_prefix, false, value_2_var_name, variable_2_var_name, @@ -217,6 +224,7 @@ void HandleForSpecialOp( } else { var = CreateVar(out_value, inner_scope, + var_name_prefix, false, value_2_var_name, variable_2_var_name, @@ -296,6 +304,7 @@ void HandleForSpecialOp( void HandleForInplaceOp( ir::Operation* op, paddle::framework::Scope* inner_scope, + const std::string& var_name_prefix, std::unordered_map* value_2_var_name, std::unordered_map* variable_2_var_name, @@ -328,6 +337,7 @@ void HandleForInplaceOp( } else { BuildValue(value, inner_scope, + var_name_prefix, value_2_var_name, variable_2_var_name, var_name_2_id, @@ -340,6 +350,7 @@ void HandleForInplaceOp( // created in inner_scope. void BuildScope(const ir::Block& block, paddle::framework::Scope* inner_scope, + const std::string& var_name_prefix, std::unordered_map* value_2_var_name, std::unordered_map* variable_2_var_name, @@ -367,6 +378,7 @@ void BuildScope(const ir::Block& block, op_name == "builtin.get_parameter" || op_name == "builtin.slice") { HandleForSpecialOp(op, inner_scope, + var_name_prefix, value_2_var_name, variable_2_var_name, var_name_2_id, @@ -384,6 +396,7 @@ void BuildScope(const ir::Block& block, .data()) { HandleForInplaceOp(op, inner_scope, + var_name_prefix, value_2_var_name, variable_2_var_name, var_name_2_id, @@ -393,6 +406,7 @@ void BuildScope(const ir::Block& block, for (size_t i = 0; i < op->num_results(); ++i) { BuildValue(op->result(i), inner_scope, + var_name_prefix, value_2_var_name, variable_2_var_name, var_name_2_id, diff --git a/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.h b/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.h index 235f3dc9f35..08b9baceadf 100644 --- a/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.h +++ b/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.h @@ -43,6 +43,7 @@ namespace ir { void BuildScope(const ir::Block& block, paddle::framework::Scope* inner_scope, + const std::string& var_name_prefix, std::unordered_map* value_2_var_name, std::unordered_map* variable_2_var_name, @@ -55,15 +56,12 @@ template -void BuildPhiContext( - ir::Operation* op, - const std::unordered_map& name_map, - paddle::framework::Scope* scope, - paddle::framework::Scope* local_scope, - const paddle::dialect::OpYamlInfoParser& op_yaml_info, - Context* ctx, - std::map>* input_map = nullptr, - std::map>* output_map = nullptr) { +void BuildPhiContext(ir::Operation* op, + const std::unordered_map& name_map, + paddle::framework::Scope* scope, + paddle::framework::Scope* local_scope, + const paddle::dialect::OpYamlInfoParser& op_yaml_info, + Context* ctx) { paddle::framework::Scope* inner_scope = local_scope != nullptr ? 
local_scope : scope; VLOG(6) << "BuildPhiContext in scope[" << scope << "] inner_scope[" @@ -120,17 +118,6 @@ void BuildPhiContext( ir::Value ptr = op->operand(name2id.at(t)); auto in_var_name = name_map.at(ptr); - if (input_map != nullptr) { - // only deal with single input for now, [todo] need support multi input - // like concat - // TODO(phlrain): OpFuncNode need input_index and output_index, - // construct input_index and output_here, should remove input_index and - // output_index from OpFuncNode Each in_var_name named "inner_var_" + - // index, len("inner_var_") = 10 - - size_t tmp_id = std::atol(in_var_name.substr(4, 100).c_str()); - (*input_map)[std::to_string(name2id.at(t))].push_back(tmp_id); - } auto& tensor_attr_type = op_yaml_info.TensorAttrTypeName(t); VLOG(6) << "ctx->EmplaceBack mutable attr: " << t << "\t" << in_var_name; @@ -324,18 +311,6 @@ void BuildPhiContext( PADDLE_THROW( phi::errors::Unimplemented("only support DenseTensor and vector ")); } - - if (output_map != nullptr) { - // only deal with single input for now, [todo] need support multi input - // like concat - // TODO(phlrain): OpFuncNode need input_index and output_index, - // construct input_index and output_here, should remove input_index and - // output_index from OpFuncNode Each in_var_name named "inner_var_" + - // index, len("inner_var_") = 10 - - size_t tmp_id = std::atol(name.substr(4, 100).c_str()); - (*output_map)["out"].push_back(tmp_id); - } } } VLOG(6) << "Done build phi context"; -- GitLab
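For reference, the read/write analysis at the heart of IrDependencyBuilder::BuildDownstreamMap can be summarized with the following minimal, self-contained C++ sketch. It is illustrative only: the Instr struct and BuildDownstream function are simplified stand-ins rather than APIs from this patch, instructions are reduced to the sets of variable ids they read and write, and the shrinking and happens-before bookkeeping of the real builder are omitted.

#include <cstddef>
#include <iostream>
#include <map>
#include <set>
#include <vector>

// Simplified stand-in for an instruction: only its read/write variable ids.
struct Instr {
  std::set<size_t> reads;
  std::set<size_t> writes;
};

// Build the op -> downstream-op map from read-after-write, write-after-read
// and write-after-write relations, mirroring BuildDownstreamMap's bookkeeping.
std::map<size_t, std::set<size_t>> BuildDownstream(
    const std::vector<Instr>& ops) {
  std::map<size_t, size_t> last_writer;             // var -> most recent writer
  std::map<size_t, std::set<size_t>> last_readers;  // var -> readers since then
  std::map<size_t, std::set<size_t>> downstream;
  for (size_t i = 0; i < ops.size(); ++i) {
    // Read-after-write: a read must wait for the most recent write.
    for (size_t v : ops[i].reads) {
      if (last_writer.count(v)) downstream[last_writer[v]].insert(i);
    }
    // Write-after-read / write-after-write: a write must wait for all readers
    // since the last write, and for the last writer itself.
    for (size_t v : ops[i].writes) {
      for (size_t r : last_readers[v]) downstream[r].insert(i);
      if (last_writer.count(v)) downstream[last_writer[v]].insert(i);
      last_writer[v] = i;
      last_readers[v].clear();
    }
    // Record this op's reads only after its writes were processed, so an op
    // reading and writing the same variable does not depend on itself.
    for (size_t v : ops[i].reads) last_readers[v].insert(i);
  }
  return downstream;
}

int main() {
  // op0 writes var 0; op1 reads var 0 and writes var 1; op2 reads vars 0, 1.
  std::vector<Instr> ops = {{{}, {0}}, {{0}, {1}}, {{0, 1}, {}}};
  for (const auto& kv : BuildDownstream(ops)) {
    std::cout << "op" << kv.first << " ->";
    for (size_t d : kv.second) std::cout << " op" << d;
    std::cout << "\n";
  }
  return 0;  // prints "op0 -> op1 op2" and "op1 -> op2"
}

On this example the sketch emits the direct edges op0 -> {op1, op2} and op1 -> {op2}; the patch's ShrinkDownstreamMap pass would additionally drop the edge op0 -> op2, since it is already implied by op0 -> op1 -> op2, and AddDownstreamOp would record the corresponding entries in op_happens_before_.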