From ce0c5c2782c000a13e49739ac6976b17ad99a843 Mon Sep 17 00:00:00 2001 From: hong <43953930+phlrain@users.noreply.github.com> Date: Thu, 15 Jun 2023 14:15:10 +0800 Subject: [PATCH] [IR] [Baby step] New interprector support new ir (#54570) * add kernel dialect * change DenseTensorTypeStorage to DenseTensorType * add test case` * add first pd_op to kernel dialect * lower pd op to kernel dialect * update * update * remove useless code * add attrite print test * fix bug * update * update * update * update * polish code * fix bug * polish code and add python test * add test * fix test error * add env flag * fix bug * revert test env * change cc_test_old to cc_test * fix build_static bug * fix type test error * udpate cmake * disable test in windows * fix inference compile --- .../framework/new_executor/CMakeLists.txt | 11 +- .../interpreter/dependency_builder.cc | 12 +- .../interpreter/interpreter_util.cc | 78 +++++ .../interpreter/interpreter_util.h | 9 + .../interpreter/stream_analyzer.cc | 7 +- .../framework/new_executor/interpretercore.cc | 240 +++++++++----- .../framework/new_executor/interpretercore.h | 13 + .../new_executor/new_executor_defs.cc | 14 +- .../new_executor/new_executor_defs.h | 18 +- .../new_executor/standalone_executor.cc | 34 +- .../new_executor/standalone_executor.h | 2 + paddle/fluid/inference/CMakeLists.txt | 8 +- paddle/fluid/ir/CMakeLists.txt | 1 + paddle/fluid/ir/dialect/kernel_dialect.cc | 3 + paddle/fluid/ir/interface/infershape.h | 5 + paddle/fluid/ir/pass/pd_op_to_kernel_pass.cc | 22 +- .../ir/phi_kernel_adaptor/CMakeLists.txt | 7 + .../phi_kernel_adaptor/phi_kernel_adaptor.h | 168 ++++++++++ .../ir/phi_kernel_adaptor/phi_kernel_util.cc | 313 ++++++++++++++++++ .../ir/phi_kernel_adaptor/phi_kernel_util.h | 61 ++++ .../translator/attribute_translator.cc | 6 +- paddle/fluid/platform/CMakeLists.txt | 2 +- paddle/fluid/platform/init.cc | 7 + paddle/fluid/pybind/CMakeLists.txt | 4 + paddle/fluid/pybind/pybind.cc | 1 + paddle/phi/core/flags.cc | 12 + test/CMakeLists.txt | 8 + test/cpp/ir/core/CMakeLists.txt | 1 + test/cpp/ir/core/ir_exe_test.cc | 2 +- test/cpp/ir/core/ir_infershape_test.cc | 6 +- test/cpp/ir/core/phi_kernel_adaptor.h | 10 +- test/cpp/ir/core/type_test.cc | 2 +- test/cpp/ir/kernel_dialect/CMakeLists.txt | 1 + .../ir_kernel_dialect_pass_test.cc | 28 +- test/cpp/new_executor/CMakeLists.txt | 54 +-- .../standalone_executor_new_ir_test.cc | 83 +++++ test/ir/CMakeLists.txt | 1 + test/ir/new_ir/CMakeLists.txt | 10 + test/ir/new_ir/test_standalone_new_ir.py | 54 +++ 39 files changed, 1164 insertions(+), 154 deletions(-) create mode 100644 paddle/fluid/ir/phi_kernel_adaptor/CMakeLists.txt create mode 100644 paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_adaptor.h create mode 100644 paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.cc create mode 100644 paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.h create mode 100644 test/cpp/new_executor/standalone_executor_new_ir_test.cc create mode 100644 test/ir/new_ir/CMakeLists.txt create mode 100644 test/ir/new_ir/test_standalone_new_ir.py diff --git a/paddle/fluid/framework/new_executor/CMakeLists.txt b/paddle/fluid/framework/new_executor/CMakeLists.txt index 894275697f7..bfa2fc3bbb1 100644 --- a/paddle/fluid/framework/new_executor/CMakeLists.txt +++ b/paddle/fluid/framework/new_executor/CMakeLists.txt @@ -5,8 +5,15 @@ add_subdirectory(workqueue) set(STANDALONE_EXECUTOR_SRCS interpretercore.cc new_executor_defs.cc standalone_executor.cc) -set(STANDALONE_EXECUTOR_DEPS interpreter interpretercore_garbage_collector - workqueue) +set(STANDALONE_EXECUTOR_DEPS + interpreter + interpretercore_garbage_collector + workqueue + pd_dialect + pd_op_to_kernel_pass + phi_kernel_adaptor + program_translator + new_ir) cc_library( standalone_executor diff --git a/paddle/fluid/framework/new_executor/interpreter/dependency_builder.cc b/paddle/fluid/framework/new_executor/interpreter/dependency_builder.cc index 3f0cce3c378..76fb08baca4 100644 --- a/paddle/fluid/framework/new_executor/interpreter/dependency_builder.cc +++ b/paddle/fluid/framework/new_executor/interpreter/dependency_builder.cc @@ -117,7 +117,8 @@ const std::map>& DependencyBuilder::OpDownstreamMap() void DependencyBuilder::AddDependencyForCoalesceTensorOp() { for (size_t op_idx = 0; op_idx < op_num_; ++op_idx) { - if (instructions_->at(op_idx).OpBase()->Type() == kCoalesceTensor) { + if (instructions_->at(op_idx).OpBaseValid() && + instructions_->at(op_idx).OpBase()->Type() == kCoalesceTensor) { VLOG(4) << "Add depend for " << kCoalesceTensor << " " << op_idx; auto fused_out = instructions_->at(op_idx).Outputs().at("FusedOutput")[0]; auto outputs = instructions_->at(op_idx).Outputs().at("Output"); @@ -224,7 +225,8 @@ void DependencyBuilder::AddDependencyForCommunicationOp() { const std::string kSyncComm = "c_sync_comm_stream"; dependence_op_idx = ULLONG_MAX; for (size_t op_idx = 0; op_idx < op_num_; ++op_idx) { - if (instructions_->at(op_idx).OpBase()->Type() == kSyncComm) { + if (instructions_->at(op_idx).OpBaseValid() && + instructions_->at(op_idx).OpBase()->Type() == kSyncComm) { dependence_op_idx = op_idx; } else { if (dependence_op_idx != ULLONG_MAX) { @@ -251,7 +253,8 @@ void DependencyBuilder::AddDependencyForRandomOp() { size_t dependence_op_idx = ULLONG_MAX; for (size_t op_idx = 0; op_idx < op_num_; ++op_idx) { - if (random_op_set.count(instructions_->at(op_idx).OpBase()->Type())) { + if (instructions_->at(op_idx).OpBaseValid() && + random_op_set.count(instructions_->at(op_idx).OpBase()->Type())) { if (dependence_op_idx != ULLONG_MAX) { AddDownstreamOp(dependence_op_idx, op_idx); } @@ -275,7 +278,8 @@ void DependencyBuilder::AddDependencyForReadOp() { std::vector read_ops; std::vector startup_ops; for (size_t op_idx = 0; op_idx < op_num_; ++op_idx) { - if (instructions_->at(op_idx).OpBase()->Type() == "read") { + if (instructions_->at(op_idx).OpBaseValid() && + instructions_->at(op_idx).OpBase()->Type() == "read") { read_ops.push_back(op_idx); } diff --git a/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc b/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc index 0de9a11cbba..d676db4d6fe 100644 --- a/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc +++ b/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc @@ -22,6 +22,9 @@ #include "paddle/fluid/framework/new_executor/interpreter/data_transfer.h" #include "paddle/fluid/framework/new_executor/interpreter/execution_config.h" #include "paddle/fluid/framework/new_executor/interpreter/static_build.h" +#include "paddle/fluid/ir/dialect/pd_dialect.h" +#include "paddle/fluid/ir/interface/op_yaml_info.h" +#include "paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.h" #include "paddle/fluid/memory/stats.h" #include "paddle/fluid/operators/controlflow/conditional_block_op_helper.h" #include "paddle/fluid/operators/controlflow/recurrent_op_helper.h" @@ -138,6 +141,9 @@ bool IsCommunicationOp(const std::string& op_name) { } bool IsCommunicationOp(const Instruction& instr) { + if (!instr.OpBaseValid()) { + return false; + } return IsCommunicationOp(instr.OpBase()->Type()); } @@ -924,6 +930,78 @@ void BuildOpFuncList(const platform::Place& place, delete garbages; } +void BuildOpFuncList( + const platform::Place& place, + ::ir::Block* block, + std::vector* vec_func_list, + framework::Scope* scope, + const std::unordered_map<::ir::Value, std::string>& value_2_name_map, + const ExecutionConfig& execution_config) { + vec_func_list->reserve(block->size()); + ::ir::IrContext* ctx = ir::IrContext::Instance(); + + ctx->GetOrRegisterDialect(); + + for (auto it = block->begin(); it != block->end(); ++it) { + OpFuncNode op_func_node; + auto attr_map = (*it)->attributes(); + + auto op_name = attr_map.at("op_name").dyn_cast<::ir::StrAttribute>().data(); + + if (op_name == "pd.fetch") { + VLOG(6) << "skip process pd.fetch op"; + continue; + } + op_func_node.phi_op_name_ = op_name; + + ::ir::OpInfo op_info = ctx->GetRegisteredOpInfo(op_name); + + auto impl = + op_info.GetInterfaceImpl(); + auto yaml_info = impl->get_op_info_(); + + auto attr_info = std::get<1>(yaml_info); + + op_func_node.infer_shape_interface_ = + op_info.GetInterfaceImpl(); + + ::ir::BuildInferMetaContext((*it), + value_2_name_map, + scope, + yaml_info, + &(op_func_node.infer_meta_context_)); + + auto kernel_name = + attr_map.at("kernel_name").dyn_cast().data(); + auto kernel_key = attr_map.at("kernel_key") + .dyn_cast() + .data(); + auto t1 = + phi::KernelFactory::Instance().SelectKernel(kernel_name, kernel_key); + op_func_node.phi_kernel_ = new phi::Kernel(t1); + + PADDLE_ENFORCE_EQ(op_func_node.phi_kernel_->IsValid(), + true, + "not found kernel for [%s]", + kernel_name); + ::ir::BuildPhiKernelContext((*it), + value_2_name_map, + scope, + yaml_info, + &(op_func_node.kernel_context_), + &(op_func_node.input_index), + &(op_func_node.output_index)); + + op_func_node.kernel_context_.SetDeviceContext( + phi::DeviceContextPool::Instance().Get( + phi::TransToPhiPlace(kernel_key.backend()))); + op_func_node.dev_ctx_ = phi::DeviceContextPool::Instance().Get( + phi::TransToPhiPlace(kernel_key.backend())); + + vec_func_list->emplace_back(op_func_node); + } +} + void BuildVariableScope(const framework::BlockDesc& block, const ExecutionConfig& execution_config, VariableScope* var_scope) { diff --git a/paddle/fluid/framework/new_executor/interpreter/interpreter_util.h b/paddle/fluid/framework/new_executor/interpreter/interpreter_util.h index f31dd7f789d..fb32b4f80d9 100644 --- a/paddle/fluid/framework/new_executor/interpreter/interpreter_util.h +++ b/paddle/fluid/framework/new_executor/interpreter/interpreter_util.h @@ -38,6 +38,7 @@ #include "paddle/fluid/framework/variable_helper.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/init.h" +#include "paddle/ir/core/program.h" using AtomicVectorSizeT = std::vector>; @@ -93,6 +94,14 @@ void BuildOpFuncList(const platform::Place& place, bool use_local_scope = true, bool static_build = false); +void BuildOpFuncList( + const platform::Place& place, + ::ir::Block* block, + std::vector* vec_func_list, + framework::Scope* scope, + const std::unordered_map<::ir::Value, std::string>& value_2_name_map, + const ExecutionConfig& execution_config); + void BuildVariableScope(const framework::BlockDesc& block, const ExecutionConfig& execution_config, VariableScope* var_scope); diff --git a/paddle/fluid/framework/new_executor/interpreter/stream_analyzer.cc b/paddle/fluid/framework/new_executor/interpreter/stream_analyzer.cc index 693365c9f47..acde737e9e0 100644 --- a/paddle/fluid/framework/new_executor/interpreter/stream_analyzer.cc +++ b/paddle/fluid/framework/new_executor/interpreter/stream_analyzer.cc @@ -143,6 +143,9 @@ void StreamAnalyzer::ConstructEvents( DeviceContext* StreamAnalyzer::ParseDeviceContext( const OpFuncNode& op_func_node) const { auto& op = op_func_node.operator_base_; + if (op == nullptr) { + return op_func_node.dev_ctx_; + } auto& op_type = op->Type(); const std::string& execution_stream = op_func_node.execution_stream_; const int stream_priority = op_func_node.stream_priority_; @@ -201,7 +204,9 @@ DeviceContext* StreamAnalyzer::ParseDeviceContext( #endif } - SetDeviceCommContext(op.get(), op_func_node.dev_ctx_); + if (op != nullptr) { + SetDeviceCommContext(op.get(), op_func_node.dev_ctx_); + } return op_func_node.dev_ctx_; } diff --git a/paddle/fluid/framework/new_executor/interpretercore.cc b/paddle/fluid/framework/new_executor/interpretercore.cc index dc3674e8d80..f005b7d1072 100644 --- a/paddle/fluid/framework/new_executor/interpretercore.cc +++ b/paddle/fluid/framework/new_executor/interpretercore.cc @@ -32,6 +32,7 @@ #ifdef PADDLE_WITH_MKLDNN #include "paddle/fluid/platform/mkldnn_helper.h" #endif +#include "paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.h" #include "paddle/fluid/platform/cuda_graph_with_memory_pool.h" #include "paddle/fluid/platform/flags.h" #include "paddle/phi/backends/device_manager.h" @@ -60,6 +61,8 @@ PHI_DECLARE_bool(new_executor_use_cuda_graph); PHI_DECLARE_bool(sync_nccl_allreduce); #endif +PHI_DECLARE_bool(enable_new_ir_in_executor); + constexpr const char* kExceptionCaught = "ExceptionCaught"; constexpr const char* kTaskCompletion = "TaskCompletion"; @@ -145,6 +148,58 @@ InterpreterCore::InterpreterCore(const platform::Place& place, PrepareForCUDAGraphCapture(); } +InterpreterCore::InterpreterCore(const platform::Place& place, + const BlockDesc& block, + framework::Scope* scope, + ::ir::Program* ir_prog, + const ExecutionConfig& execution_config) + : place_(place), + block_(block), + stream_analyzer_(place), + execution_config_(execution_config), + var_scope_(scope), + ir_program_(ir_prog) { + VLOG(4) << "InterpreterCore(): " << this << " on " << place_; + + static_build_ = FLAGS_new_executor_static_build && + !FLAGS_new_executor_use_cuda_graph && + !execution_config.used_for_control_flow_op && + interpreter::BlockCanBeStaticBuilt(block); + + exception_notifier_ = main_thread_blocker_.RegisterEvent(kExceptionCaught); + completion_notifier_ = main_thread_blocker_.RegisterEvent(kTaskCompletion); + + if (!FLAGS_new_executor_use_local_scope) { + execution_config_.create_local_scope = false; + } + execution_config_.AnalyzeThreadPoolConfig(place, block.OpSize()); + execution_config_.Log(/*log_level=*/8); + + if (execution_config_.create_local_scope) { + auto local_scope = &var_scope_.GetMutableScope()->NewScope(); + local_scope_ = local_scope; + } + + // force use outer scope for now + local_scope_ = scope; + static_build_ = true; + + var_scope_.SetLocalScope(local_scope_); + + instruction_scheduling_priority_less = [this](size_t lhs, size_t rhs) { + SchedulingPriority lhs_scheduling_priority = + vec_instruction_[lhs].GetSchedulingPriority(); + SchedulingPriority rhs_scheduling_priority = + vec_instruction_[rhs].GetSchedulingPriority(); + if (lhs_scheduling_priority == rhs_scheduling_priority) { + return lhs < rhs; + } + return lhs_scheduling_priority > rhs_scheduling_priority; + }; + + PrepareForCUDAGraphCapture(); +} + InterpreterCore::~InterpreterCore() { // cancle gc's thread gc_.reset(nullptr); @@ -166,8 +221,9 @@ void InterpreterCore::RunImpl() { interpreter::ResetAtomicGuard guard(&deps_, &refs_); - if ((execution_config_.used_for_jit || execution_config_.used_for_cinn) && - (sync_op_num_ == 0)) { + if (FLAGS_enable_new_ir_in_executor || + ((execution_config_.used_for_jit || execution_config_.used_for_cinn) && + (sync_op_num_ == 0))) { VLOG(4) << "Tracing Instruction List"; TraceInstructionList(vec_instruction_); } else { @@ -196,6 +252,7 @@ paddle::framework::FetchList InterpreterCore::Run( #endif bool is_build = is_build_; + Prepare(feed_names, feed_tensors, is_build); if (is_build) { @@ -235,19 +292,31 @@ paddle::framework::FetchList InterpreterCore::Run( if (!is_build_) { LOG_FIRST_N(INFO, 1) << "New Executor is Running."; - paddle::framework::interpreter::BuildVariableScope( - block_, execution_config_, &var_scope_); + if (FLAGS_enable_new_ir_in_executor) { + ::ir::BuildScope( + ir_program_->block(), local_scope_, &value_2_var_name_map_); + } else { + interpreter::BuildVariableScope(block_, execution_config_, &var_scope_); + } std::vector op_func_nodes; - paddle::framework::interpreter::BuildOpFuncList( - place_, - block_, - execution_config_.skip_gc_vars, - &op_func_nodes, - &var_scope_, - execution_config_, - HasLocalScope(), - static_build_); + if (FLAGS_enable_new_ir_in_executor) { + interpreter::BuildOpFuncList(place_, + ir_program_->block(), + &op_func_nodes, + local_scope_, + value_2_var_name_map_, + execution_config_); + } else { + interpreter::BuildOpFuncList(place_, + block_, + execution_config_.skip_gc_vars, + &op_func_nodes, + &var_scope_, + execution_config_, + HasLocalScope(), + static_build_); + } SetFeedVarsInplaceSkip(feed_names); // convert vec func_list to graph Convert(&op_func_nodes); @@ -656,7 +725,6 @@ void InterpreterCore::Convert( } #endif } - BuildOperatorDependences(); // NOTE(Ruibiao): For cross-step stream synchronization, an event may be @@ -666,13 +734,13 @@ void InterpreterCore::Convert( // work and WaitEvent always return succeed immediately, we omit the // prelude-record for the first step here. stream_analyzer_.ConstructEvents(&vec_instruction_); - // add event for the input var of jit program, since there are async copied // from gpu_pinned place to gpu place on compute stream. for (size_t i = 0; i < dependecy_count_.size(); ++i) { if (dependecy_count_[i] == 0) { auto& inst = vec_instruction_[i]; - if (inst.OpBase()->Type() == interpreter::kMemcpyD2H && + if (inst.OpBaseValid() && + inst.OpBase()->Type() == interpreter::kMemcpyD2H && platform::is_gpu_place(place_)) { for (auto& item : inst.Inputs()) { for (auto var_id : item.second) { @@ -692,61 +760,65 @@ void InterpreterCore::Convert( } // calculate last_live_ops_ - for (size_t op_idx = 0; op_idx < op_nums; ++op_idx) { - Instruction& instr = vec_instruction_[op_idx]; - OpInOutInfo info; - info.Build(instr.OpBase()); + if (!FLAGS_enable_new_ir_in_executor) { + for (size_t op_idx = 0; op_idx < op_nums; ++op_idx) { + Instruction& instr = vec_instruction_[op_idx]; + OpInOutInfo info; + if (instr.OpBaseValid()) { + info.Build(instr.OpBase()); + } - std::set gc_check_vars; + std::set gc_check_vars; - const std::map>& ins = instr.Inputs(); - const std::map>& outs = instr.Outputs(); - std::multimap> ins_and_outs{ins.begin(), - ins.end()}; - ins_and_outs.insert(outs.begin(), outs.end()); + const std::map>& ins = instr.Inputs(); + const std::map>& outs = instr.Outputs(); + std::multimap> ins_and_outs{ins.begin(), + ins.end()}; + ins_and_outs.insert(outs.begin(), outs.end()); - for (auto& item : ins_and_outs) { - for (auto id : item.second) { - if (id == kEmptyVarIndex) { - continue; - } - auto* var_desc = var_scope_.VarDesc(id); - // skip no_need_buffer input vars - if (var_desc && ins.count(item.first) && - !info.IsInArgBufferNeeded(var_desc->Name())) { - continue; - } - // skip when this var is not in block and not a data_transferred var, - // which means this var is managed by other block - const auto& var_name = var_scope_.GetNameById(id); - bool not_owned = !block_.HasVar(var_name); - const auto& transferred_vars = var_scope_.DataTransferAddedVars(); - bool not_transferred = - std::all_of(transferred_vars.begin(), - transferred_vars.end(), - [&](const std::pair& elem) { - return elem.first != var_name; - }); - if (not_owned && not_transferred) { - VLOG(10) << "[gc_check_inputs] skip gc: " << var_name; - continue; + for (auto& item : ins_and_outs) { + for (auto id : item.second) { + if (id == kEmptyVarIndex) { + continue; + } + auto* var_desc = var_scope_.VarDesc(id); + // skip no_need_buffer input vars + if (var_desc && ins.count(item.first) && + !info.IsInArgBufferNeeded(var_desc->Name())) { + continue; + } + // skip when this var is not in block and not a data_transferred var, + // which means this var is managed by other block + const auto& var_name = var_scope_.GetNameById(id); + bool not_owned = !block_.HasVar(var_name); + const auto& transferred_vars = var_scope_.DataTransferAddedVars(); + bool not_transferred = + std::all_of(transferred_vars.begin(), + transferred_vars.end(), + [&](const std::pair& elem) { + return elem.first != var_name; + }); + if (not_owned && not_transferred) { + VLOG(10) << "[gc_check_inputs] skip gc: " << var_name; + continue; + } + gc_check_vars.insert(id); } - gc_check_vars.insert(id); } - } - for (auto var_id : gc_check_vars) { - Scope* inner_scope = - HasLocalScope() ? local_scope_ : var_scope_.GetMutableScope(); - paddle::framework::Variable* var = - inner_scope->FindVar(var_scope_.GetNameById(var_id)); - if (var->IsType() || var->IsType() || - var->IsType()) { - last_live_ops_[var_id].insert(op_idx); - } else { - VLOG(4) << "not clear " << var_scope_.GetNameById(var_id) << " after " - << instr.OpBase()->Type() << " because its type is " - << framework::ToTypeName(var->Type()); + for (auto var_id : gc_check_vars) { + Scope* inner_scope = + HasLocalScope() ? local_scope_ : var_scope_.GetMutableScope(); + paddle::framework::Variable* var = + inner_scope->FindVar(var_scope_.GetNameById(var_id)); + if (var->IsType() || + var->IsType() || var->IsType()) { + last_live_ops_[var_id].insert(op_idx); + } else { + VLOG(4) << "not clear " << var_scope_.GetNameById(var_id) << " after " + << instr.OpBase()->Type() << " because its type is " + << framework::ToTypeName(var->Type()); + } } } } @@ -791,21 +863,22 @@ void InterpreterCore::Convert( last_live_ops_[i] = minumum_last_live_ops; vec_meta_info[i].var_ref_count_ = last_live_ops_[i].size(); } - - for (size_t i = 0; i < vec_instruction_.size(); ++i) { - BuildAndCacheInstructionCtx(&vec_instruction_[i]); + if (!FLAGS_enable_new_ir_in_executor) { + for (size_t i = 0; i < vec_instruction_.size(); ++i) { + BuildAndCacheInstructionCtx(&vec_instruction_[i]); + } } - bool inplaced = false; for (const Instruction& inst : vec_instruction_) { - if (inst.OpBase()->Type() == "share_buffer" || - inst.OpBase()->Type() == "share_data") { + if (inst.OpBaseValid() && (inst.OpBase()->Type() == "share_buffer" || + inst.OpBase()->Type() == "share_data")) { VLOG(4) << "Already inplaced, skip inplace now."; inplaced = true; } } - if (FLAGS_new_executor_use_inplace && !inplaced) { + if (!FLAGS_enable_new_ir_in_executor && FLAGS_new_executor_use_inplace && + !inplaced) { BuildInplace(); } @@ -996,22 +1069,30 @@ void InterpreterCore::RunInstruction(const Instruction& instr_node) { ? "kGpuSync" : "kGpuAsync")) << " runs on " << platform::GetCurrentThreadName(); - - auto* op = instr_node.OpBase(); - platform::RecordEvent instruction_event( - op->Type(), platform::TracerEventType::Operator, 1); + OperatorBase* op = nullptr; + if (instr_node.OpBaseValid()) { + op = instr_node.OpBase(); + platform::RecordEvent instruction_event( + op->Type(), platform::TracerEventType::Operator, 1); + } SetDeviceId(instr_node.DeviceContext().GetPlace()); try { instr_node.WaitEvent(place_); - if (!instr_node.IsArtificial()) { + if (instr_node.PreDefineContext()) { + VLOG(5) << "run new ir selected kernel"; + auto op_func_node = const_cast((instr_node.OpFunc())); + op_func_node->infer_shape_interface_->infer_shape_( + &(op_func_node->infer_meta_context_)); + (*(op_func_node->phi_kernel_))(&(op_func_node->kernel_context_)); + + } else if (!instr_node.IsArtificial()) { RunOperator(instr_node); CheckGC(instr_node); interpreter::LogDeviceMemoryStats(place_); } - instr_node.RecordEvent(place_); } catch (platform::EnforceNotMet& ex) { framework::InsertCallStackInfo(op->Type(), op->Attrs(), &ex); @@ -1437,7 +1518,8 @@ void InterpreterCore::TraceInstructionList( void InterpreterCore::RecordMemcpyD2H(const Instruction& instr_node) { // NOTE(zhiqiu): hot fix for jit input var - if (instr_node.OpBase()->Type() == interpreter::kMemcpyD2H) { + if (instr_node.OpBaseValid() && + instr_node.OpBase()->Type() == interpreter::kMemcpyD2H) { platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); auto* default_dev_ctx = pool.Get(place_); for (auto& event : instr_node.EventsToWait()) { diff --git a/paddle/fluid/framework/new_executor/interpretercore.h b/paddle/fluid/framework/new_executor/interpretercore.h index 904bfc5ec69..afaa09db516 100644 --- a/paddle/fluid/framework/new_executor/interpretercore.h +++ b/paddle/fluid/framework/new_executor/interpretercore.h @@ -32,6 +32,8 @@ #include "paddle/fluid/framework/variable.h" #include "paddle/fluid/memory/allocation/spin_lock.h" #include "paddle/fluid/platform/device_event.h" +#include "paddle/ir/core/program.h" +#include "paddle/ir/core/value.h" DECLARE_bool(new_executor_use_local_scope); @@ -52,6 +54,12 @@ class InterpreterCore { Scope* scope, const ExecutionConfig& execution_config = ExecutionConfig()); + InterpreterCore(const platform::Place& place, + const BlockDesc& block, + Scope* scope, + ::ir::Program* ir_prog, + const ExecutionConfig& execution_config = ExecutionConfig()); + ~InterpreterCore(); paddle::framework::FetchList Run( @@ -191,6 +199,11 @@ class InterpreterCore { InstructionSchedulingPriorityLess instruction_scheduling_priority_less; std::vector hookfuncs_; + + // The next only for new IR + ::ir::Program* ir_program_{nullptr}; + + std::unordered_map<::ir::Value, std::string> value_2_var_name_map_; }; } // namespace framework diff --git a/paddle/fluid/framework/new_executor/new_executor_defs.cc b/paddle/fluid/framework/new_executor/new_executor_defs.cc index cd4e12748b6..eeb5142546f 100644 --- a/paddle/fluid/framework/new_executor/new_executor_defs.cc +++ b/paddle/fluid/framework/new_executor/new_executor_defs.cc @@ -152,10 +152,18 @@ void VariableScope::CheckExist(const std::string& name) const { Instruction::Instruction(size_t id, OpFuncNode&& op_func_node, const platform::DeviceContext& dev_ctx) - : is_artificial_(op_func_node.operator_base_->Type() == "depend"), + : is_artificial_(false), id_(id), op_func_node_(op_func_node), dev_ctx_(dev_ctx) { + if (op_func_node.operator_base_ != nullptr && + op_func_node.operator_base_->Type() == "depend") { + is_artificial_ = true; + } + + if (op_func_node_.infer_shape_interface_ != nullptr) { + pre_define_context_ = true; + } PADDLE_ENFORCE_GE(id, 0, platform::errors::PreconditionNotMet( @@ -218,6 +226,10 @@ OperatorBase* Instruction::OpBase() const { return op_base.get(); } +bool Instruction::OpBaseValid() const { + return op_func_node_.operator_base_ != nullptr; +} + void Instruction::AddGCCheckVar(size_t id) { gc_check_vars_.push_back(id); } const std::vector& Instruction::GCCheckVars() const { diff --git a/paddle/fluid/framework/new_executor/new_executor_defs.h b/paddle/fluid/framework/new_executor/new_executor_defs.h index d8219fac28b..0742568e939 100644 --- a/paddle/fluid/framework/new_executor/new_executor_defs.h +++ b/paddle/fluid/framework/new_executor/new_executor_defs.h @@ -20,6 +20,7 @@ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/variable_helper.h" +#include "paddle/fluid/ir/interface/infershape.h" #include "paddle/fluid/platform/device_event_base.h" #include "paddle/fluid/platform/event.h" #include "paddle/phi/core/utils/rw_lock.h" @@ -164,13 +165,20 @@ struct OpFuncNode { std::map> output_index; // TODO(zhiqiu): Better make it unique_ptr - std::shared_ptr operator_base_; + std::shared_ptr operator_base_{nullptr}; std::string execution_stream_{kDefaultStream}; OpFuncType type_; OpKernelComputeFunc kernel_func_; SchedulingPriority scheduling_priority_{0}; // lower value, higher priority + + // the next only for new IR + phi::KernelContext kernel_context_; + phi::InferMetaContext infer_meta_context_; + std::string phi_op_name_; + paddle::dialect::InferShapeInterface::Concept* infer_shape_interface_{ + nullptr}; }; class Instruction { @@ -234,6 +242,8 @@ class Instruction { OperatorBase* OpBase() const; + bool OpBaseValid() const; + void AddGCCheckVar(size_t id); const std::vector& GCCheckVars() const; @@ -263,6 +273,10 @@ class Instruction { return op_func_node_.scheduling_priority_; } + bool PreDefineContext() const { return pre_define_context_; } + + const OpFuncNode* OpFunc() const { return &op_func_node_; } + private: bool is_artificial_; // Instruction is artificial means that it is only used // to assist scheduling and no need to be executed. @@ -285,6 +299,8 @@ class Instruction { std::vector gc_check_vars_; std::vector> vec_inplace_in_to_out_; + + bool pre_define_context_{false}; }; namespace interpreter { diff --git a/paddle/fluid/framework/new_executor/standalone_executor.cc b/paddle/fluid/framework/new_executor/standalone_executor.cc index 6745fec7ca3..0a8a38b794b 100644 --- a/paddle/fluid/framework/new_executor/standalone_executor.cc +++ b/paddle/fluid/framework/new_executor/standalone_executor.cc @@ -16,11 +16,29 @@ #include "paddle/fluid/framework/new_executor/interpreter/interpreter_util.h" #include "paddle/fluid/platform/profiler/event_tracing.h" +#include "paddle/fluid/ir/pass/pd_op_to_kernel_pass.h" + +#include "paddle/fluid/ir_adaptor/translator/translate.h" + +PHI_DECLARE_bool(enable_new_ir_in_executor); + namespace paddle { namespace framework { StandaloneExecutor::StandaloneExecutor(const platform::Place& place, const std::vector& programs) - : place_(place), programs_(programs) {} + : place_(place), programs_(programs) { + if (FLAGS_enable_new_ir_in_executor) { + for (size_t i = 0; i < programs_.size(); ++i) { + VLOG(6) << "begin to translate" << std::endl; + auto base_progrm = paddle::TranslateLegacyProgramToProgram(programs_[i]); + + auto kernel_program = + paddle::dialect::PdOpLowerToKernelPass(base_progrm.get()); + + ir_programs_.emplace_back(std::move(kernel_program)); + } + } +} paddle::framework::FetchList StandaloneExecutor::Run( Scope* scope, @@ -39,6 +57,7 @@ paddle::framework::FetchList StandaloneExecutor::Run( 0, interpreter::ExecutionConfig()); VLOG(4) << "StandaloneExecutor: " << this << ", InterpreterCore: " << core; + return core->Run(feed_names); } else { // run multiple programs VLOG(6) << "Run multiple program, programs_.size() " << programs_.size(); @@ -97,8 +116,17 @@ std::shared_ptr StandaloneExecutor::GetInterpreterCore( if (iter == interpretercores_.end()) { VLOG(3) << "create interpreter_core for " << oss.str() << " on place " << place_; - std::shared_ptr core = std::make_shared( - place_, program.Block(0), scope, execution_config); + std::shared_ptr core = nullptr; + if (FLAGS_enable_new_ir_in_executor) { + core = std::make_shared(place_, + program.Block(0), + scope, + ir_programs_[program_idx].get(), + execution_config); + } else { + core = std::make_shared( + place_, program.Block(0), scope, execution_config); + } interpretercores_.emplace(oss.str(), core); return core; } else { diff --git a/paddle/fluid/framework/new_executor/standalone_executor.h b/paddle/fluid/framework/new_executor/standalone_executor.h index 76de149e952..ee3a9af5b6f 100644 --- a/paddle/fluid/framework/new_executor/standalone_executor.h +++ b/paddle/fluid/framework/new_executor/standalone_executor.h @@ -23,6 +23,7 @@ #include "paddle/fluid/framework/new_executor/new_executor_defs.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/platform/place.h" +#include "paddle/ir/core/program.h" namespace paddle { namespace framework { @@ -54,6 +55,7 @@ class StandaloneExecutor { const platform::Place place_; const std::vector programs_; + std::vector> ir_programs_; std::vector microbatch_scopes_; std::unordered_map> diff --git a/paddle/fluid/inference/CMakeLists.txt b/paddle/fluid/inference/CMakeLists.txt index 598997360d5..e1de44fa37b 100644 --- a/paddle/fluid/inference/CMakeLists.txt +++ b/paddle/fluid/inference/CMakeLists.txt @@ -58,14 +58,14 @@ set(KERNEL_LIST #windows GPU static library over the limit, so not create_static_lib, and cc_library is dummy if(WIN32 AND WITH_GPU) - cc_library(paddle_inference DEPS ${fluid_modules} ${STATIC_INFERENCE_API} - ${utils_modules}) + cc_library(paddle_inference DEPS ${fluid_modules} new_ir + ${STATIC_INFERENCE_API} ${utils_modules}) else() # message("${fluid_modules}") # message("${STATIC_INFERENCE_API}") # message("${utils_modules}") # message("${phi_modules}") - create_static_lib(paddle_inference ${phi_modules} ${fluid_modules} + create_static_lib(paddle_inference ${phi_modules} ${fluid_modules} new_ir ${STATIC_INFERENCE_API} ${utils_modules}) endif() @@ -96,7 +96,7 @@ set(SHARED_INFERENCE_SRCS # shared inference library deps list(REMOVE_ITEM fluid_modules standalone_executor interpretercore_garbage_collector) -set(SHARED_INFERENCE_DEPS phi ${fluid_modules} analysis_predictor +set(SHARED_INFERENCE_DEPS phi new_ir ${fluid_modules} analysis_predictor ${utils_modules}) if(WITH_CRYPTO) diff --git a/paddle/fluid/ir/CMakeLists.txt b/paddle/fluid/ir/CMakeLists.txt index 19c5c5d7310..f4e88d5dc1e 100644 --- a/paddle/fluid/ir/CMakeLists.txt +++ b/paddle/fluid/ir/CMakeLists.txt @@ -1,3 +1,4 @@ add_subdirectory(interface) add_subdirectory(dialect) add_subdirectory(pass) +add_subdirectory(phi_kernel_adaptor) diff --git a/paddle/fluid/ir/dialect/kernel_dialect.cc b/paddle/fluid/ir/dialect/kernel_dialect.cc index ecf2e3996a0..be7ebceda75 100644 --- a/paddle/fluid/ir/dialect/kernel_dialect.cc +++ b/paddle/fluid/ir/dialect/kernel_dialect.cc @@ -24,9 +24,12 @@ #include "paddle/fluid/ir/dialect/kernel_type_storage.h" #include "paddle/fluid/ir/dialect/pd_op.h" #include "paddle/fluid/ir/dialect/utils.h" +#include "paddle/fluid/platform/init_phi.h" #include "paddle/ir/core/dialect_interface.h" #include "paddle/phi/core/dense_tensor.h" +REGISTER_FILE_SYMBOLS(kernel_dialect); + namespace paddle { namespace dialect { diff --git a/paddle/fluid/ir/interface/infershape.h b/paddle/fluid/ir/interface/infershape.h index 4c5b72b6127..7b803abb721 100644 --- a/paddle/fluid/ir/interface/infershape.h +++ b/paddle/fluid/ir/interface/infershape.h @@ -16,6 +16,8 @@ #include "paddle/ir/core/op_base.h" #include "paddle/phi/core/infermeta_utils.h" +namespace paddle { +namespace dialect { class InferShapeInterface : public ir::OpInterfaceBase { public: struct Concept { @@ -43,3 +45,6 @@ class InferShapeInterface : public ir::OpInterfaceBase { private: Concept *impl_; }; + +} // namespace dialect +} // namespace paddle diff --git a/paddle/fluid/ir/pass/pd_op_to_kernel_pass.cc b/paddle/fluid/ir/pass/pd_op_to_kernel_pass.cc index b9f7e9f591b..13834e4d9cb 100644 --- a/paddle/fluid/ir/pass/pd_op_to_kernel_pass.cc +++ b/paddle/fluid/ir/pass/pd_op_to_kernel_pass.cc @@ -160,12 +160,16 @@ std::unique_ptr PdOpLowerToKernelPass(ir::Program* prog) { // only for single output // need update new kernel key layout and data tyep - auto allocated_dense_tensor_dtype = - paddle::dialect::AllocatedDenseTensorType::get( - ctx, - phi::TransToPhiPlace(kernel_key.backend()), - (*it)->result(0).type().dyn_cast()); + std::vector op_output_types; + if ((*it)->num_results() > 0) { + auto allocated_dense_tensor_dtype = + paddle::dialect::AllocatedDenseTensorType::get( + ctx, + phi::TransToPhiPlace(kernel_key.backend()), + (*it)->result(0).type().dyn_cast()); + op_output_types.push_back(allocated_dense_tensor_dtype); + } // constuct input std::vector vec_inputs; if ((*it)->name() != "pd.full_" && (*it)->num_operands() > 0) { @@ -195,10 +199,14 @@ std::unique_ptr PdOpLowerToKernelPass(ir::Program* prog) { } ir::Operation* op1 = ir::Operation::Create( - vec_inputs, op1_attribute, {allocated_dense_tensor_dtype}, op1_info); + vec_inputs, op1_attribute, op_output_types, op1_info); map_op_pair[*it] = op1; - map_value_pair[(*it)->result(0)] = op1->result(0); + + // only deal with single output + if ((*it)->num_results() > 0) { + map_value_pair[(*it)->result(0)] = op1->result(0); + } program->block()->push_back(op1); } diff --git a/paddle/fluid/ir/phi_kernel_adaptor/CMakeLists.txt b/paddle/fluid/ir/phi_kernel_adaptor/CMakeLists.txt new file mode 100644 index 00000000000..dc048d2917b --- /dev/null +++ b/paddle/fluid/ir/phi_kernel_adaptor/CMakeLists.txt @@ -0,0 +1,7 @@ +# All source files of pd_dialect, except for the source file of op, which is generated in the compilation directory. +file(GLOB PHI_KERNEL_ADAPTOR_SRCS "*.cc") + +cc_library( + phi_kernel_adaptor + SRCS ${PHI_KERNEL_ADAPTOR_SRCS} + DEPS new_ir phi_utils) diff --git a/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_adaptor.h b/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_adaptor.h new file mode 100644 index 00000000000..35b85e750db --- /dev/null +++ b/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_adaptor.h @@ -0,0 +1,168 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/fluid/ir/dialect/pd_dialect.h" +#include "paddle/fluid/ir/dialect/pd_op.h" +#include "paddle/fluid/ir/dialect/pd_type.h" +#include "paddle/fluid/ir/dialect/utils.h" +#include "paddle/fluid/ir/interface/infershape.h" +#include "paddle/fluid/ir/interface/op_yaml_info.h" +#include "paddle/ir/core/builtin_attribute.h" +#include "paddle/ir/core/builtin_dialect.h" +#include "paddle/ir/core/builtin_op.h" +#include "paddle/ir/core/ir_context.h" +#include "paddle/ir/core/program.h" +#include "paddle/ir/core/utils.h" +#include "paddle/phi/core/meta_tensor.h" +#include "paddle/phi/infermeta/binary.h" +#include "paddle/phi/kernels/elementwise_add_kernel.h" + +#include "paddle/fluid/framework/scope.h" +#include "paddle/fluid/framework/tensor.h" +#include "paddle/fluid/framework/variable.h" +#include "paddle/fluid/framework/variable_helper.h" + +#include "paddle/phi/common/place.h" +#include "paddle/phi/core/kernel_context.h" +#include "paddle/phi/core/kernel_factory.h" + +#include "paddle/fluid/platform/init.h" + +#include "paddle/fluid/ir/dialect/kernel_attribute.h" +#include "paddle/fluid/ir/dialect/pd_attribute.h" + +#include "glog/logging.h" +#include "paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.h" + +class PhiKernelAdaptor { + public: + explicit PhiKernelAdaptor(paddle::framework::Scope* scope) : scope_(scope) {} + + void run(ir::Program* program) { + auto block = program->block(); + std::unordered_map name_map; + std::cerr << "run here" << std::endl; + ir::BuildScope(block, scope_, &name_map); + std::cerr << "after buid scope" << std::endl; + auto* dev_ctx = phi::DeviceContextPool::Instance().Get(phi::CPUPlace()); + phi::Place cpu_place(phi::AllocationType::CPU); + for (auto it = block->begin(); it != block->end(); ++it) { + VLOG(6) << "begin to run op " << (*it)->name(); + + std::cerr << (*it)->name() << std::endl; + auto attr_map = (*it)->attributes(); + + paddle::dialect::OpYamlInfoInterface op_info_interface = + (*it)->dyn_cast(); + auto op_info_res = op_info_interface.GetOpInfo(); + + paddle::dialect::InferShapeInterface interface = + (*it)->dyn_cast(); + phi::InferMetaContext ctx; + + ir::BuildInferMetaContext((*it), name_map, scope_, op_info_res, &ctx); + + interface.InferShape(&ctx); + + auto runtime_info = std::get<3>(op_info_res); + + auto phi_kernels = phi::KernelFactory::Instance().SelectKernelMap( + runtime_info.kernel_func[0]); + + phi::KernelKey kernel_key(phi::TransToPhiBackend(cpu_place), + phi::DataLayout::ANY, + phi::DataType::FLOAT32); + if (runtime_info.kernel_func[0] == "full_int_array") { + kernel_key.set_dtype(phi::DataType::INT64); + } + auto found_it = phi_kernels.find(kernel_key); + if (found_it == phi_kernels.end()) { + std::cerr << "kernel name " << runtime_info.kernel_func[0] << std::endl; + std::cerr << "kernel key " << kernel_key.backend() << "\t" + << kernel_key.dtype() << "\t" << kernel_key.layout() + << std::endl; + PADDLE_THROW(paddle::platform::errors::NotFound( + "can not found kerenl for [%s]", (*it)->name())); + } else { + phi::KernelContext kernel_ctx(dev_ctx); + + ir::BuildPhiKernelContext( + (*it), name_map, scope_, op_info_res, &kernel_ctx); + found_it->second(&kernel_ctx); + + auto out_value = (*it)->result(0); + out_name = name_map[out_value]; + } + } + } + + void run_kernel_prog(ir::Program* program) { + auto block = program->block(); + std::unordered_map name_map; + BuildScope(block, scope_, &name_map); + ir::IrContext* ctx = ir::IrContext::Instance(); + + ctx->GetOrRegisterDialect(); + + auto* dev_ctx = phi::DeviceContextPool::Instance().Get(phi::CPUPlace()); + phi::Place cpu_place(phi::AllocationType::CPU); + for (auto it = block->begin(); it != block->end(); ++it) { + auto attr_map = (*it)->attributes(); + + auto op_name = attr_map.at("op_name").dyn_cast().data(); + + ir::OpInfo op1_info = ctx->GetRegisteredOpInfo(op_name); + + auto impl = + op1_info.GetInterfaceImpl(); + auto yaml_info = impl->get_op_info_(); + + auto attr_info = std::get<1>(yaml_info); + + auto infer_shape_impl = + op1_info.GetInterfaceImpl(); + + phi::InferMetaContext ctx; + + ir::BuildInferMetaContext((*it), name_map, scope_, yaml_info, &ctx); + + infer_shape_impl->infer_shape_(&ctx); + + auto kernel_name = + attr_map.at("kernel_name").dyn_cast().data(); + auto kernel_key = attr_map.at("kernel_key") + .dyn_cast() + .data(); + + auto kernel_fn = + phi::KernelFactory::Instance().SelectKernel(kernel_name, kernel_key); + + phi::KernelContext kernel_ctx(dev_ctx); + + ir::BuildPhiKernelContext( + (*it), name_map, scope_, yaml_info, &kernel_ctx); + kernel_fn(&kernel_ctx); + + auto out_value = (*it)->result(0); + out_name = name_map[out_value]; + } + } + + std::string out_name; + + private: + paddle::framework::Scope* scope_; +}; diff --git a/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.cc b/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.cc new file mode 100644 index 00000000000..6273d703971 --- /dev/null +++ b/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.cc @@ -0,0 +1,313 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.h" + +#include "paddle/fluid/ir/dialect/pd_dialect.h" +#include "paddle/fluid/ir/dialect/pd_type.h" +#include "paddle/fluid/ir/dialect/utils.h" +#include "paddle/fluid/ir/interface/op_yaml_info.h" +#include "paddle/ir/core/builtin_attribute.h" +#include "paddle/ir/core/ir_context.h" +#include "paddle/ir/core/program.h" +#include "paddle/ir/core/utils.h" +#include "paddle/phi/core/meta_tensor.h" + +#include "paddle/fluid/framework/scope.h" +#include "paddle/fluid/framework/tensor.h" +#include "paddle/fluid/framework/variable.h" +#include "paddle/fluid/framework/variable_helper.h" +#include "paddle/phi/core/kernel_context.h" + +#include "paddle/fluid/ir/dialect/kernel_attribute.h" +#include "paddle/fluid/ir/dialect/pd_attribute.h" + +#include "glog/logging.h" + +namespace ir { + +void BuildScope(ir::Block* block, + paddle::framework::Scope* scope, + std::unordered_map* name_map) { + std::unordered_map map_test; + + // int count = name_map->size(); + int count = 0; + for (auto it = block->begin(); it != block->end(); ++it) { + size_t input_num = (*it)->num_operands(); + auto attr_map = (*it)->attributes(); + std::string op_name = (*it)->name(); + if (attr_map.count("op_name")) { + auto op_name = attr_map.at("op_name").dyn_cast().data(); + } + if (op_name == "pd.fetch") { + // fetch is a very special op, with no output + for (size_t i = 0; i < input_num; ++i) { + auto ptr = (*it)->operand(i).source(); + auto var_name = attr_map.at("name").dyn_cast().data(); + + PADDLE_ENFORCE_EQ( + name_map->count(ptr), + true, + phi::errors::PreconditionNotMet( + "input of fetch op should in name mape, var_name is [%s]", + var_name)); + + scope->Rename(name_map->at(ptr), var_name); + (*name_map)[ptr] = var_name; + } + continue; + } + + if (input_num > 0) { + for (size_t i = 0; i < input_num; ++i) { + auto ptr = (*it)->operand(i).source(); + std::string name; + if (name_map->find(ptr) != name_map->end()) { + name = name_map->at(ptr); + } else { + PADDLE_THROW(phi::errors::PreconditionNotMet( + "input should in name map, [%d] 'th input of [%s] op", + i, + op_name)); + } + } + } + + int out_num = (*it)->num_results(); + + if (out_num > 0) { + for (int i = 0; i < out_num; ++i) { + ir::Value ptr = (*it)->result(i); + std::string name; + if (name_map->find(ptr) != name_map->end()) { + name = name_map->at(ptr); + } else { + name = "inner_var_" + std::to_string(count++); + name_map->emplace(ptr, name); + } + auto var = scope->Var(name); + + // need to update here, only support DenseTensor + var->GetMutable(); + } + } + } +} + +void BuildInferMetaContext( + ir::Operation* op, + const std::unordered_map& name_map, + paddle::framework::Scope* scope, + const OpInfoTuple& op_yaml_info, + phi::InferMetaContext* ctx) { + // inputs include input and mutable attributes + auto input_info = std::get<0>(op_yaml_info); + std::map input_index_map; + std::map mutable_attr_type_map; + int input_index = 0; + for (auto& t : input_info) { + VLOG(6) << t.name << "\t" << t.type_name; + input_index_map[t.name] = input_index++; + if (t.is_mutable_attribute) { + mutable_attr_type_map[t.name] = t.type_name; + } + } + + auto attr_info = std::get<1>(op_yaml_info); + std::map attr_type_map; + for (auto& t : attr_info) { + VLOG(6) << t.name << "\t" << t.type_name; + attr_type_map[t.name] = t.type_name; + } + + auto attr_map = op->attributes(); + auto runtime_info = std::get<3>(op_yaml_info); + + // int input_index = 0; + std::vector vec_param_list = runtime_info.infer_meta_param; + + for (auto& t : vec_param_list) { + if (input_index_map.count(t)) { + // get information from input + ir::Value ptr = op->operand(input_index_map[t]).source(); + auto in_var_name = name_map.at(ptr); + + if (mutable_attr_type_map.count(t)) { + VLOG(6) << "ctx->EmplaceBack mutable attr: " << t << "\t" + << in_var_name; + if (mutable_attr_type_map[t] == "paddle::dialect::IntArrayAttribute") { + ctx->EmplaceBackAttr(phi::IntArray( + *(scope->Var(in_var_name)->GetMutable()))); + } else if (mutable_attr_type_map[t] == + "paddle::dialect::ScalarAttribute") { + ctx->EmplaceBackAttr(phi::Scalar( + *(scope->Var(in_var_name)->GetMutable()))); + } else { + PADDLE_THROW(phi::errors::Unimplemented("attr type not support [%s] ", + mutable_attr_type_map[t])); + } + + } else { + VLOG(6) << "ctx->EmplaceBackInput: " << t << "\t" << in_var_name; + auto var = scope->Var(in_var_name); + const phi::TensorBase* tensor_in = &(var->Get()); + ctx->EmplaceBackInput(const_cast(tensor_in)); + } + } + + if (attr_type_map.count(t)) { + auto type_name = attr_type_map[t]; + if (type_name == "paddle::dialect::IntArrayAttribute") { + ctx->EmplaceBackAttr( + attr_map[t].dyn_cast().data()); + } else if (type_name == "paddle::dialect::DataTypeAttribute") { + ctx->EmplaceBackAttr( + attr_map[t].dyn_cast().data()); + } else if (type_name == "ir::Int32Attribute") { + ctx->EmplaceBackAttr(attr_map[t].dyn_cast().data()); + } else if (type_name == "paddle::dialect::PlaceAttribute") { + ctx->EmplaceBackAttr( + attr_map[t].dyn_cast().data()); + } else if (type_name == "paddle::dialect::ScalarAttribute") { + ctx->EmplaceBackAttr( + attr_map[t].dyn_cast().data()); + } else { + PADDLE_THROW(phi::errors::Unimplemented("attr type not support [%s] ", + type_name)); + } + VLOG(6) << "ctx->EmplaceBackAttr: " << t; + } + } + + ir::Value out_ptr = op->result(0); + auto name = name_map.at(out_ptr); + + ctx->EmplaceBackOutput(scope->Var(name)->Get()); +} + +void BuildPhiKernelContext( + ir::Operation* op, + const std::unordered_map& name_map, + paddle::framework::Scope* scope, + const OpInfoTuple& op_yaml_info, + phi::KernelContext* ctx, + std::map>* input_map, + std::map>* output_map) { + // inputs include input and mutable attributes + auto input_info = std::get<0>(op_yaml_info); + std::map input_index_map; + std::map mutable_attr_type_map; + int input_index = 0; + for (auto& t : input_info) { + VLOG(6) << t.name << "\t" << t.type_name; + input_index_map[t.name] = input_index++; + if (t.is_mutable_attribute) { + mutable_attr_type_map[t.name] = t.type_name; + } + } + + auto attr_info = std::get<1>(op_yaml_info); + std::map attr_type_map; + for (auto& t : attr_info) { + VLOG(6) << t.name << "\t" << t.type_name; + attr_type_map[t.name] = t.type_name; + } + + auto attr_map = op->attributes(); + auto runtime_info = std::get<3>(op_yaml_info); + + // int input_index = 0; + std::vector vec_param_list = runtime_info.kernel_param; + for (auto& t : vec_param_list) { + if (input_index_map.count(t)) { + // get information from input + ir::Value ptr = op->operand(input_index_map[t]).source(); + auto in_var_name = name_map.at(ptr); + + if (input_map != nullptr) { + // only deal with single input for now, [todo] need support multi input + // like concat + size_t tmp_id = std::atol(in_var_name.substr(4, 100).c_str()); + (*input_map)[std::to_string(input_index_map.at(t))].push_back(tmp_id); + } + + if (mutable_attr_type_map.count(t)) { + VLOG(6) << "ctx->EmplaceBack mutable attr: " << t << "\t" + << in_var_name; + if (mutable_attr_type_map[t] == "paddle::dialect::IntArrayAttribute") { + ctx->EmplaceBackAttr(phi::IntArray( + *(scope->Var(in_var_name)->GetMutable()))); + } else if (mutable_attr_type_map[t] == + "paddle::dialect::ScalarAttribute") { + ctx->EmplaceBackAttr(phi::Scalar( + *(scope->Var(in_var_name)->GetMutable()))); + } else { + PADDLE_THROW(phi::errors::Unimplemented("attr type not support [%s] ", + mutable_attr_type_map[t])); + } + + } else { + VLOG(6) << "ctx->EmplaceBackInput: " << t << "\t" << in_var_name; + + PADDLE_ENFORCE_NOT_NULL( + scope->FindLocalVar(in_var_name), + phi::errors::PreconditionNotMet("can not find var[%s] in scope", + in_var_name)); + + auto var = scope->Var(in_var_name); + const phi::TensorBase* tensor_in = &(var->Get()); + ctx->EmplaceBackInput(tensor_in); + } + } + + if (attr_type_map.count(t)) { + auto type_name = attr_type_map[t]; + if (type_name == "paddle::dialect::IntArrayAttribute") { + ctx->EmplaceBackAttr( + attr_map[t].dyn_cast().data()); + } else if (type_name == "paddle::dialect::DataTypeAttribute") { + ctx->EmplaceBackAttr( + attr_map[t].dyn_cast().data()); + } else if (type_name == "ir::Int32Attribute") { + ctx->EmplaceBackAttr(attr_map[t].dyn_cast().data()); + } else if (type_name == "paddle::dialect::PlaceAttribute") { + ctx->EmplaceBackAttr( + attr_map[t].dyn_cast().data()); + } else if (type_name == "paddle::dialect::ScalarAttribute") { + ctx->EmplaceBackAttr( + attr_map[t].dyn_cast().data()); + } else { + PADDLE_THROW(phi::errors::Unimplemented("attr type not support [%s] ", + type_name)); + } + VLOG(6) << "ctx->EmplaceBackAttr: " << t; + } + } + + ir::Value out_ptr = op->result(0); + auto name = name_map.at(out_ptr); + + ctx->EmplaceBackOutput(const_cast( + &(scope->Var(name)->Get()))); + + if (output_map != nullptr) { + // only deal with single input for now, [todo] need support multi input like + // concat + size_t tmp_id = std::atol(name.substr(4, 100).c_str()); + (*output_map)["out"].push_back(tmp_id); + } +} + +} // namespace ir diff --git a/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.h b/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.h new file mode 100644 index 00000000000..ebafce68a2b --- /dev/null +++ b/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.h @@ -0,0 +1,61 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/fluid/ir/dialect/pd_dialect.h" +#include "paddle/fluid/ir/dialect/pd_type.h" +#include "paddle/fluid/ir/dialect/utils.h" +#include "paddle/fluid/ir/interface/op_yaml_info.h" +#include "paddle/ir/core/builtin_attribute.h" +#include "paddle/ir/core/ir_context.h" +#include "paddle/ir/core/program.h" +#include "paddle/ir/core/utils.h" +#include "paddle/phi/core/meta_tensor.h" + +#include "paddle/fluid/framework/scope.h" +#include "paddle/fluid/framework/tensor.h" +#include "paddle/fluid/framework/variable.h" +#include "paddle/fluid/framework/variable_helper.h" +#include "paddle/phi/core/kernel_context.h" + +#include "paddle/fluid/ir/dialect/kernel_attribute.h" +#include "paddle/fluid/ir/dialect/pd_attribute.h" +#include "paddle/phi/core/infermeta_utils.h" + +#include "glog/logging.h" + +namespace ir { + +void BuildScope(ir::Block* block, + paddle::framework::Scope* scope, + std::unordered_map* name_map); + +void BuildInferMetaContext( + ir::Operation* op, + const std::unordered_map& name_map, + paddle::framework::Scope* scope, + const OpInfoTuple& op_yaml_info, + phi::InferMetaContext* ctx); + +void BuildPhiKernelContext( + ir::Operation* op, + const std::unordered_map& name_map, + paddle::framework::Scope* scope, + const OpInfoTuple& op_yaml_info, + phi::KernelContext* ctx, + std::map>* input_map = nullptr, + std::map>* output_map = nullptr); + +} // namespace ir diff --git a/paddle/fluid/ir_adaptor/translator/attribute_translator.cc b/paddle/fluid/ir_adaptor/translator/attribute_translator.cc index 95e61d849f2..e2b34a2f9bb 100644 --- a/paddle/fluid/ir_adaptor/translator/attribute_translator.cc +++ b/paddle/fluid/ir_adaptor/translator/attribute_translator.cc @@ -24,6 +24,7 @@ #include "paddle/phi/common/layout.h" #include "paddle/phi/common/place.h" #include "paddle/phi/common/scalar.h" +#include "paddle/phi/core/utils/data_type.h" #include "paddle/utils/variant.h" namespace paddle { @@ -166,8 +167,9 @@ class DataTypeAttributeVisitor : public AttributeVisitor { using AttributeVisitor::AttributeVisitor; ir::Attribute operator()(int i) override { VLOG(10) << "translating int to DataType: " << i; - phi::DataType data = static_cast(i); - return paddle::dialect::DataTypeAttribute::get(ctx, data); + + auto phi_dtype = phi::TransToPhiDataType(i); + return paddle::dialect::DataTypeAttribute::get(ctx, phi_dtype); } }; diff --git a/paddle/fluid/platform/CMakeLists.txt b/paddle/fluid/platform/CMakeLists.txt index e2efc315ca5..5daf484e70d 100644 --- a/paddle/fluid/platform/CMakeLists.txt +++ b/paddle/fluid/platform/CMakeLists.txt @@ -104,7 +104,7 @@ endif() cc_library( init SRCS init.cc - DEPS device_context phi memcpy) + DEPS device_context phi memcpy pd_dialect new_ir) # memcpy depends on device_context, here add deps individually for # avoiding cycle dependencies diff --git a/paddle/fluid/platform/init.cc b/paddle/fluid/platform/init.cc index 81714cf9eb2..b5f31fd8584 100644 --- a/paddle/fluid/platform/init.cc +++ b/paddle/fluid/platform/init.cc @@ -51,9 +51,13 @@ limitations under the License. */ #include "paddle/fluid/platform/device/ipu/ipu_info.h" #endif +#include "paddle/fluid/ir/dialect/pd_dialect.h" #include "paddle/fluid/memory/allocation/allocator_facade.h" #include "paddle/fluid/memory/memory.h" #include "paddle/fluid/platform/flags.h" +#include "paddle/ir/core/builtin_dialect.h" +#include "paddle/ir/core/ir_context.h" +#include "paddle/ir/core/program.h" #include "paddle/phi/common/memory_utils.h" #include "paddle/phi/core/custom_kernel.h" @@ -198,6 +202,9 @@ void InitDevices() { } void InitDevices(const std::vector devices) { + ir::IrContext *ctx = ir::IrContext::Instance(); + ctx->GetOrRegisterDialect(); + std::vector places; for (size_t i = 0; i < devices.size(); ++i) { diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index 7bce2ba2144..97842928ac5 100755 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -38,6 +38,10 @@ set(PYBIND_DEPS global_utils phi_utils phi + phi_kernel_adaptor + pd_dialect + new_ir + program_translator new_profiler jit_layer jit_property diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index fe2a2cedc42..2ceff1cca17 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -210,6 +210,7 @@ PYBIND11_MAKE_OPAQUE(paddle::framework::FetchList); PYBIND11_MAKE_OPAQUE(paddle::framework::FetchType); DECLARE_FILE_SYMBOLS(init_phi); +DECLARE_FILE_SYMBOLS(kernel_dialect); namespace paddle { namespace pybind { diff --git a/paddle/phi/core/flags.cc b/paddle/phi/core/flags.cc index 35aeaaed078..0c581fb0991 100644 --- a/paddle/phi/core/flags.cc +++ b/paddle/phi/core/flags.cc @@ -1256,3 +1256,15 @@ PHI_DEFINE_EXPORTED_bool(use_shm_cache, PHI_DEFINE_EXPORTED_string(tensor_operants_mode, "eager", "Tensor operants mode"); + +/** + * Using new IR in executor FLAG + * Name: enable_new_ir_in_executor + * Since Version: 2.6.0 + * Value Range: bool, default=false + * Example: + * Note: If Ture, executor will use new IR + */ +PHI_DEFINE_EXPORTED_bool(enable_new_ir_in_executor, + false, + "Enable new IR in executor"); diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 975446b6002..f21c8b263a6 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -214,6 +214,14 @@ if(TARGET standalone_executor_test) endif() endif() +if(TARGET standalone_executor_new_ir_test) + if(NOT WIN32) + set_tests_properties( + standalone_executor_new_ir_test + PROPERTIES ENVIRONMENT "FLAGS_enable_new_ir_in_executor=true") + endif() +endif() + if(TARGET layer_test) add_dependencies(layer_test jit_download_program) add_dependencies(layer_test_new jit_download_program) diff --git a/test/cpp/ir/core/CMakeLists.txt b/test/cpp/ir/core/CMakeLists.txt index d033303fd25..b452bfbfe54 100644 --- a/test/cpp/ir/core/CMakeLists.txt +++ b/test/cpp/ir/core/CMakeLists.txt @@ -39,6 +39,7 @@ cc_test_old( DEPS new_ir pd_dialect + phi_kernel_adaptor phi gtest) diff --git a/test/cpp/ir/core/ir_exe_test.cc b/test/cpp/ir/core/ir_exe_test.cc index 56b49d60b07..05a557e7ca3 100644 --- a/test/cpp/ir/core/ir_exe_test.cc +++ b/test/cpp/ir/core/ir_exe_test.cc @@ -41,8 +41,8 @@ #include "paddle/fluid/ir/dialect/pd_attribute.h" +#include "paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_adaptor.h" #include "paddle/phi/core/kernel_registry.h" -#include "test/cpp/ir/core/phi_kernel_adaptor.h" PD_DECLARE_KERNEL(full, CPU, ALL_LAYOUT); PD_DECLARE_KERNEL(full_int_array, CPU, ALL_LAYOUT); diff --git a/test/cpp/ir/core/ir_infershape_test.cc b/test/cpp/ir/core/ir_infershape_test.cc index df155b22234..e23226b15c4 100644 --- a/test/cpp/ir/core/ir_infershape_test.cc +++ b/test/cpp/ir/core/ir_infershape_test.cc @@ -38,7 +38,8 @@ #include "paddle/phi/infermeta/nullary.h" // Define op -class OperationTest : public ir::Op { +class OperationTest + : public ir::Op { public: using Op::Op; static const char *name() { return "test.operation2"; } @@ -84,7 +85,8 @@ TEST(infershape_test, infershape_test) { ir::Operation *op = ir::Operation::Create(op_inputs, {}, op_output_types, op_info); - InferShapeInterface interface = op->dyn_cast(); + paddle::dialect::InferShapeInterface interface = + op->dyn_cast(); phi::InferMetaContext infer_meta_ctx; infer_meta_ctx.EmplaceBackAttr(phi::IntArray({5, 6})); infer_meta_ctx.EmplaceBackAttr(phi::DataType::FLOAT32); diff --git a/test/cpp/ir/core/phi_kernel_adaptor.h b/test/cpp/ir/core/phi_kernel_adaptor.h index 3d7a5a93090..e8847977bc4 100644 --- a/test/cpp/ir/core/phi_kernel_adaptor.h +++ b/test/cpp/ir/core/phi_kernel_adaptor.h @@ -46,9 +46,9 @@ #include "glog/logging.h" -void build_scope(ir::Block* block, - paddle::framework::Scope* scope, - std::unordered_map* name_map) { +void BuildScope(ir::Block* block, + paddle::framework::Scope* scope, + std::unordered_map* name_map) { std::unordered_map map_test; int count = 0; @@ -192,7 +192,7 @@ class PhiKernelAdaptor { void run(ir::Program* program) { auto block = program->block(); std::unordered_map name_map; - build_scope(block, scope_, &name_map); + BuildScope(block, scope_, &name_map); auto* dev_ctx = phi::DeviceContextPool::Instance().Get(phi::CPUPlace()); phi::Place cpu_place(phi::AllocationType::CPU); @@ -248,7 +248,7 @@ class PhiKernelAdaptor { void run_kernel_prog(ir::Program* program) { auto block = program->block(); std::unordered_map name_map; - build_scope(block, scope_, &name_map); + BuildScope(block, scope_, &name_map); ir::IrContext* ctx = ir::IrContext::Instance(); ctx->GetOrRegisterDialect(); diff --git a/test/cpp/ir/core/type_test.cc b/test/cpp/ir/core/type_test.cc index e3c9bcd20dd..9a75cf69134 100644 --- a/test/cpp/ir/core/type_test.cc +++ b/test/cpp/ir/core/type_test.cc @@ -210,7 +210,7 @@ TEST(type_test, custom_type_dialect) { EXPECT_EQ(int8.dialect().id(), ir::TypeId::get()); std::vector dialect_list = ctx->GetRegisteredDialects(); - EXPECT_EQ(dialect_list.size() == 3, 1); // integer, builtin, fake + EXPECT_EQ(dialect_list.size() == 4, 1); // integer, builtin, fake ir::Dialect *dialect_builtin1 = ctx->GetRegisteredDialect("builtin"); ir::Dialect *dialect_builtin2 = diff --git a/test/cpp/ir/kernel_dialect/CMakeLists.txt b/test/cpp/ir/kernel_dialect/CMakeLists.txt index d15725f68c2..2ef1c31a795 100644 --- a/test/cpp/ir/kernel_dialect/CMakeLists.txt +++ b/test/cpp/ir/kernel_dialect/CMakeLists.txt @@ -6,5 +6,6 @@ cc_test_old( pd_op_to_kernel_pass new_ir pd_dialect + phi_kernel_adaptor phi gtest) diff --git a/test/cpp/ir/kernel_dialect/ir_kernel_dialect_pass_test.cc b/test/cpp/ir/kernel_dialect/ir_kernel_dialect_pass_test.cc index f517ca0de53..31f79b70fc0 100644 --- a/test/cpp/ir/kernel_dialect/ir_kernel_dialect_pass_test.cc +++ b/test/cpp/ir/kernel_dialect/ir_kernel_dialect_pass_test.cc @@ -15,38 +15,32 @@ #include #include +#include "paddle/fluid/framework/scope.h" +#include "paddle/fluid/framework/tensor.h" +#include "paddle/fluid/framework/variable.h" +#include "paddle/fluid/framework/variable_helper.h" +#include "paddle/fluid/ir/dialect/kernel_dialect.h" +#include "paddle/fluid/ir/dialect/pd_attribute.h" #include "paddle/fluid/ir/dialect/pd_dialect.h" #include "paddle/fluid/ir/dialect/pd_type.h" #include "paddle/fluid/ir/dialect/utils.h" #include "paddle/fluid/ir/interface/op_yaml_info.h" #include "paddle/fluid/ir/pass/pd_op_to_kernel_pass.h" +#include "paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_adaptor.h" +#include "paddle/fluid/platform/init.h" #include "paddle/ir/core/builtin_attribute.h" #include "paddle/ir/core/builtin_dialect.h" #include "paddle/ir/core/builtin_op.h" #include "paddle/ir/core/ir_context.h" #include "paddle/ir/core/program.h" #include "paddle/ir/core/utils.h" -#include "paddle/phi/core/meta_tensor.h" -#include "paddle/phi/infermeta/binary.h" -#include "paddle/phi/kernels/elementwise_add_kernel.h" - -#include "paddle/fluid/framework/scope.h" -#include "paddle/fluid/framework/tensor.h" -#include "paddle/fluid/framework/variable.h" -#include "paddle/fluid/framework/variable_helper.h" - #include "paddle/phi/common/place.h" #include "paddle/phi/core/kernel_context.h" #include "paddle/phi/core/kernel_factory.h" - -#include "paddle/fluid/platform/init.h" - -#include "paddle/fluid/ir/dialect/pd_attribute.h" -#include "test/cpp/ir/core/phi_kernel_adaptor.h" - #include "paddle/phi/core/kernel_registry.h" - -#include "paddle/fluid/ir/dialect/kernel_dialect.h" +#include "paddle/phi/core/meta_tensor.h" +#include "paddle/phi/infermeta/binary.h" +#include "paddle/phi/kernels/elementwise_add_kernel.h" PD_DECLARE_KERNEL(full, CPU, ALL_LAYOUT); PD_DECLARE_KERNEL(full_int_array, CPU, ALL_LAYOUT); diff --git a/test/cpp/new_executor/CMakeLists.txt b/test/cpp/new_executor/CMakeLists.txt index 3c53e763134..14e3672fd1c 100644 --- a/test/cpp/new_executor/CMakeLists.txt +++ b/test/cpp/new_executor/CMakeLists.txt @@ -1,4 +1,35 @@ +if(NOT WIN32) + cc_test( + standalone_executor_new_ir_test + SRCS standalone_executor_new_ir_test.cc + DEPS phi_kernel_adaptor new_ir pd_dialect) +endif() + # skip win32 since wget is not installed by default on windows machine. + +set(OPS + fill_constant_op + uniform_random_op + lookup_table_op + transpose_op + reshape_op + split_op + slice_op + concat_op + matmul_op + elementwise_add_op + elementwise_mul_op + softmax_with_cross_entropy_op + reduce_mean_op + activation_op + sum_op + elementwise_div_op + generated_op + generated_static_op + squared_l2_norm_op + memcpy_h2d_op + memcpy_d2h_op + fetch_v2_op) if(WITH_GPU AND WITH_TESTING AND NOT WIN32) @@ -11,29 +42,6 @@ if(WITH_GPU WORKING_DIRECTORY "${CC_TESTS_DIR}") # all operators used in the program - set(OPS - fill_constant_op - uniform_random_op - lookup_table_op - transpose_op - reshape_op - split_op - slice_op - concat_op - matmul_op - elementwise_add_op - elementwise_mul_op - softmax_with_cross_entropy_op - reduce_mean_op - activation_op - sum_op - elementwise_div_op - generated_op - generated_static_op - squared_l2_norm_op - memcpy_h2d_op - memcpy_d2h_op - fetch_v2_op) # All deps of the operators above, part of GLOB_OPERATOR_DEPS. set(OP_DEPS phi concat_and_split cross_entropy) diff --git a/test/cpp/new_executor/standalone_executor_new_ir_test.cc b/test/cpp/new_executor/standalone_executor_new_ir_test.cc new file mode 100644 index 00000000000..f878b41cec2 --- /dev/null +++ b/test/cpp/new_executor/standalone_executor_new_ir_test.cc @@ -0,0 +1,83 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/framework/new_executor/standalone_executor.h" + +#include + +#include +#include +#include + +#include "paddle/phi/core/kernel_registry.h" + +#include "paddle/fluid/ir/dialect/pd_dialect.h" +#include "paddle/fluid/ir/dialect/pd_op.h" +#include "paddle/fluid/ir/pass/pd_op_to_kernel_pass.h" +#include "paddle/ir/core/builder.h" +#include "paddle/ir/core/ir_context.h" +#include "paddle/ir/core/program.h" + +#include "paddle/fluid/platform/init_phi.h" + +DECLARE_FILE_SYMBOLS(kernel_dialect); + +PD_DECLARE_KERNEL(full, CPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(full_int_array, CPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(uniform, CPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(add, CPU, ALL_LAYOUT); + +namespace paddle { +namespace framework { + +TEST(StandaloneExecutor, run) { + std::cerr << "here" << std::endl; + + ir::IrContext* ctx = ir::IrContext::Instance(); + ir::Program program((ctx)); + + ctx->GetOrRegisterDialect(); + + ir::Builder builder = ir::Builder(ctx, program.block()); + + paddle::dialect::FullOp op1 = builder.Build( + std::vector{2, 2}, 1.0, phi::DataType::FLOAT32, phi::CPUPlace()); + + paddle::dialect::FullOp op2 = builder.Build( + std::vector{2, 2}, 1.0, phi::DataType::FLOAT32, phi::CPUPlace()); + + builder.Build(op1->result(0), op2->result(0)); + + program.Print(std::cout); + + auto kernel_program = paddle::dialect::PdOpLowerToKernelPass(&program); + + kernel_program->Print(std::cout); + + auto place = platform::CPUPlace(); + Scope scope; + + ProgramDesc prog_desc; + InterpreterCore test_core( + place, prog_desc.Block(0), &scope, kernel_program.get()); + + test_core.Run({}); + + auto tensor = scope.Var("inner_var_2")->Get(); + + std::cerr << "uot" << tensor << std::endl; +} + +} // namespace framework +} // namespace paddle diff --git a/test/ir/CMakeLists.txt b/test/ir/CMakeLists.txt index 732a70069a1..fab15cc488c 100644 --- a/test/ir/CMakeLists.txt +++ b/test/ir/CMakeLists.txt @@ -16,6 +16,7 @@ foreach(target ${TEST_IR_PASSES}) endforeach() add_subdirectory(inference) +add_subdirectory(new_ir) set_tests_properties(test_fuse_resnet_unit PROPERTIES TIMEOUT 120) set_tests_properties(test_convert_to_mixed_precision PROPERTIES TIMEOUT 300) diff --git a/test/ir/new_ir/CMakeLists.txt b/test/ir/new_ir/CMakeLists.txt new file mode 100644 index 00000000000..393bc869d9b --- /dev/null +++ b/test/ir/new_ir/CMakeLists.txt @@ -0,0 +1,10 @@ +file( + GLOB TEST_INTERP_CASES + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "test_*.py") +string(REPLACE ".py" "" TEST_INTERP_CASES "${TEST_INTERP_CASES}") + +foreach(target ${TEST_INTERP_CASES}) + py_test_modules(${target} MODULES ${target} ENVS GLOG_v=1 + FLAGS_enable_new_ir_in_executor=true) +endforeach() diff --git a/test/ir/new_ir/test_standalone_new_ir.py b/test/ir/new_ir/test_standalone_new_ir.py new file mode 100644 index 00000000000..a2a4b0c2d46 --- /dev/null +++ b/test/ir/new_ir/test_standalone_new_ir.py @@ -0,0 +1,54 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import unittest + +import numpy as np + +import paddle + +paddle.enable_static() + + +class TestNewIr(unittest.TestCase): + def test_with_new_ir(self): + place = paddle.CPUPlace() + exe = paddle.static.Executor(place) + + x = paddle.ones([2, 2], dtype="float32") + y = paddle.ones([2, 2], dtype="float32") + + z = x + y + out = exe.run( + paddle.static.default_main_program(), {}, fetch_list=[z.name] + ) + + gold_res = np.ones([2, 2], dtype="float32") * 2 + + self.assertEqual( + np.array_equal( + np.array( + paddle.static.global_scope() + .find_var("inner_var_2") + .get_tensor() + ), + gold_res, + ), + True, + ) + + +if __name__ == "__main__": + unittest.main() -- GitLab