diff --git a/paddle/cinn/hlir/framework/new_ir/group.h b/paddle/cinn/hlir/framework/new_ir/group.h
index 2462fb8c4ce91eb52583255c5d46afc45aeb96b2..b62c315873c70d88bbc0f73f2c030064139c9e29 100644
--- a/paddle/cinn/hlir/framework/new_ir/group.h
+++ b/paddle/cinn/hlir/framework/new_ir/group.h
@@ -16,6 +16,7 @@
 #include <string>
 #include <vector>
 
+#include "paddle/cinn/hlir/framework/new_ir/utils.h"
 #include "paddle/cinn/hlir/framework/op.h"
 #include "paddle/ir/core/operation.h"
 
@@ -30,20 +31,26 @@ struct Group {
  public:
   explicit Group(const std::vector<::ir::Operation*>& group_ops)
       : ops(group_ops) {
-    op_pattern_kind = OpPatternKind::kElementWise;
-    fn_name = "fn_";
-    for (auto& op : group_ops) {
-      fn_name += "_" + op->name();
-    }
+    Initialize();
+  }
+
+  explicit Group(std::initializer_list<::ir::Operation*> group_ops)
+      : ops(group_ops) {
+    Initialize();
   }
 
+  int group_id;
+  std::string fn_name;
+  OpPatternKind op_pattern_kind;
   std::vector<::ir::Operation*> ops;
   std::vector<std::string> input_names;
   std::vector<std::string> output_names;
-  int group_id;
-  // FIXME(Aurelius84): This should be refactored with CinnGroupOp
-  OpPatternKind op_pattern_kind;
-  std::string fn_name;
+
+ private:
+  void Initialize() {
+    op_pattern_kind = OpPatternKind::kElementWise;
+    fn_name = CompatibleInfo::GroupOpsName(ops);
+  }
 };
 
 }  // namespace newir
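Both Group constructors now funnel through a private Initialize(), so fn_name is derived from the stored ops via CompatibleInfo::GroupOpsName instead of being rebuilt inline. A minimal construction sketch (the operation pointers are assumed to already live in an ::ir::Block; the helper names are hypothetical):

    #include "paddle/cinn/hlir/framework/new_ir/group.h"

    using cinn::hlir::framework::newir::Group;
    using cinn::hlir::framework::newir::GroupPtr;

    // Given the ops of one fusion group, however they were collected:
    GroupPtr MakeGroup(const std::vector<::ir::Operation*>& ops) {
      return std::make_shared<Group>(ops);  // Initialize() runs in the ctor
    }

    // New in this diff: single-op groups via the initializer_list overload.
    GroupPtr MakeSingleOpGroup(::ir::Operation* op) {
      return std::make_shared<Group>(
          std::initializer_list<::ir::Operation*>({op}));
    }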
diff --git a/paddle/cinn/hlir/framework/new_ir/op_lowering_impl.cc b/paddle/cinn/hlir/framework/new_ir/op_lowering_impl.cc
index 040ea9cf87c989c651f757db17810f70f9c55bcf..d291aba2e406e351993dc7daff12d38a7645da6f 100644
--- a/paddle/cinn/hlir/framework/new_ir/op_lowering_impl.cc
+++ b/paddle/cinn/hlir/framework/new_ir/op_lowering_impl.cc
@@ -43,7 +43,7 @@ ir::Tensor GetTensor(const ::ir::Value& value) {
   auto type_info = value.type().dyn_cast<paddle::dialect::DenseTensorType>();
   auto in_shape = phi::vectorize<int>(type_info.dims());
   auto dtype = type_info.dtype();
-  std::string input_id = CompatibleInfo::InputName(value);
+  std::string input_id = CompatibleInfo::ValueName(value);
   return lang::CreatePlaceHolder(
       in_shape, utils::ConvertIRType(dtype), input_id);
 }
@@ -56,15 +56,16 @@ std::vector<ir::Tensor> CollectInputTensor(
   for (auto& operand : op->operands()) {
     CHECK(operand);
     auto in_value = operand.source();
-    ir::Tensor tensor;
+    VLOG(4) << "input tensor name: " << CompatibleInfo::ValueName(in_value);
+    // NOTE(Aurelius84): Need always to create placeholder for input tensor.
+    ir::Tensor tensor = details::GetTensor(in_value);
     if (!tensor_map->count(in_value)) {
-      tensor = details::GetTensor(in_value);
       // record tensor.
       (*tensor_map)[in_value] = tensor;
       // record func input args
-      if (func_args != nullptr) func_args->push_back(tensor);
-    } else {
-      tensor = tensor_map->at(in_value);
+      if (func_args != nullptr) {
+        func_args->push_back(tensor);
+      }
     }
     tensors.push_back(tensor);
   }
@@ -76,7 +77,7 @@ void CollectOutputInfo(const ::ir::Operation* op,
                        std::vector<std::vector<int>>* out_shapes) {
   auto op_results = op->results();
   for (auto& out_value : op_results) {
-    std::string output_id = CompatibleInfo::OutputName(out_value);
+    std::string output_id = CompatibleInfo::ValueName(out_value);
     // group->output_names.push_back(output_id);
     auto type_info =
         out_value.type().dyn_cast<paddle::dialect::DenseTensorType>();
@@ -265,11 +266,11 @@ std::vector<ir::LoweredFunc> OpLowererImpl::PostProcess(
       // output arg tensors
       group_func_arg_tensors->push_back(tensor);
       // output args
+      group->output_names.push_back(tensor->name);
       group_func_args.emplace_back(tensor->buffer, ir::Argument::IO::kOutput);
       arg_name_set.insert(tensor->buffer->name);
     }
   }
-
   if (!done_op_schedule) {
     std::unordered_set<std::string> args_set;
     for (auto arg : group_func_args) {
@@ -329,6 +330,8 @@ std::vector<ir::Expr> OpLowererImpl::LowerOps(
     std::vector<ir::Tensor> op_func_arg_tensors =
         details::CollectInputTensor(op, group_func_arg_tensors, tensor_map);
+    VLOG(4) << "input size:" << op_func_arg_tensors.size();
+
     std::string cinn_op_name = CompatibleInfo::OpName(*op);
     const hlir::framework::Operator* cinn_op = Operator::Get(cinn_op_name);
     auto op_impl = OpStrategy::SelectImpl(strategy[cinn_op](
@@ -348,6 +351,9 @@ std::vector<ir::Expr> OpLowererImpl::LowerOps(
     }
   }
 
+  VLOG(4) << "group_func_arg_tensors.size(): "
+          << group_func_arg_tensors->size();
+
   return func_bodies;
 }
@@ -364,7 +370,7 @@ std::vector<ir::LoweredFunc> OpLowererImpl::DoOpLower(
   // set tensor name = operand hash name
   auto op_results = op->results();
   for (const auto& result : op_results) {
-    std::string output_id = CompatibleInfo::OutputName(result);
+    std::string output_id = CompatibleInfo::ValueName(result);
     cinn_inputs.push_back(common::CINNValue(output_id));
   }
@@ -400,6 +406,8 @@ std::vector<ir::LoweredFunc> OpLowererImpl::DoOpLower(
     }
   }
 
+  VLOG(4) << "op_func_arg_tensors.size(): " << op_func_arg_tensors->size();
+
   // 2.Do lower
   std::string lower_fn_name = CompatibleInfo::OpFuncName(*op);
   std::vector<ir::LoweredFunc> funcs = lang::LowerVec(lower_fn_name,
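The CollectInputTensor change above is behavioral, not cosmetic: a placeholder is now created for every operand on every visit (per the NOTE), while tensor_map and func_args are still updated only on first sight of a value. Distilled control flow for reference; this presumably stays sound because CINN identifies buffers by tensor name, so two placeholders sharing one ValueName alias the same buffer:

    // Distilled from the hunk above; logging elided.
    ir::Tensor tensor = details::GetTensor(in_value);  // always a placeholder
    if (!tensor_map->count(in_value)) {
      (*tensor_map)[in_value] = tensor;  // record the value once
      if (func_args != nullptr) {
        func_args->push_back(tensor);  // expose it as a func arg once
      }
    }
    tensors.push_back(tensor);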
diff --git a/paddle/cinn/hlir/framework/new_ir/utils.cc b/paddle/cinn/hlir/framework/new_ir/utils.cc
index 12b3783e7c85d20b22454417197309b100e5c1fb..38bfcf05776e05e70bd510dc6880980475efceaf 100644
--- a/paddle/cinn/hlir/framework/new_ir/utils.cc
+++ b/paddle/cinn/hlir/framework/new_ir/utils.cc
@@ -36,13 +36,8 @@ std::string CompatibleInfo::OpName(const ::ir::Operation& op) {
   return cinn_op_name;
 }
 
-std::string CompatibleInfo::InputName(const ::ir::Value& value) {
-  return CompatibleInfo::kInputPrefix +
-         std::to_string(std::hash<::ir::Value>()(value));
-}
-
-std::string CompatibleInfo::OutputName(const ::ir::Value& value) {
-  return CompatibleInfo::kOutputPrefix +
+std::string CompatibleInfo::ValueName(const ::ir::Value& value) {
+  return CompatibleInfo::kNamePrefix +
          std::to_string(std::hash<::ir::Value>()(value));
 }
 
@@ -55,10 +50,10 @@ std::string CompatibleInfo::OpFuncName(const ::ir::Operation& op) {
 
 std::string CompatibleInfo::GroupOpsName(
     const std::vector<::ir::Operation*>& ops) {
-  std::string name = "fn_";
+  std::string name = "fn";
   for (auto* op : ops) {
     std::string op_name = OpName(*op);
-    name += cinn::common::Context::Global().NewName(op_name);
+    name += "_" + cinn::common::Context::Global().NewName(op_name);
   }
   return name;
 }
@@ -69,7 +64,7 @@ std::vector<std::string> CompatibleInfo::InputNames(const ::ir::Operation& op,
   std::unordered_set<std::string> repeat;
   for (int i = 0; i < op.num_operands(); ++i) {
     auto value = op.operand_source(i);
-    std::string name = CompatibleInfo::InputName(value);
+    std::string name = CompatibleInfo::ValueName(value);
     if (!allow_duplicate && repeat.count(name)) {
       continue;
     }
@@ -84,7 +79,7 @@ std::vector<std::string> CompatibleInfo::OutputNames(
   std::vector<std::string> names;
   for (int i = 0; i < op.num_results(); ++i) {
     auto value = op.result(i);
-    std::string name = CompatibleInfo::OutputName(value);
+    std::string name = CompatibleInfo::ValueName(value);
     names.push_back(std::move(name));
   }
   return names;
diff --git a/paddle/cinn/hlir/framework/new_ir/utils.h b/paddle/cinn/hlir/framework/new_ir/utils.h
index 7796899ce3482bd8ad52abfc506f50e15332019e..4c437dd19ef8a31418948e77b73a8f84db04cb92 100644
--- a/paddle/cinn/hlir/framework/new_ir/utils.h
+++ b/paddle/cinn/hlir/framework/new_ir/utils.h
@@ -24,17 +24,14 @@ namespace framework {
 namespace newir {
 
 struct CompatibleInfo {
-  static constexpr char* kInputPrefix = "input_";
-  static constexpr char* kOutputPrefix = "output_";
+  static constexpr char* kNamePrefix = "var_";
   // TODO(Aurelius): Need add name mapping logic in REGISTER_CINN_OP
   // macros or attempt to unify Op name with Paddle and CINN.
   static const std::unordered_map<std::string, std::string> OP_NAMES;
 
   static std::string OpName(const ::ir::Operation& op);
 
-  static std::string InputName(const ::ir::Value& value);
-
-  static std::string OutputName(const ::ir::Value& value);
+  static std::string ValueName(const ::ir::Value& value);
 
   static std::string OpFuncName(const ::ir::Operation& op);
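The renames above collapse the input/output distinction: every ::ir::Value now gets one stable name, "var_" plus the value's hash, no matter whether an op sees it as operand or result. Previously the same value could be rendered as "input_<hash>" by one op and "output_<hash>" by another, which is exactly what a multi-op group cannot tolerate when matching tensors across ops. Illustrative shapes of the generated names (hash digits invented; v, op_a, and op_b are hypothetical handles):

    std::string n = CompatibleInfo::ValueName(v);  // e.g. "var_140224117511"
    std::string f = CompatibleInfo::GroupOpsName({op_a, op_b});
    // e.g. "fn_fill_constant_0_tan_1" -- the per-op suffixes come from
    // Context::Global().NewName(), so the exact indices vary per process.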
diff --git a/paddle/cinn/hlir/framework/new_ir_compiler.cc b/paddle/cinn/hlir/framework/new_ir_compiler.cc
index f6954514ace6f4f0001546b69029b6c35e2b33ea..bcc7c0f1c2a05d9c6eaec2c9f6ad79b2843b1e8c 100644
--- a/paddle/cinn/hlir/framework/new_ir_compiler.cc
+++ b/paddle/cinn/hlir/framework/new_ir_compiler.cc
@@ -35,7 +35,6 @@ std::unique_ptr<Program> NewIRCompiler::Build() {
        ++it) {
     std::vector<::ir::Operation*> ops = {*it};
     groups.push_back(std::make_shared<Group>(ops));
-    groups.back()->fn_name = CompatibleInfo::GroupOpsName(groups.back()->ops);
   }
   VLOG(4) << "Groups size: " << groups.size();
   return std::move(Build(groups));
@@ -103,23 +102,20 @@ std::vector<std::unique_ptr<Instruction>> NewIRCompiler::BuildInstructions(
     const std::vector<GroupPtr>& groups) {
   std::vector<std::unique_ptr<Instruction>> instructions;
   for (int idx = 0; idx < groups.size(); ++idx) {
-    // TODO(Aurelius84): only support single op in groups
-    auto& op = *(groups[idx]->ops[0]);
     auto& fn_name = groups[idx]->fn_name;
-    auto instr = std::unique_ptr<Instruction>(
-        new Instruction(target_,
-                        scope_.get(),
-                        CompatibleInfo::InputNames(op),
-                        CompatibleInfo::OutputNames(op),
-                        fn_name));
+    auto instr =
+        std::unique_ptr<Instruction>(new Instruction(target_,
+                                                     scope_.get(),
+                                                     groups[idx]->input_names,
+                                                     groups[idx]->output_names,
+                                                     fn_name));
     VLOG(1) << "Lookup kernel name: " << fn_name;
     auto* fn_ptr = compiler_->Lookup(fn_name);
     CHECK(fn_ptr);
     instr->SetLoweredFunc(reinterpret_cast<void*>(fn_ptr), fn_name);
     // As some instruction like reduce, will generate more than one kernel.
     // So try to find the rest kernel, if it exists.
-    // SetSubKernels(instr.get(), op_func_name);
+    // SetSubKernels(instr.get(), fn_name);
     instr->Finalize();
     instructions.push_back(std::move(instr));
   }
@@ -131,16 +127,15 @@ std::shared_ptr<Scope> BuildScope(const Target& target,
   std::unordered_set<::ir::Value> visited;
   auto scope = std::make_shared<Scope>();
 
-  auto create_var = [&](const std::string& name_prefix, ::ir::Value value) {
+  auto create_var = [&](::ir::Value value) {
     if (visited.count(value) > 0) return;
     visited.emplace(value);
 
-    std::string name =
-        name_prefix + std::to_string(std::hash<::ir::Value>()(value));
+    std::string name = CompatibleInfo::ValueName(value);
     auto type_info = value.type().dyn_cast<paddle::dialect::DenseTensorType>();
     auto* var = scope->Var<Tensor>(name);
     auto& tensor = absl::get<Tensor>(*var);
-    // NOTE: can be replaced with phi::vectorized ?
+
     std::vector<Shape::dim_t> shape;
     for (auto i = 0; i < type_info.dims().size(); ++i) {
       shape.push_back(Shape::dim_t(type_info.dims()[i]));
@@ -150,14 +145,12 @@ std::shared_ptr<Scope> BuildScope(const Target& target,
 
   for (auto it = program.block()->begin(); it != program.block()->end(); ++it) {
-    for (auto i = 0; i < (*it)->num_operands(); ++i) {
-      auto in_value = (*it)->operand_source(i);
-      create_var(CompatibleInfo::kInputPrefix, in_value);
+    for (auto& operand : (*it)->operands()) {
+      create_var(operand.source());
     }
 
-    for (auto i = 0; i < (*it)->num_results(); ++i) {
-      auto out_value = (*it)->result(i);
-      create_var(CompatibleInfo::kOutputPrefix, out_value);
+    for (auto& result : (*it)->results()) {
+      create_var(result);
     }
   }
   return scope;
diff --git a/paddle/cinn/hlir/framework/new_ir_compiler.h b/paddle/cinn/hlir/framework/new_ir_compiler.h
index c9a430e39c57a97c93569283bef7e0ceda736cea..bb18da54bc4f30683cf3f54196f648161491d0f5 100644
--- a/paddle/cinn/hlir/framework/new_ir_compiler.h
+++ b/paddle/cinn/hlir/framework/new_ir_compiler.h
@@ -40,11 +40,11 @@ class NewIRCompiler final {
 
   std::unique_ptr<Program> Build();
 
+  std::unique_ptr<Program> Build(const std::vector<GroupPtr>& groups);
+
  private:
   CINN_DISALLOW_COPY_AND_ASSIGN(NewIRCompiler);
 
-  std::unique_ptr<Program> Build(const std::vector<GroupPtr>& groups);
-
   std::vector<ir::LoweredFunc> GetOpFunc(const ::ir::Operation& op, int idx);
 
   void ProcessFunction(const std::vector<ir::LoweredFunc>& lowered_funcs);
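Promoting Build(const std::vector<GroupPtr>&) to the public interface is the core of this change: callers can hand NewIRCompiler an explicit group partition instead of the one-op-per-group split that the argumentless Build() still performs. Condensed usage, mirroring the new test below (program, groups, and target as defined there):

    auto scope = cinn::hlir::framework::BuildScope(target, *program);
    cinn::hlir::framework::NewIRCompiler ir_compiler(*program, target, scope);
    auto runtime_program = ir_compiler.Build(groups);  // caller-chosen partition
    runtime_program->Execute();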
diff --git a/test/cpp/ir/cinn/new_ir_compiler_test.cc b/test/cpp/ir/cinn/new_ir_compiler_test.cc
index 1ea7a455e12175cca9631fe3ad2751a197faee7a..91fb7cb13cc090bb1da31de840d034355975c69f 100644
--- a/test/cpp/ir/cinn/new_ir_compiler_test.cc
+++ b/test/cpp/ir/cinn/new_ir_compiler_test.cc
@@ -17,6 +17,7 @@
 #include <glog/logging.h>
 #include <gtest/gtest.h>
 #include <memory>
+#include <sstream>
 #include <string>
 
 #include "paddle/fluid/ir/dialect/paddle_dialect/ir/pd_dialect.h"
@@ -31,10 +32,15 @@
 #include "paddle/cinn/hlir/framework/convert_to_dialect.h"
 #include "paddle/cinn/hlir/framework/new_ir_compiler.h"
 
-std::unique_ptr<::ir::Program> BuildProgram() {
+using cinn::hlir::framework::newir::Group;
+using cinn::hlir::framework::newir::GroupPtr;
+
+using ProgramInfo =
+    std::tuple<std::shared_ptr<::ir::Program>, std::vector<GroupPtr>>;
+ProgramInfo BuildProgram() {
   ::ir::IrContext* ctx = ::ir::IrContext::Instance();
   ctx->GetOrRegisterDialect<paddle::dialect::PaddleDialect>();
-  auto program = std::make_unique<::ir::Program>(ctx);
+  auto program = std::make_shared<::ir::Program>(ctx);
   ::ir::Builder builder = ::ir::Builder(ctx, program->block());
 
   const float value_one = 1.0;  // relu(tan(1.)) = 1.5;
@@ -51,17 +57,30 @@ std::unique_ptr<::ir::Program> BuildProgram() {
                                              phi::DataType::FLOAT32,
                                              phi::GPUPlace());
 
-  auto tanh_op_x = builder.Build<paddle::dialect::TanOp>(full_op_x->result(0));
-  auto relu_op_x = builder.Build<paddle::dialect::ReluOp>(tanh_op_x->result(0));
-  auto tanh_op_y = builder.Build<paddle::dialect::TanOp>(full_op_y->result(0));
-  auto relu_op_y =
-      builder.Build<paddle::dialect::ReluOp>(tanh_op_y->result(0));
-
-  return std::move(program);
+  auto tan_op_x = builder.Build<paddle::dialect::TanOp>(full_op_x->result(0));
+  auto relu_op_x = builder.Build<paddle::dialect::ReluOp>(tan_op_x->result(0));
+  auto tan_op_y = builder.Build<paddle::dialect::TanOp>(relu_op_x->result(0));
+  auto relu_op_y = builder.Build<paddle::dialect::ReluOp>(tan_op_y->result(0));
+
+  std::vector<GroupPtr> groups;
+  groups.emplace_back(
+      std::make_shared<Group>(std::initializer_list<::ir::Operation*>(
+          {full_op_x.operation()})));  // For coverage
+  groups.emplace_back(std::make_shared<Group>(
+      std::initializer_list<::ir::Operation*>({full_op_y.operation()})));
+  groups.emplace_back(std::make_shared<Group>(
+      std::vector<::ir::Operation*>({tan_op_x.operation(),
+                                     relu_op_x.operation(),
+                                     tan_op_y.operation(),
+                                     relu_op_y.operation()})));
+
+  return {program, groups};
 }
 
 TEST(NewIRCompier, CompilerAndRun) {
   // Step 1: Construct ir::Program
-  std::unique_ptr<::ir::Program> program = BuildProgram();
+  auto prog_info = BuildProgram();
+  std::shared_ptr<::ir::Program> program = std::get<0>(prog_info);
   EXPECT_EQ(program->block()->size(), 6u);
   LOG(INFO) << program->block()->size();
@@ -89,9 +108,42 @@ TEST(NewIRCompier, CompilerAndRun) {
   }
 }
 
+TEST(NewIRCompier, CompileGroupOps) {
+  // Step 1: Construct ir::Program
+  auto prog_info = BuildProgram();
+  std::shared_ptr<::ir::Program> program = std::get<0>(prog_info);
+  std::vector<GroupPtr> groups = std::get<1>(prog_info);
+  EXPECT_EQ(program->block()->size(), 6u);
+  LOG(INFO) << program->block()->size();
+
+  std::stringstream ss;
+  program->Print(ss);
+  LOG(INFO) << ss.str();
+
+  // Step 2: Compiler New ir::Program into Runtime Program
+  auto target = cinn::common::DefaultNVGPUTarget();
+  auto scope = cinn::hlir::framework::BuildScope(target, *program);
+  ASSERT_EQ(scope->var_names().size(), 6);
+
+  cinn::hlir::framework::NewIRCompiler ir_compiler(*program, target, scope);
+  auto runtime_program = ir_compiler.Build(groups);
+
+  // Step 3: Execute Runtime Instruction and check Scope.
+  ASSERT_NO_THROW(runtime_program->Execute());
+  for (auto& var_name : scope->var_names()) {
+    std::string name = {var_name.begin(), var_name.end()};
+    std::vector<float> data =
+        cinn::GetTensorData<float>(scope->GetTensor(name), target);
+    for (int i = 0; i < 1; ++i) {
+      LOG_FIRST_N(INFO, 10) << "data: " << data[i];
+    }
+  }
+}
+
 TEST(RuntimeDialect, CompilerAndRun) {
   // Step 1: Construct ir::Program
-  std::unique_ptr<::ir::Program> program = BuildProgram();
+  auto prog_info = BuildProgram();
+  std::shared_ptr<::ir::Program> program = std::get<0>(prog_info);
   EXPECT_EQ(program->block()->size(), 6u);
 
   // Step 2: Compiler New ir::Program into Runtime Program
@@ -103,7 +155,7 @@ TEST(RuntimeDialect, CompilerAndRun) {
   auto runtime_program = ir_compiler.Build();
 
   // Step 3: Convert into cinn::dialect::RuntimeDialect
-  std::unique_ptr<::ir::Program> ir_runtime_program =
+  std::shared_ptr<::ir::Program> ir_runtime_program =
      cinn::hlir::framework::ConvertToRuntimeDialect(*runtime_program);
 
   // Step 4: Run cinn::dialect::RuntimeDialect
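A note on the new ASSERT_EQ(scope->var_names().size(), 6): the test program holds six ops, each defining exactly one result value, and every operand aliases one of those results, so BuildScope creates one variable per distinct ::ir::Value. The visited set already deduplicated values before this change, but the prefix used to depend on traversal order (a value first seen as a result was named "output_<hash>"), which could disagree with the names generated during lowering; the unified "var_" scheme removes that mismatch.

    // 2 FullOp + 2 TanOp + 2 ReluOp results = 6 distinct ::ir::Values;
    // operands alias these results, so the visited set dedupes them.
    ASSERT_EQ(scope->var_names().size(), 6);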