// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "paddle/cinn/hlir/framework/new_ir_compiler.h" #include #include "paddle/cinn/common/context.h" #include "paddle/cinn/hlir/framework/op_strategy.h" #include "paddle/cinn/lang/lower.h" #include "paddle/cinn/lang/placeholder.h" #include "paddle/cinn/utils/attribute_util.h" #include "paddle/fluid/ir/dialect/pd_type.h" #include "paddle/ir/core/builtin_type.h" namespace cinn { namespace hlir { namespace framework { const std::unordered_map CompatibleInfo::OP_NAMES = { {"pd.full", "fill_constant"}, {"pd.matmul", "matmul"}}; // TODO(Aurelius84): Need abstract this logic to implement Proxy for // the co-existance with GraphCompiler. std::unique_ptr NewIRCompiler::Build() { m_builder_.Clear(); // NOTE(Aurelius84): Currently only support each op for one group std::vector> groups; for (auto it = program_.block()->begin(); it != program_.block()->end(); ++it) { groups.push_back({*it}); } VLOG(4) << "Groups size: " << groups.size(); std::vector> lowered_funcs; for (int i = 0; i < groups.size(); ++i) { lowered_funcs.emplace_back(GetOpFunc(*groups[i][0], i)); } for (auto&& lowered_func : lowered_funcs) { ProcessFunction(lowered_func); } compiler_ = backends::Compiler::Create(target_); auto build_module = m_builder_.Build(); compiler_->Build(build_module, ""); auto instructions = BuildInstructions(groups); // TODO(Aurelius84): Instantiate all tensors on compile-time, which is // controlled by 'options.with_instantiate_variables' in GraphCompiler. // Moreover, it's better to implement InsertBufferHandlers() logic // to automatically insert Malloc and Free instructions. for (auto& name : scope_->var_names()) { std::string var_name({name.data(), name.size()}); VLOG(4) << "Instantiate " << var_name << " on compile-time"; auto* var = scope_->Var(var_name); auto& tensor = absl::get(*var); tensor->mutable_data(target_, tensor->type()); } return std::make_unique(scope_, std::move(instructions)); } std::vector NewIRCompiler::GetOpFunc(const ::ir::Operation& op, int idx) { std::vector inputs; std::vector cinn_inputs; auto op_name = op.name(); VLOG(4) << "GetOpFunc for op: " << op_name; // step 1: Deal with Oprands for (int i = 0; i < op.num_operands(); ++i) { auto in_value = op.operand(i); // TODO(Aurelius84): For now, use addr as name but it's not wise. std::string input_id = CompatibleInfo::kInputPrefix + std::to_string(std::hash<::ir::Value>()(in_value)); auto type_info = in_value.type().dyn_cast(); auto in_shape = phi::vectorize(type_info.dims()); auto dtype = type_info.dtype(); ir::Tensor temp = lang::CreatePlaceHolder( in_shape, utils::ConvertIRType(dtype), input_id); inputs.push_back(temp); cinn_inputs.push_back(common::CINNValue(temp)); } for (auto out_name : OpGetOutputNames(op)) { cinn_inputs.push_back(common::CINNValue(out_name)); } VLOG(4) << "inputs.size(): " << inputs.size(); // step 2: Deal with OpResult std::vector out_types; std::vector> out_shapes; for (int i = 0; i < op.num_results(); ++i) { auto out_value = op.result(i); auto type_info = out_value.type().dyn_cast(); out_types.push_back(utils::ConvertIRType(type_info.dtype())); auto out_shape = phi::vectorize(type_info.dims()); out_shapes.push_back(std::move(out_shape)); } VLOG(4) << "out_types.size(): " << out_types.size(); NodeAttr node_attrs; { VLOG(4) << "op.attributes():" << op.attributes().size(); auto attrs = utils::ConvertAttributes(op.attributes()); node_attrs.node_name = CompatibleInfo::OP_NAMES.at(op_name); node_attrs.attr_store = std::move(attrs); } auto& strategy = Operator::GetAttrs("CINNStrategy"); // NOTE(Aurelius84): Do we need replace all hlir::framework Operator with // ::ir::Program ? const hlir::framework::Operator* cinn_op = Operator::Get(CompatibleInfo::OP_NAMES.at(op_name)); auto impl = OpStrategy::SelectImpl( strategy[cinn_op](node_attrs, inputs, out_types, out_shapes, target_)); common::CINNValuePack C = impl->fcompute(common::CINNValuePack{cinn_inputs}); poly::StageMap stages = C.back(); // make sure all the tensors in the stages before schedule launch. for (int i = 0; i < C->size() - 1; i++) { ir::Expr temp = C[i]; stages->InsertLazily(temp.as_tensor_ref()); } C = impl->fschedule(C); for (int i = 0; i < C->size() - 1; i++) { ir::Expr temp = C[i]; // checkout whether the tensor is with buffer. if ((!temp.as_tensor_ref()->buffer.defined() || this->target_ != common::DefaultNVGPUTarget()) && !stages[temp.as_tensor_ref()]->inlined()) { inputs.push_back(temp.as_tensor_ref()); } } auto func = lang::LowerVec( GenOpFuncName(op, idx), stages, inputs, {}, {}, nullptr, target_); return func; } void NewIRCompiler::ProcessFunction( const std::vector& lowered_funcs) { for (auto&& func : lowered_funcs) { for (auto&& arg : func->args) { std::string arg_name = arg.name(); if (arg_name[0] == '_') arg_name = arg_name.substr(1); auto* var = scope_->FindVar(arg_name); // For argument buffer not in scope, create it. if (!var && arg.is_buffer()) { auto* new_var = scope_->Var(arg_name); auto& tensor = absl::get(*new_var); std::vector shape; for (auto& shape_dim : arg.buffer_arg()->shape) { CHECK(shape_dim.is_constant()); shape.push_back(static_cast(shape_dim.get_constant())); } tensor->Resize(Shape{shape}); tensor->set_type(arg.buffer_arg()->dtype); } } m_builder_.AddFunction(func); } } std::vector> NewIRCompiler::BuildInstructions( const std::vector>& groups) { std::vector> instructions; for (int idx = 0; idx < groups.size(); ++idx) { // TODO(Aurelius84): only support single op in groups auto& op = *groups[idx][0]; auto instr_name = op.name(); auto instr = std::unique_ptr(new Instruction(target_, scope_.get(), OpGetInputNames(op), OpGetOutputNames(op), instr_name)); auto& op_func_name = GenOpFuncName(op, idx); auto* fn_ptr = compiler_->Lookup(op_func_name); CHECK(fn_ptr); instr->SetLoweredFunc(reinterpret_cast(fn_ptr), op_func_name); // As some instruction like reduce, will generate more than one kernel. // So try to find the rest kernel, if it exists. // SetSubKernels(instr.get(), op_func_name); instr->Finalize(); instructions.push_back(std::move(instr)); } return instructions; } const std::string& NewIRCompiler::GenOpFuncName(const ::ir::Operation& op, int idx) { // TODO(Aurelius84): . will raise compiler error in pd.xxx, need more // elegant way to generate function name. std::string op_name = op.name().substr(3) + "_" + std::to_string(idx); std::string func_name = Context::Global().NewName("fn_" + op_name); func_names_.try_emplace(op_name, func_name); return func_names_.at(op_name); } std::vector NewIRCompiler::OpGetInputNames( const ::ir::Operation& op) { std::vector names; std::unordered_set repeat; for (int i = 0; i < op.num_operands(); ++i) { auto value = op.operand(i); std::string name = CompatibleInfo::kInputPrefix + std::to_string(std::hash<::ir::Value>()(value)); if (repeat.count(name)) { continue; } repeat.insert(name); names.push_back(name); } return names; } std::vector NewIRCompiler::OpGetOutputNames( const ::ir::Operation& op) { std::vector names; for (int i = 0; i < op.num_results(); ++i) { auto value = op.result(i); std::string name = CompatibleInfo::kOutputPrefix + std::to_string(std::hash<::ir::Value>()(value)); names.push_back(std::move(name)); } return names; } std::shared_ptr BuildScope(const Target& target, const ::ir::Program& program) { std::unordered_set<::ir::Value> visited; auto scope = std::make_shared(); auto create_var = [&](const std::string& name_prefix, ::ir::Value value) { if (visited.count(value) > 0) return; visited.emplace(value); std::string name = name_prefix + std::to_string(std::hash<::ir::Value>()(value)); auto type_info = value.type().dyn_cast(); auto* var = scope->Var(name); auto& tensor = absl::get(*var); // NOTE: can be replaced with phi::vectorized ? std::vector shape; for (auto i = 0; i < type_info.dims().size(); ++i) { shape.push_back(Shape::dim_t(type_info.dims()[i])); } tensor->Resize(Shape{shape}); tensor->set_type(utils::ConvertIRType(type_info.dtype())); }; for (auto it = program.block()->begin(); it != program.block()->end(); ++it) { for (auto i = 0; i < (*it)->num_operands(); ++i) { auto in_value = (*it)->operand(i); create_var(CompatibleInfo::kInputPrefix, in_value); } for (auto i = 0; i < (*it)->num_results(); ++i) { auto out_value = (*it)->result(i); create_var(CompatibleInfo::kOutputPrefix, out_value); } } return scope; } } // namespace framework } // namespace hlir } // namespace cinn