// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <iostream>

#include "paddle/fluid/ir/pass/pd_op_to_kernel_pass.h"

#include "paddle/fluid/ir/dialect/kernel_attribute.h"
#include "paddle/fluid/ir/dialect/kernel_dialect.h"
#include "paddle/fluid/ir/dialect/kernel_op.h"
#include "paddle/fluid/ir/dialect/kernel_type.h"
#include "paddle/fluid/ir/dialect/op_yaml_info_util.h"
#include "paddle/fluid/ir/dialect/pd_attribute.h"
#include "paddle/fluid/ir/dialect/pd_dialect.h"
#include "paddle/fluid/ir/dialect/utils.h"
#include "paddle/fluid/ir/interface/op_yaml_info.h"
#include "paddle/phi/api/lib/data_transform.h"
#include "paddle/phi/api/lib/kernel_dispatch.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/compat/convert_utils.h"
#include "paddle/phi/core/kernel_factory.h"

namespace paddle {
namespace dialect {

const int init_on_gpu_threshold = 1000;

// Resolve the phi::KernelKey (backend, layout, data type) for `op`. The data
// type comes from the op's yaml kernel_key_dtype slot when present; the
// backend and layout fall back to the places of the op's lowered inputs.
phi::KernelKey GetKernelKey(
    ir::Operation* op,
    const phi::Place& place,
    const std::unordered_map<ir::Value, ir::OpResult>& map_value_pair) {
  if (op->name() == "pd.feed") {
    return {phi::Backend::CPU, phi::DataLayout::ANY, phi::DataType::FLOAT32};
  }

  phi::Backend kernel_backend = phi::Backend::UNDEFINED;
  phi::DataLayout kernel_layout = phi::DataLayout::UNDEFINED;
  phi::DataType kernel_data_type = phi::DataType::UNDEFINED;

  paddle::dialect::OpYamlInfoInterface op_info_interface =
      op->dyn_cast<paddle::dialect::OpYamlInfoInterface>();
  std::vector<paddle::dialect::OpInputInfo> input_info;
  if (op_info_interface) {
    auto op_info_res = op_info_interface.GetOpInfo();

    input_info = std::get<0>(op_info_res);

    // only support non-vector inputs for now
    std::map<std::string, int> input_map;
    int index = 0;
    int tensor_input_number = 0;
    for (auto& t : input_info) {
      // TODO: filter attribute tensors
      input_map[t.name] = index++;

      if (!t.is_mutable_attribute) {
        tensor_input_number += 1;
      }
    }

    std::map<std::string, std::string> attr_type_map;
    auto attr_info = std::get<1>(op_info_res);
    for (auto& t : attr_info) {
      VLOG(6) << t.name << "\t" << t.type_name;
      attr_type_map[t.name] = t.type_name;
    }
    auto runtime_info = std::get<3>(op_info_res);

    auto attr_map = op->attributes();
    auto data_type_info = runtime_info.kernel_key_dtype;
    if (data_type_info.size() > 0 && data_type_info[0] != "") {
      // only a single input or attribute slot is supported as the dtype source
      auto slot_name = data_type_info[0];
      if (input_map.count(slot_name)) {
        // parse from input
        int in_index = input_map.at(slot_name);

        dialect::DenseTensorType type =
            op->operand(in_index)
                .source()
                .type()
                .dyn_cast<paddle::dialect::DenseTensorType>();
        kernel_data_type = TransToPhiDataType(type.dtype());
      } else {
        PADDLE_ENFORCE_EQ(attr_type_map.count(slot_name),
                          true,
                          phi::errors::PreconditionNotMet(
                              "[%s] MUST be in attr map", slot_name));
        kernel_data_type = attr_map.at(slot_name)
                               .dyn_cast<paddle::dialect::DataTypeAttribute>()
                               .data();
      }
    }
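    // Backend resolution for data-source ops: an op whose inputs are all
    // mutable attributes (e.g. pd.uniform fed by pd.full_int_array) has no
    // tensor operand to infer a place from, so the shape heuristic below
    // guesses instead: outputs larger than init_on_gpu_threshold elements are
    // placed on the GPU to avoid a later host-to-device copy; everything else
    // falls back to the backend parsed from the pass's default place.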
    // parse all the input tensors
    if (tensor_input_number == 0 || op->name() == "pd.full_") {
      // all the information has to come from attributes and context
      if (op->name() == "pd.uniform") {
        // try to process uniform; use the output shape to determine backend
        // TODO(phlrain): should support other initializer ops
        auto define_op = op->operand(0).source().GetDefiningOp();
        if (define_op->name() == "pd.full_int_array") {
          auto shape = define_op->attributes()
                           .at("value")
                           .dyn_cast<paddle::dialect::IntArrayAttribute>()
                           .data()
                           .GetData();

          size_t numel = 1;
          for (auto& s : shape) {
            numel *= s;
          }
          if (numel > init_on_gpu_threshold) {
            kernel_backend = phi::Backend::GPU;
          }
        }
      }

      if (kernel_backend == phi::Backend::UNDEFINED) {
        kernel_backend = paddle::experimental::ParseBackend(place);
      }
    }
  }

  if (op->num_operands() > 0) {
    paddle::experimental::detail::KernelKeyParser kernel_key_parser;

    for (size_t i = 0; i < op->num_operands(); ++i) {
      // TODO: filter attribute tensors
      if ((input_info.size() > i) && input_info[i].is_mutable_attribute) {
        continue;
      }
      auto input_tmp = op->operand(i).source();
      auto new_input_tmp = map_value_pair.at(input_tmp);

      auto input_type = new_input_tmp.type();
      dialect::AllocatedDenseTensorType type;
      if (input_type.isa<dialect::AllocatedDenseTensorType>()) {
        type = input_type.dyn_cast<dialect::AllocatedDenseTensorType>();
      } else if (input_type.isa<ir::VectorType>()) {
        type = input_type.dyn_cast<ir::VectorType>()[0]
                   .dyn_cast<dialect::AllocatedDenseTensorType>();
      }

      // build a fake tensor here: no real allocation, only meta and place,
      // which is all the kernel key parser needs
      auto ptr = new phi::Allocation(nullptr, 0, type.place());

      std::shared_ptr<phi::Allocation> holder(ptr);

      auto dtype = TransToPhiDataType(type.dtype());

      phi::DenseTensorMeta meta(
          dtype, type.dims(), type.data_layout(), type.lod(), type.offset());

      phi::DenseTensor fake_tensor(holder, meta);

      kernel_key_parser.AssignKernelKeySet(fake_tensor);
    }

    auto kernel_key_set = kernel_key_parser.key_set;

    auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey();

    if (kernel_backend == phi::Backend::UNDEFINED) {
      kernel_backend = kernel_key.backend();
    }
    if (kernel_layout == phi::DataLayout::UNDEFINED) {
      kernel_layout = kernel_key.layout();
    }
    if (kernel_data_type == phi::DataType::UNDEFINED) {
      kernel_data_type = kernel_key.dtype();
    }
  }

  phi::KernelKey res(kernel_backend, kernel_layout, kernel_data_type);
  return res;
}

std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog) {
  auto program = std::make_unique<ir::Program>(ir::IrContext::Instance());

  auto block = prog->block();
  phi::Place cpu_place(phi::AllocationType::CPU);

  ir::IrContext* ctx = ir::IrContext::Instance();
  ctx->GetOrRegisterDialect<paddle::dialect::PaddleDialect>();
  ctx->GetOrRegisterDialect<paddle::dialect::PaddleKernelDialect>();

  std::unordered_map<ir::Operation*, ir::Operation*> map_op_pair;
  std::unordered_map<ir::Value, ir::OpResult> map_value_pair;

  std::string op1_name = paddle::dialect::PhiKernelOp::name();

  ir::OpInfo op1_info = ctx->GetRegisteredOpInfo(op1_name);

  for (auto it = block->begin(); it != block->end(); ++it) {
    VLOG(6) << "op name " << (*it)->name();
    auto kernel_key = GetKernelKey(*it, cpu_place, map_value_pair);
    VLOG(6) << "kernel type " << kernel_key;

    // create the new op
    // only for single output for now
    // need to update the new kernel key layout and data type

    std::vector<ir::Type> op_output_types;
    if ((*it)->num_results() > 0) {
      for (size_t i = 0; i < (*it)->num_results(); ++i) {
        auto result_type = (*it)->result(i).type();
        if (result_type.isa<dialect::DenseTensorType>()) {
          auto allocated_dense_tensor_dtype =
              paddle::dialect::AllocatedDenseTensorType::get(
                  ctx,
                  phi::TransToPhiPlace(kernel_key.backend()),
                  result_type.dyn_cast<dialect::DenseTensorType>());
          op_output_types.push_back(allocated_dense_tensor_dtype);
        } else if (result_type.isa<ir::VectorType>()) {
          auto pos1 = result_type.dyn_cast<ir::VectorType>().data()[0];

          if (pos1.isa<dialect::DenseTensorType>()) {
            auto allocated_dense_tensor_dtype =
                paddle::dialect::AllocatedDenseTensorType::get(
                    ctx,
                    phi::TransToPhiPlace(kernel_key.backend()),
                    pos1.dyn_cast<dialect::DenseTensorType>());
            op_output_types.push_back(allocated_dense_tensor_dtype);
          } else {
            PADDLE_THROW(phi::errors::Unimplemented(
                "only support dense tensor in vector type for now"));
          }

          ir::Type t1 = ir::VectorType::get(ctx, op_output_types);
          op_output_types.clear();
          op_output_types.push_back(t1);
        }
      }
    }
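    // Operand remapping and device transfer: each operand of the original op
    // is looked up in map_value_pair to find its lowered producer. When the
    // producer's place does not match the place the selected phi kernel
    // expects for that input, a pd.memcpy_h2d kernel op is created in front
    // (dst_place_type == 1, i.e. device) and its result is used as the input
    // instead, so every kernel sees data on the device it runs on.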
    // construct inputs
    std::vector<ir::OpResult> vec_inputs;

    paddle::dialect::OpYamlInfoInterface op_info_interface =
        (*it)->dyn_cast<paddle::dialect::OpYamlInfoInterface>();
    std::string kernel_fn_str;
    std::vector<paddle::dialect::OpInputInfo> input_info;
    if (op_info_interface) {
      auto op_info_res = op_info_interface.GetOpInfo();
      auto runtime_info = std::get<3>(op_info_res);
      kernel_fn_str = runtime_info.kernel_func[0];
      input_info = std::get<0>(op_info_res);
    }

    if ((*it)->num_operands() > 0) {
      for (size_t i = 0; i < (*it)->num_operands(); ++i) {
        auto cur_in = (*it)->operand(i).source();
        auto new_in = map_value_pair.at(cur_in);

        auto new_in_type = new_in.type();

        auto& kernel = phi::KernelFactory::Instance().SelectKernelWithGPUDNN(
            kernel_fn_str, kernel_key);

        if (kernel.IsValid()) {
          if (new_in_type.isa<dialect::AllocatedDenseTensorType>()) {
            // allocated type
            auto place =
                new_in_type.dyn_cast<dialect::AllocatedDenseTensorType>()
                    .place();

            if ((i < input_info.size()) &&
                (!input_info[i].is_mutable_attribute) &&
                (place != phi::TransToPhiPlace(kernel_key.backend()))) {
              if (paddle::experimental::NeedTransformPlace(
                      place, kernel.InputAt(i).backend, {})) {
                VLOG(6) << "need trans from " << place << " to "
                        << kernel_key.backend();
                // build a memcpy op
                auto copy_kernel_key = kernel_key;
                copy_kernel_key.set_backend(phi::Backend::GPU);
                std::unordered_map<std::string, ir::Attribute> op1_attribute{
                    {"op_name", ir::StrAttribute::get(ctx, "pd.memcpy_h2d")},
                    {"kernel_name", ir::StrAttribute::get(ctx, "memcpy_h2d")},
                    {"kernel_key",
                     dialect::KernelAttribute::get(ctx, copy_kernel_key)},
                    {"dst_place_type", ir::Int32Attribute::get(ctx, 1)}};

                ir::Operation* op1 = ir::Operation::Create(
                    {new_in}, op1_attribute, {new_in_type}, op1_info);

                program->block()->push_back(op1);

                new_in = op1->result(0);
              }
            }
          } else if (new_in_type.isa<ir::VectorType>()) {
            // TODO: need update here, support combine data transformer
          } else {
            PADDLE_THROW(phi::errors::Unimplemented(
                "only support allocated dense tensor type for now"));
          }
        }
        vec_inputs.push_back(new_in);
      }
    }

    std::unordered_map<std::string, ir::Attribute> op1_attribute{
        {"op_name", ir::StrAttribute::get(ctx, (*it)->name())},
        {"kernel_name", ir::StrAttribute::get(ctx, kernel_fn_str)},
        {"kernel_key", dialect::KernelAttribute::get(ctx, kernel_key)}};

    auto op_attr_map = (*it)->attributes();

    for (auto it1 = op_attr_map.begin(); it1 != op_attr_map.end(); ++it1) {
      op1_attribute.emplace(it1->first, it1->second);
    }

    ir::Operation* op1 = ir::Operation::Create(
        vec_inputs, op1_attribute, op_output_types, op1_info);

    map_op_pair[*it] = op1;

    // map the results of the original op to the new op's results
    if ((*it)->num_results() > 0) {
      for (size_t i = 0; i < (*it)->num_results(); ++i) {
        map_value_pair[(*it)->result(i)] = op1->result(i);
      }
    }

    program->block()->push_back(op1);
  }

  return program;
}

}  // namespace dialect
}  // namespace paddle
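
// Usage sketch (illustrative only; the surrounding setup is an assumption,
// not part of this pass):
//
//   ir::IrContext* ctx = ir::IrContext::Instance();
//   ctx->GetOrRegisterDialect<paddle::dialect::PaddleDialect>();
//   ir::Program program(ctx);
//   ...  // build pd.* ops into program.block()
//   auto kernel_program = paddle::dialect::PdOpLowerToKernelPass(&program);
//   // kernel_program now holds phi.kernel ops carrying op_name, kernel_name,
//   // and kernel_key attributes, ready for the kernel-dialect executor.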