diff --git a/paddle/fluid/framework/new_executor/interpreter/data_transfer.cc b/paddle/fluid/framework/new_executor/interpreter/data_transfer.cc index 7e97d82c78eaefe40c5c58caf81caa8c52973c53..9e96e9cb403ac4478c4f39f337a7e7f218797d22 100644 --- a/paddle/fluid/framework/new_executor/interpreter/data_transfer.cc +++ b/paddle/fluid/framework/new_executor/interpreter/data_transfer.cc @@ -20,6 +20,7 @@ #include "paddle/phi/core/kernel_factory.h" #ifdef PADDLE_WITH_MKLDNN +#include "paddle/fluid/operators/ops_extra_info.h" #include "paddle/phi/backends/onednn/onednn_context.h" #endif @@ -469,119 +470,234 @@ void ApplyDataTransform(const OpKernelType& expected_kernel_key, bool transfered = false; DataTranferHelper data_transfer_helper(place, var_scope, local_scope); - for (auto& var_name_item : *ins_map_temp) { - bool should_skip_input = - no_buffer_ins && no_buffer_ins->count(var_name_item.first) > 0; - for (size_t i = 0; i < var_name_item.second.size(); ++i) { - auto var = var_name_item.second[i]; - auto var_name = new_ins[var_name_item.first].at(i); - const phi::DenseTensor* tensor_in; - std::string new_var_name; - bool is_transferred = false; - if (var->IsType() || var->IsType()) { - tensor_in = GetLoDTensorOrSelectedRowsValueFromVar(*var); - } else if (var->IsType()) { - if (var->Get().size() == 0) { - continue; - } - tensor_in = static_cast( - &(var->Get()[0])); - } else { - continue; - } - // special case - if (!tensor_in->IsInitialized()) { - if (should_skip_input == true) { -#ifdef PADDLE_WITH_MKLDNN - // Var without buffer may be needed - // for some situation like InferShape(). - // In this situation We cannot skip Var analysis, as - // MKL-DNN shape of Var may differ from kNHWC Var - // In such situation corressponding resized Var - // has to be created and registered - if ((tensor_in->layout() == DataLayout::ONEDNN) && - (var->IsType() == true) && - (expected_kernel_key.data_layout_ != DataLayout::ONEDNN) && - (phi::OneDNNContext::tls().get_cur_paddle_data_layout() == - DataLayout::kNHWC)) { - VLOG(7) << "Created reshaped dummy input based on MKL-DNN " - "phi::DenseTensor , " - "but kNHWC layout" - << var_name_item.first << " in Operator " - << op_base->Type(); - auto op = TransferLayout(var_name, - &new_var_name, - tensor_in->layout(), - DataLayout::kNHWC, - var_scope, - local_scope, - op_base->Type() == "fetch_v2"); - if (op) { - data_transfer_helper.RunAndConstructOpFuncNode( - op, var_name, new_var_name, new_op_func_nodes, skip_run); + auto apply_data_transform_for_one_parameter = + [&](const std::string& parameter_name, + const std::vector& argument_names, + const phi::TensorArgDef* argument_def, + bool should_skip_input, + std::vector* arguments) { + PADDLE_ENFORCE_EQ(argument_names.size(), + arguments->size(), + phi::errors::InvalidArgument( + "The size of argument_names (%d) should equal to " + "the size of arguments (%d).", + argument_names.size(), + arguments->size())); + for (size_t i = 0; i < arguments->size(); ++i) { + const std::string var_name = argument_names[i]; + Variable* var = arguments->at(i); + + const phi::DenseTensor* tensor_in; + if (var->IsType() || + var->IsType()) { + tensor_in = GetLoDTensorOrSelectedRowsValueFromVar(*var); + } else if (var->IsType()) { + if (var->Get().size() == 0) { + continue; } - is_transferred = true; + tensor_in = static_cast( + &(var->Get()[0])); } else { - VLOG(7) << "Skip scanning input " << var_name_item.first - << " in Operator " << op_base->Type(); + continue; } + + bool is_transferred = false; + std::string new_var_name; + // special case + if (!tensor_in->IsInitialized()) { + if (should_skip_input) { +#ifdef PADDLE_WITH_MKLDNN + // Var without buffer may be needed + // for some situation like InferShape(). + // In this situation We cannot skip Var analysis, as + // MKL-DNN shape of Var may differ from kNHWC Var + // In such situation corressponding resized Var + // has to be created and registered + if ((tensor_in->layout() == DataLayout::ONEDNN) && + (var->IsType() == true) && + (expected_kernel_key.data_layout_ != DataLayout::ONEDNN) && + (phi::OneDNNContext::tls().get_cur_paddle_data_layout() == + DataLayout::kNHWC)) { + VLOG(7) << "Created reshaped dummy input based on MKL-DNN " + "phi::DenseTensor , " + "but kNHWC layout" + << parameter_name << " in Operator " << op_base->Type(); + auto op = TransferLayout(var_name, + &new_var_name, + tensor_in->layout(), + DataLayout::kNHWC, + var_scope, + local_scope, + op_base->Type() == "fetch_v2"); + if (op) { + data_transfer_helper.RunAndConstructOpFuncNode( + op, var_name, new_var_name, new_op_func_nodes, skip_run); + } + is_transferred = true; + } else { + VLOG(7) << "Skip scanning input " << parameter_name + << " in Operator " << op_base->Type(); + } #endif - } else { - continue; - } - } else { - auto kernel_type_for_var = - static_cast(op_base) - ->GetKernelTypeForVar( - var_name_item.first, - *tensor_in, - framework::TransOpKernelTypeToPhiKernelKey( - expected_kernel_key)); - // apply data transform - is_transferred = - data_transfer_helper.apply(kernel_type_for_var, - expected_kernel_key, - tensor_in, - var_name, - &new_var_name, - new_op_func_nodes, - use_local_scope, - op_base->Type() == "fetch_v2", - skip_run); - } + } else { + continue; + } + } else { + auto kernel_key_for_var = + static_cast(op_base) + ->GetKernelTypeForVar( + parameter_name, + *tensor_in, + framework::TransOpKernelTypeToPhiKernelKey( + expected_kernel_key)); + + std::unique_ptr + expected_kernel_key_for_argument_def = nullptr; + if (argument_def && + argument_def->backend != phi::Backend::ALL_BACKEND) { + const phi::Backend& tensor_backend = + phi::TransToPhiBackend(tensor_in->place()); + const phi::Backend& def_backend = argument_def->backend; + if ((def_backend != tensor_backend && + !(def_backend == phi::Backend::GPUDNN && + tensor_backend == phi::Backend::GPU) && + !(def_backend == phi::Backend::KPS && + tensor_backend == phi::Backend::XPU) && + !(def_backend == phi::Backend::ONEDNN && + tensor_backend == phi::Backend::CPU)) || + tensor_in->place().GetType() == AllocationType::GPUPINNED) { + expected_kernel_key_for_argument_def = + std::make_unique( + def_backend, + expected_kernel_key.data_layout_, + framework::TransToPhiDataType( + expected_kernel_key.data_type_)); + + VLOG(6) << "argument " << var_name + << " use new expected kernel key : " + << *expected_kernel_key_for_argument_def; + } + } - if (is_transferred) { - transfered = true; - // update RuntimeContext.inputs and original op_func_node inputs - op_func_node->input_index[var_name_item.first][i] = - var_scope->VarId(new_var_name); - var_name_item.second[i] = local_scope->FindVar(new_var_name); - new_ins[var_name_item.first][i] = new_var_name; - for (auto& pair : new_outs) { - for (size_t j = 0; j < pair.second.size(); ++j) { - VLOG(4) << pair.second[j] << " " << var_name; - if (pair.second[j] == var_name) { - VLOG(4) << "Found inplace between input(" << var_name_item.first - << ") and output(" << pair.first - << "), the variable name is " << var_name; - (*outs_map_temp)[pair.first][j] = - local_scope->FindVar(new_var_name); - new_outs[pair.first][j] = new_var_name; - op_func_node - ->inplace_back_map[var_scope->GetIdByName(new_var_name)] = - var_scope->GetIdByName(var_name); - op_func_node->output_index[pair.first][j] = - var_scope->VarId(new_var_name); + // apply data transform + is_transferred = data_transfer_helper.apply( + kernel_key_for_var, + (expected_kernel_key_for_argument_def + ? TransPhiKernelKeyToOpKernelType( + *expected_kernel_key_for_argument_def.get()) + : expected_kernel_key), + tensor_in, + var_name, + &new_var_name, + new_op_func_nodes, + use_local_scope, + op_base->Type() == "fetch_v2", + skip_run); + } + + if (is_transferred) { + transfered = true; + // update RuntimeContext.inputs and original op_func_node inputs + op_func_node->input_index[parameter_name][i] = + var_scope->VarId(new_var_name); + arguments->at(i) = local_scope->FindVar(new_var_name); + new_ins[parameter_name][i] = new_var_name; + for (auto& pair : new_outs) { + for (size_t j = 0; j < pair.second.size(); ++j) { + VLOG(4) << pair.second[j] << " " << var_name; + if (pair.second[j] == var_name) { + VLOG(4) << "Found inplace between input(" << parameter_name + << ") and output(" << pair.first + << "), the variable name is " << var_name; + (*outs_map_temp)[pair.first][j] = + local_scope->FindVar(new_var_name); + new_outs[pair.first][j] = new_var_name; + op_func_node + ->inplace_back_map[var_scope->GetIdByName(new_var_name)] = + var_scope->GetIdByName(var_name); + op_func_node->output_index[pair.first][j] = + var_scope->VarId(new_var_name); + } + } + } + // NOTE(Aurelius84): avoid deepcopy twice if we already insert data + // transfer op. + if (op_base->Type() == "fetch_v2") { + op_base->SetAttr("deepcopy", false); } } } - // NOTE(Aurelius84): avoid deepcopy twice if we already insert data - // transfer op. - if (op_base->Type() == "fetch_v2") { - op_base->SetAttr("deepcopy", false); - } + }; + + phi::Kernel* phi_kernel = op_func_node->phi_kernel_; + if (phi_kernel && phi_kernel->IsValid() && + phi_kernel->GetKernelRegisteredType() == + phi::KernelRegisteredType::FUNCTION) { + framework::OperatorWithKernel* op_with_kernel = + dynamic_cast(op_base); + PADDLE_ENFORCE_NOT_NULL( + op_with_kernel, + phi::errors::Unavailable("Failed to cast op_base (%p) from Operator* " + "to OperatorWithKernel*.", + op_base)); + const auto& input_names = op_with_kernel->PhiKernelSignature()->input_names; + const auto& input_defs = phi_kernel->args_def().input_defs(); + PADDLE_ENFORCE_EQ(input_names.size(), + input_defs.size(), + platform::errors::InvalidArgument( + "The size of inputs_args names (%d) must be equal to " + "the size of kernel input_defs (%d).", + input_names.size(), + input_defs.size())); + + for (size_t i = 0; i < input_defs.size(); ++i) { + const std::string& parameter_name = input_names[i]; + auto iter = ins_map_temp->find(parameter_name); + if (iter == ins_map_temp->end()) { + continue; + } + std::vector& arguments = iter->second; + bool should_skip_input = + no_buffer_ins && no_buffer_ins->count(parameter_name) > 0; + + apply_data_transform_for_one_parameter(parameter_name, + new_ins[parameter_name], + &input_defs.at(i), + should_skip_input, + &arguments); + } +#ifdef PADDLE_WITH_MKLDNN + // For input that is Extra, only MKLDNN will use Extra Inputs + auto& extra_input_names = + paddle::operators::ExtraInfoUtils::Instance().GetExtraInputNamesMap( + op_with_kernel->Type()); + for (const auto& parameter_name : extra_input_names) { + auto iter = ins_map_temp->find(parameter_name); + if (iter == ins_map_temp->end()) { + continue; } + std::vector& arguments = iter->second; + bool should_skip_input = + no_buffer_ins && no_buffer_ins->count(parameter_name) > 0; + apply_data_transform_for_one_parameter(parameter_name, + new_ins[parameter_name], + /*argument_def=*/nullptr, + should_skip_input, + &arguments); + } +#endif + } else { + for (auto& var_name_item : *ins_map_temp) { + const std::string& parameter_name = var_name_item.first; + std::vector& arguments = var_name_item.second; + bool should_skip_input = + no_buffer_ins && no_buffer_ins->count(parameter_name) > 0; + apply_data_transform_for_one_parameter(parameter_name, + new_ins[parameter_name], + /*argument_def=*/nullptr, + should_skip_input, + &arguments); } }