未验证 提交 af1ace59 编写于 作者: R Ruibiao Chen 提交者: GitHub

Consider kernel argument def for data device transform in standalone Executor (#50471)

* Consider kernel argument def for data device transform in standalone Executor

* Fix ALL_BACKEND errors

* Fix CI errors
上级 3adee6c9
......@@ -20,6 +20,7 @@
#include "paddle/phi/core/kernel_factory.h"
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/operators/ops_extra_info.h"
#include "paddle/phi/backends/onednn/onednn_context.h"
#endif
......@@ -469,119 +470,234 @@ void ApplyDataTransform(const OpKernelType& expected_kernel_key,
bool transfered = false;
DataTranferHelper data_transfer_helper(place, var_scope, local_scope);
for (auto& var_name_item : *ins_map_temp) {
bool should_skip_input =
no_buffer_ins && no_buffer_ins->count(var_name_item.first) > 0;
for (size_t i = 0; i < var_name_item.second.size(); ++i) {
auto var = var_name_item.second[i];
auto var_name = new_ins[var_name_item.first].at(i);
const phi::DenseTensor* tensor_in;
std::string new_var_name;
bool is_transferred = false;
if (var->IsType<phi::DenseTensor>() || var->IsType<phi::SelectedRows>()) {
tensor_in = GetLoDTensorOrSelectedRowsValueFromVar(*var);
} else if (var->IsType<LoDTensorArray>()) {
if (var->Get<LoDTensorArray>().size() == 0) {
continue;
}
tensor_in = static_cast<const phi::DenseTensor*>(
&(var->Get<LoDTensorArray>()[0]));
} else {
continue;
}
// special case
if (!tensor_in->IsInitialized()) {
if (should_skip_input == true) {
#ifdef PADDLE_WITH_MKLDNN
// Var without buffer may be needed
// for some situation like InferShape().
// In this situation We cannot skip Var analysis, as
// MKL-DNN shape of Var may differ from kNHWC Var
// In such situation corressponding resized Var
// has to be created and registered
if ((tensor_in->layout() == DataLayout::ONEDNN) &&
(var->IsType<phi::DenseTensor>() == true) &&
(expected_kernel_key.data_layout_ != DataLayout::ONEDNN) &&
(phi::OneDNNContext::tls().get_cur_paddle_data_layout() ==
DataLayout::kNHWC)) {
VLOG(7) << "Created reshaped dummy input based on MKL-DNN "
"phi::DenseTensor , "
"but kNHWC layout"
<< var_name_item.first << " in Operator "
<< op_base->Type();
auto op = TransferLayout(var_name,
&new_var_name,
tensor_in->layout(),
DataLayout::kNHWC,
var_scope,
local_scope,
op_base->Type() == "fetch_v2");
if (op) {
data_transfer_helper.RunAndConstructOpFuncNode(
op, var_name, new_var_name, new_op_func_nodes, skip_run);
auto apply_data_transform_for_one_parameter =
[&](const std::string& parameter_name,
const std::vector<std::string>& argument_names,
const phi::TensorArgDef* argument_def,
bool should_skip_input,
std::vector<Variable*>* arguments) {
PADDLE_ENFORCE_EQ(argument_names.size(),
arguments->size(),
phi::errors::InvalidArgument(
"The size of argument_names (%d) should equal to "
"the size of arguments (%d).",
argument_names.size(),
arguments->size()));
for (size_t i = 0; i < arguments->size(); ++i) {
const std::string var_name = argument_names[i];
Variable* var = arguments->at(i);
const phi::DenseTensor* tensor_in;
if (var->IsType<phi::DenseTensor>() ||
var->IsType<phi::SelectedRows>()) {
tensor_in = GetLoDTensorOrSelectedRowsValueFromVar(*var);
} else if (var->IsType<LoDTensorArray>()) {
if (var->Get<LoDTensorArray>().size() == 0) {
continue;
}
is_transferred = true;
tensor_in = static_cast<const phi::DenseTensor*>(
&(var->Get<LoDTensorArray>()[0]));
} else {
VLOG(7) << "Skip scanning input " << var_name_item.first
<< " in Operator " << op_base->Type();
continue;
}
bool is_transferred = false;
std::string new_var_name;
// special case
if (!tensor_in->IsInitialized()) {
if (should_skip_input) {
#ifdef PADDLE_WITH_MKLDNN
// Var without buffer may be needed
// for some situation like InferShape().
// In this situation We cannot skip Var analysis, as
// MKL-DNN shape of Var may differ from kNHWC Var
// In such situation corressponding resized Var
// has to be created and registered
if ((tensor_in->layout() == DataLayout::ONEDNN) &&
(var->IsType<phi::DenseTensor>() == true) &&
(expected_kernel_key.data_layout_ != DataLayout::ONEDNN) &&
(phi::OneDNNContext::tls().get_cur_paddle_data_layout() ==
DataLayout::kNHWC)) {
VLOG(7) << "Created reshaped dummy input based on MKL-DNN "
"phi::DenseTensor , "
"but kNHWC layout"
<< parameter_name << " in Operator " << op_base->Type();
auto op = TransferLayout(var_name,
&new_var_name,
tensor_in->layout(),
DataLayout::kNHWC,
var_scope,
local_scope,
op_base->Type() == "fetch_v2");
if (op) {
data_transfer_helper.RunAndConstructOpFuncNode(
op, var_name, new_var_name, new_op_func_nodes, skip_run);
}
is_transferred = true;
} else {
VLOG(7) << "Skip scanning input " << parameter_name
<< " in Operator " << op_base->Type();
}
#endif
} else {
continue;
}
} else {
auto kernel_type_for_var =
static_cast<const framework::OperatorWithKernel*>(op_base)
->GetKernelTypeForVar(
var_name_item.first,
*tensor_in,
framework::TransOpKernelTypeToPhiKernelKey(
expected_kernel_key));
// apply data transform
is_transferred =
data_transfer_helper.apply(kernel_type_for_var,
expected_kernel_key,
tensor_in,
var_name,
&new_var_name,
new_op_func_nodes,
use_local_scope,
op_base->Type() == "fetch_v2",
skip_run);
}
} else {
continue;
}
} else {
auto kernel_key_for_var =
static_cast<const framework::OperatorWithKernel*>(op_base)
->GetKernelTypeForVar(
parameter_name,
*tensor_in,
framework::TransOpKernelTypeToPhiKernelKey(
expected_kernel_key));
std::unique_ptr<phi::KernelKey>
expected_kernel_key_for_argument_def = nullptr;
if (argument_def &&
argument_def->backend != phi::Backend::ALL_BACKEND) {
const phi::Backend& tensor_backend =
phi::TransToPhiBackend(tensor_in->place());
const phi::Backend& def_backend = argument_def->backend;
if ((def_backend != tensor_backend &&
!(def_backend == phi::Backend::GPUDNN &&
tensor_backend == phi::Backend::GPU) &&
!(def_backend == phi::Backend::KPS &&
tensor_backend == phi::Backend::XPU) &&
!(def_backend == phi::Backend::ONEDNN &&
tensor_backend == phi::Backend::CPU)) ||
tensor_in->place().GetType() == AllocationType::GPUPINNED) {
expected_kernel_key_for_argument_def =
std::make_unique<phi::KernelKey>(
def_backend,
expected_kernel_key.data_layout_,
framework::TransToPhiDataType(
expected_kernel_key.data_type_));
VLOG(6) << "argument " << var_name
<< " use new expected kernel key : "
<< *expected_kernel_key_for_argument_def;
}
}
if (is_transferred) {
transfered = true;
// update RuntimeContext.inputs and original op_func_node inputs
op_func_node->input_index[var_name_item.first][i] =
var_scope->VarId(new_var_name);
var_name_item.second[i] = local_scope->FindVar(new_var_name);
new_ins[var_name_item.first][i] = new_var_name;
for (auto& pair : new_outs) {
for (size_t j = 0; j < pair.second.size(); ++j) {
VLOG(4) << pair.second[j] << " " << var_name;
if (pair.second[j] == var_name) {
VLOG(4) << "Found inplace between input(" << var_name_item.first
<< ") and output(" << pair.first
<< "), the variable name is " << var_name;
(*outs_map_temp)[pair.first][j] =
local_scope->FindVar(new_var_name);
new_outs[pair.first][j] = new_var_name;
op_func_node
->inplace_back_map[var_scope->GetIdByName(new_var_name)] =
var_scope->GetIdByName(var_name);
op_func_node->output_index[pair.first][j] =
var_scope->VarId(new_var_name);
// apply data transform
is_transferred = data_transfer_helper.apply(
kernel_key_for_var,
(expected_kernel_key_for_argument_def
? TransPhiKernelKeyToOpKernelType(
*expected_kernel_key_for_argument_def.get())
: expected_kernel_key),
tensor_in,
var_name,
&new_var_name,
new_op_func_nodes,
use_local_scope,
op_base->Type() == "fetch_v2",
skip_run);
}
if (is_transferred) {
transfered = true;
// update RuntimeContext.inputs and original op_func_node inputs
op_func_node->input_index[parameter_name][i] =
var_scope->VarId(new_var_name);
arguments->at(i) = local_scope->FindVar(new_var_name);
new_ins[parameter_name][i] = new_var_name;
for (auto& pair : new_outs) {
for (size_t j = 0; j < pair.second.size(); ++j) {
VLOG(4) << pair.second[j] << " " << var_name;
if (pair.second[j] == var_name) {
VLOG(4) << "Found inplace between input(" << parameter_name
<< ") and output(" << pair.first
<< "), the variable name is " << var_name;
(*outs_map_temp)[pair.first][j] =
local_scope->FindVar(new_var_name);
new_outs[pair.first][j] = new_var_name;
op_func_node
->inplace_back_map[var_scope->GetIdByName(new_var_name)] =
var_scope->GetIdByName(var_name);
op_func_node->output_index[pair.first][j] =
var_scope->VarId(new_var_name);
}
}
}
// NOTE(Aurelius84): avoid deepcopy twice if we already insert data
// transfer op.
if (op_base->Type() == "fetch_v2") {
op_base->SetAttr("deepcopy", false);
}
}
}
// NOTE(Aurelius84): avoid deepcopy twice if we already insert data
// transfer op.
if (op_base->Type() == "fetch_v2") {
op_base->SetAttr("deepcopy", false);
}
};
phi::Kernel* phi_kernel = op_func_node->phi_kernel_;
if (phi_kernel && phi_kernel->IsValid() &&
phi_kernel->GetKernelRegisteredType() ==
phi::KernelRegisteredType::FUNCTION) {
framework::OperatorWithKernel* op_with_kernel =
dynamic_cast<framework::OperatorWithKernel*>(op_base);
PADDLE_ENFORCE_NOT_NULL(
op_with_kernel,
phi::errors::Unavailable("Failed to cast op_base (%p) from Operator* "
"to OperatorWithKernel*.",
op_base));
const auto& input_names = op_with_kernel->PhiKernelSignature()->input_names;
const auto& input_defs = phi_kernel->args_def().input_defs();
PADDLE_ENFORCE_EQ(input_names.size(),
input_defs.size(),
platform::errors::InvalidArgument(
"The size of inputs_args names (%d) must be equal to "
"the size of kernel input_defs (%d).",
input_names.size(),
input_defs.size()));
for (size_t i = 0; i < input_defs.size(); ++i) {
const std::string& parameter_name = input_names[i];
auto iter = ins_map_temp->find(parameter_name);
if (iter == ins_map_temp->end()) {
continue;
}
std::vector<Variable*>& arguments = iter->second;
bool should_skip_input =
no_buffer_ins && no_buffer_ins->count(parameter_name) > 0;
apply_data_transform_for_one_parameter(parameter_name,
new_ins[parameter_name],
&input_defs.at(i),
should_skip_input,
&arguments);
}
#ifdef PADDLE_WITH_MKLDNN
// For input that is Extra, only MKLDNN will use Extra Inputs
auto& extra_input_names =
paddle::operators::ExtraInfoUtils::Instance().GetExtraInputNamesMap(
op_with_kernel->Type());
for (const auto& parameter_name : extra_input_names) {
auto iter = ins_map_temp->find(parameter_name);
if (iter == ins_map_temp->end()) {
continue;
}
std::vector<Variable*>& arguments = iter->second;
bool should_skip_input =
no_buffer_ins && no_buffer_ins->count(parameter_name) > 0;
apply_data_transform_for_one_parameter(parameter_name,
new_ins[parameter_name],
/*argument_def=*/nullptr,
should_skip_input,
&arguments);
}
#endif
} else {
for (auto& var_name_item : *ins_map_temp) {
const std::string& parameter_name = var_name_item.first;
std::vector<Variable*>& arguments = var_name_item.second;
bool should_skip_input =
no_buffer_ins && no_buffer_ins->count(parameter_name) > 0;
apply_data_transform_for_one_parameter(parameter_name,
new_ins[parameter_name],
/*argument_def=*/nullptr,
should_skip_input,
&arguments);
}
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册