未验证 提交 782454bd 编写于 作者: Z zyfncg 提交者: GitHub

[PHI] Register custom kernel for all type of custom device (#51262)

* register custom kernel for all type of custom device

* fix bug

* fix GetKernelInputArgDef

* fix amp bug

* fix TransToPhiPlace

* adapt interpreter_util
上级 2847980c
......@@ -665,9 +665,17 @@ void ApplyDataTransform(const OpKernelType& expected_kernel_key,
bool should_skip_input =
no_buffer_ins && no_buffer_ins->count(parameter_name) > 0;
phi::TensorArgDef in_def = input_defs.at(i);
#ifdef PADDLE_WITH_CUSTOM_DEVICE
// When the backend of input tensor arg_def is CUSTOM, we need to set it
// to the actual backend by expected_kernel_key.
if (in_def.backend == phi::Backend::CUSTOM) {
in_def.SetBackend(phi::TransToPhiBackend(expected_kernel_key.place_));
}
#endif
apply_data_transform_for_one_parameter(parameter_name,
new_ins[parameter_name],
&input_defs.at(i),
&in_def,
should_skip_input,
&arguments);
}
......
......@@ -1090,7 +1090,9 @@ void FakeInitializeOutputsForFunctionKernel(
if (out_tensor && !out_tensor->initialized()) {
phi::TensorArgDef& tensor_arg_def = output_defs[start_idx + offset];
phi::DataType dtype = tensor_arg_def.dtype;
phi::Place place = phi::TransToPhiPlace(tensor_arg_def.backend);
phi::Place place = tensor_arg_def.backend == phi::Backend::CUSTOM
? dev_ctx.GetPlace()
: phi::TransToPhiPlace(tensor_arg_def.backend);
if (dtype == DataType::UNDEFINED ||
OpsNeedSetOutputDtypeWhenRegisterPhiKernel.count(
......
......@@ -2650,7 +2650,6 @@ Scope* OperatorWithKernel::PrepareData(
input_names.size(),
input_defs.size()));
for (size_t i = 0; i < input_defs.size(); ++i) {
auto& in_def = input_defs.at(i);
std::string input_name = input_names[i];
auto iter = ctx->inputs.find(input_name);
if (iter == ctx->inputs.end()) {
......@@ -2659,6 +2658,15 @@ Scope* OperatorWithKernel::PrepareData(
auto& ins_vector = iter->second;
bool should_skip_input =
no_buffer_ins && no_buffer_ins->count(input_name) > 0;
phi::TensorArgDef in_def = input_defs.at(i);
#ifdef PADDLE_WITH_CUSTOM_DEVICE
// When the backend of input tensor arg_def is CUSTOM, we need to set it
// to the actual backend by expected_kernel_key.
if (in_def.backend == phi::Backend::CUSTOM) {
in_def.SetBackend(expected_kernel_key.backend());
}
#endif
prepare_input_data(input_name, &ins_vector, &in_def, should_skip_input);
}
#ifdef PADDLE_WITH_MKLDNN
......
......@@ -78,14 +78,31 @@ OpSupportedInfos(const std::string& place,
}
}
#ifdef PADDLE_WITH_CUSTOM_DEVICE
auto is_custom_place = [&](const std::string& place) {
return is_target_place.count(place) && place != "CPU" && place != "GPU" &&
place != "XPU";
};
#endif
auto phi_kernels = phi::KernelFactory::Instance().kernels();
for (auto& kernel_pair : phi_kernels) {
auto op_type = phi::TransToFluidOpName(kernel_pair.first);
for (auto& info_pair : kernel_pair.second) {
framework::OpKernelType kernel_type =
framework::TransPhiKernelKeyToOpKernelType(info_pair.first);
if (is_target_place[query_place](kernel_type.place_) &&
kernel_type.data_type_ == dtype && all_ops.count(op_type)) {
if (dtype != framework::TransToProtoVarType(info_pair.first.dtype()) ||
all_ops.count(op_type) == 0) {
continue;
}
#ifdef PADDLE_WITH_CUSTOM_DEVICE
if (info_pair.first.backend() == phi::Backend::CUSTOM) {
if (is_custom_place(query_place)) {
VLOG(4) << op_type << " " << supported_ops.size();
supported_ops.emplace(op_type);
}
continue;
}
#endif
if (is_target_place[query_place](
phi::TransToPhiPlace(info_pair.first.backend(), false))) {
VLOG(4) << op_type << " " << supported_ops.size();
supported_ops.emplace(op_type);
}
......
......@@ -273,13 +273,16 @@ PD_REGISTER_GENERAL_KERNEL(
ALL_DTYPE) {}
#endif
#ifdef PADDLE_WITH_CUSTOM_DEVICE
// Explicitly instantiate the feed kernel for phi::CustomContext so that the
// symbol referenced by the registration macros below is emitted in this
// translation unit.
namespace paddle {
namespace operators {
template void FeedDenseTensorKernel<phi::CustomContext>(
const phi::CustomContext& dev_ctx,
const phi::ExtendedTensor& x,
int col,
phi::DenseTensor* out);
} // namespace operators
} // namespace paddle
// Register the feed kernels once for the generic `Custom` backend rather than
// once per registered custom device type; the kernel factory is expected to
// fall back to Backend::CUSTOM when a device-specific kernel is absent (see
// the KernelFactory::SelectKernel changes in this same commit).
PD_REGISTER_GENERAL_KERNEL(
feed_dense_tensor,
Custom,
ALL_LAYOUT,
paddle::operators::FeedDenseTensorKernel<phi::CustomContext>,
ALL_DTYPE) {}
PD_REGISTER_GENERAL_KERNEL(
feed_strings,
Custom,
ALL_LAYOUT,
paddle::operators::FeedStringsKernel<phi::CustomContext>,
ALL_DTYPE) {}
#endif
......@@ -87,11 +87,6 @@ void RegisterCustomDeviceCommonKernel(const std::string& dev_type) {
LoadCombineOpKernel<paddle::platform::CustomDeviceContext, int8_t>,
paddle::operators::
LoadCombineOpKernel<paddle::platform::CustomDeviceContext, int64_t>);
REGISTER_CUSTOM_DEVICE_GENERAL_KERNEL(
feed_dense_tensor,
device_type,
ALL_LAYOUT,
paddle::operators::FeedDenseTensorKernel<phi::CustomContext>);
#endif
}
......
......@@ -62,6 +62,19 @@ class TransformFlag {
bool trans_layout_ = true;
};
// Resolve the effective TensorArgDef for one kernel input.
//
// Kernels registered under the generic CUSTOM backend declare their inputs
// with Backend::CUSTOM as a placeholder; substitute the concrete backend
// selected for this kernel invocation so data transform targets a real
// place. For any other backend the arg def is returned unchanged.
static inline phi::TensorArgDef GetKernelInputArgDef(
    const phi::TensorArgDef& input_def, phi::Backend kernel_backend) {
  phi::TensorArgDef resolved_def = input_def;
#ifdef PADDLE_WITH_CUSTOM_DEVICE
  // Backend::CUSTOM kernels only exist in custom-device builds, so the
  // substitution is compiled out everywhere else.
  if (resolved_def.backend == phi::Backend::CUSTOM) {
    resolved_def.SetBackend(kernel_backend);
  }
#endif
  return resolved_def;
}
std::shared_ptr<phi::DenseTensor> PrepareData(
const Tensor& input,
const phi::TensorArgDef& target_args_def,
......
......@@ -701,7 +701,7 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d
input_tensor_code = (
input_tensor_code
+ f"""
{code_indent} auto {PREFIX_TENSOR_NAME}{input_name} = PrepareData({input_name}, kernel.InputAt({kernel_param.index(input_name)}), {trans_flag});"""
{code_indent} auto {PREFIX_TENSOR_NAME}{input_name} = PrepareData({input_name}, GetKernelInputArgDef(kernel.InputAt({kernel_param.index(input_name)}), kernel_backend), {trans_flag});"""
)
return input_tensor_code
......@@ -722,7 +722,7 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d
input_tensor_code = (
input_tensor_code
+ f"""
{code_indent} auto {PREFIX_TENSOR_NAME}{input_name} = PrepareDataForSelectedRows({input_name}, kernel.InputAt({kernel_param.index(input_name)}), {trans_flag});
{code_indent} auto {PREFIX_TENSOR_NAME}{input_name} = PrepareDataForSelectedRows({input_name}, GetKernelInputArgDef(kernel.InputAt({kernel_param.index(input_name)}), kernel_backend), {trans_flag});
"""
)
return input_tensor_code
......@@ -753,7 +753,7 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d
input_tensor_code = (
input_tensor_code
+ f"""
{code_indent} auto {PREFIX_TENSOR_NAME}{input_name}_vec = PrepareData({input_name}, kernel.InputAt({kernel_param.index(input_name)}), {trans_flag});
{code_indent} auto {PREFIX_TENSOR_NAME}{input_name}_vec = PrepareData({input_name}, GetKernelInputArgDef(kernel.InputAt({kernel_param.index(input_name)}), kernel_backend), {trans_flag});
{code_indent} paddle::optional<std::vector<const phi::DenseTensor*>> {PREFIX_TENSOR_NAME}{input_name};
{code_indent} if ({PREFIX_TENSOR_NAME}{input_name}_vec){{
{code_indent} {PREFIX_TENSOR_NAME}{input_name} = paddle::optional<std::vector<const phi::DenseTensor*>>({PREFIX_TENSOR_NAME}{input_name}_vec->size());
......@@ -791,7 +791,7 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d
input_tensor_code = (
input_tensor_code
+ f"""
{code_indent} auto {PREFIX_TENSOR_NAME}{input_name}_vec = PrepareData({input_name}, kernel.InputAt({kernel_param.index(input_name)}), {trans_flag});
{code_indent} auto {PREFIX_TENSOR_NAME}{input_name}_vec = PrepareData({input_name}, GetKernelInputArgDef(kernel.InputAt({kernel_param.index(input_name)}), kernel_backend), {trans_flag});
{code_indent} std::vector<const phi::DenseTensor*> {PREFIX_TENSOR_NAME}{input_name}({PREFIX_TENSOR_NAME}{input_name}_vec->size());
{code_indent} for (size_t i = 0; i < {PREFIX_TENSOR_NAME}{input_name}.size(); ++i) {{
{code_indent} {PREFIX_TENSOR_NAME}{input_name}[i] = &{PREFIX_TENSOR_NAME}{input_name}_vec->at(i);
......
......@@ -134,6 +134,9 @@ inline std::ostream& operator<<(std::ostream& os, Backend backend) {
case Backend::IPU:
os << "IPU";
break;
case Backend::CUSTOM:
os << "CUSTOM";
break;
default: {
size_t device_type_id_ = static_cast<size_t>(backend) -
static_cast<size_t>(Backend::NUM_BACKENDS);
......@@ -181,6 +184,8 @@ inline Backend StringToBackend(const char* backend_cstr) {
#endif
} else if (s == std::string("IPU")) {
return Backend::IPU;
} else if (s == std::string("Custom")) {
return Backend::CUSTOM;
} else {
return static_cast<Backend>(static_cast<size_t>(Backend::NUM_BACKENDS) +
phi::CustomRegisteredDeviceMap::Instance()
......
......@@ -99,6 +99,8 @@ phi::Place TransToPhiPlace(const Backend& backend, bool set_device_id) {
return phi::CustomPlace(
device_type,
set_device_id ? phi::DeviceManager::GetDevice(device_type) : 0);
} else if (backend == Backend::CUSTOM) {
return phi::CustomPlace();
}
#endif
PADDLE_THROW(phi::errors::Unimplemented(
......
......@@ -93,6 +93,14 @@ const Kernel& KernelFactory::SelectKernel(const std::string& kernel_name,
kernel_key.backend(), phi::DataLayout::ALL_LAYOUT, kernel_key.dtype());
kernel_iter = iter->second.find(any_layout_kernel_key);
}
#if defined(PADDLE_WITH_CUSTOM_DEVICE)
if (kernel_iter == iter->second.end() &&
kernel_key.backend() > phi::Backend::NUM_BACKENDS) {
kernel_iter = iter->second.find({phi::Backend::CUSTOM,
phi::DataLayout::ALL_LAYOUT,
kernel_key.dtype()});
}
#endif
if (kernel_iter == iter->second.end()) {
return empty_kernel;
......@@ -220,6 +228,12 @@ KernelResult KernelFactory::SelectKernelOrThrowError(
!phi::backends::xpu::is_xpu_support_op(TransToFluidOpName(kernel_name),
kernel_key.dtype())
#elif defined(PADDLE_WITH_CUSTOM_DEVICE)
if (kernel_iter == iter->second.end() &&
kernel_key.backend() > phi::Backend::NUM_BACKENDS) {
kernel_iter = iter->second.find({phi::Backend::CUSTOM,
phi::DataLayout::ALL_LAYOUT,
kernel_key.dtype()});
}
if (FLAGS_enable_api_kernel_fallback &&
(kernel_iter == iter->second.end() ||
phi::backends::custom_device::is_in_custom_black_list(
......
......@@ -61,15 +61,15 @@ struct KernelArgsParseFunctor<Return_ (*)(Args_...)> {
|| arg_type == std::type_index(typeid(const OneDNNContext&))
#endif
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
|| arg_type == std::type_index(typeid(const GPUContext&))) {
|| arg_type == std::type_index(typeid(const GPUContext&))
#elif defined(PADDLE_WITH_XPU) && !defined(PADDLE_WITH_XPU_KP)
|| arg_type == std::type_index(typeid(const XPUContext&))) {
|| arg_type == std::type_index(typeid(const XPUContext&))
#elif defined(PADDLE_WITH_XPU) && defined(PADDLE_WITH_XPU_KP)
|| arg_type == std::type_index(typeid(const KPSContext&))) {
#elif defined(PADDLE_WITH_CUSTOM_DEVICE)
|| arg_type == std::type_index(typeid(const KPSContext&))
#endif
#if defined(PADDLE_WITH_CUSTOM_DEVICE)
|| arg_type == std::type_index(typeid(const CustomContext&))) {
#else
) {
#endif
// do nothing, skip context arg now
......
......@@ -142,3 +142,34 @@ PD_REGISTER_KERNEL(empty_like,
kernel->InputAt(0).SetBackend(phi::Backend::ALL_BACKEND);
}
#endif
#ifdef PADDLE_WITH_CUSTOM_DEVICE
// Register empty/empty_like once for the generic `Custom` backend so a single
// registration covers every custom device type; device resolution happens at
// dispatch time (Backend::CUSTOM fallback in the kernel factory).
// NOTE(review): dtype list intentionally omits bfloat16/complex — presumably
// matching what custom runtimes support; confirm against other backends.
PD_REGISTER_KERNEL(empty,
Custom,
ALL_LAYOUT,
phi::EmptyKernel,
float,
double,
int8_t,
uint8_t,
int16_t,
int,
int64_t,
bool,
phi::dtype::float16) {}
PD_REGISTER_KERNEL(empty_like,
Custom,
ALL_LAYOUT,
phi::EmptyLikeKernel,
float,
double,
int8_t,
uint8_t,
int16_t,
int,
int64_t,
bool,
phi::dtype::float16) {
// empty_like accepts its input on any backend: only metadata (dims/dtype) is
// read, never the input's device memory.
kernel->InputAt(0).SetBackend(phi::Backend::ALL_BACKEND);
}
#endif
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册