未验证 提交 782454bd 编写于 作者: Z zyfncg 提交者: GitHub

[PHI] Register custom kernel for all type of custom device (#51262)

* register custom kernel for all type of custom device

* fix bug

* fix GetKernelInputArgDef

* fix amp bug

* fix TransToPhiPlace

* adapt interpreter_util
上级 2847980c
...@@ -665,9 +665,17 @@ void ApplyDataTransform(const OpKernelType& expected_kernel_key, ...@@ -665,9 +665,17 @@ void ApplyDataTransform(const OpKernelType& expected_kernel_key,
bool should_skip_input = bool should_skip_input =
no_buffer_ins && no_buffer_ins->count(parameter_name) > 0; no_buffer_ins && no_buffer_ins->count(parameter_name) > 0;
phi::TensorArgDef in_def = input_defs.at(i);
#ifdef PADDLE_WITH_CUSTOM_DEVICE
// When the backend of input tensor arg_def is CUSTOM, we need to set it
// to the actual backend by expected_kernel_key.
if (in_def.backend == phi::Backend::CUSTOM) {
in_def.SetBackend(phi::TransToPhiBackend(expected_kernel_key.place_));
}
#endif
apply_data_transform_for_one_parameter(parameter_name, apply_data_transform_for_one_parameter(parameter_name,
new_ins[parameter_name], new_ins[parameter_name],
&input_defs.at(i), &in_def,
should_skip_input, should_skip_input,
&arguments); &arguments);
} }
......
...@@ -1090,7 +1090,9 @@ void FakeInitializeOutputsForFunctionKernel( ...@@ -1090,7 +1090,9 @@ void FakeInitializeOutputsForFunctionKernel(
if (out_tensor && !out_tensor->initialized()) { if (out_tensor && !out_tensor->initialized()) {
phi::TensorArgDef& tensor_arg_def = output_defs[start_idx + offset]; phi::TensorArgDef& tensor_arg_def = output_defs[start_idx + offset];
phi::DataType dtype = tensor_arg_def.dtype; phi::DataType dtype = tensor_arg_def.dtype;
phi::Place place = phi::TransToPhiPlace(tensor_arg_def.backend); phi::Place place = tensor_arg_def.backend == phi::Backend::CUSTOM
? dev_ctx.GetPlace()
: phi::TransToPhiPlace(tensor_arg_def.backend);
if (dtype == DataType::UNDEFINED || if (dtype == DataType::UNDEFINED ||
OpsNeedSetOutputDtypeWhenRegisterPhiKernel.count( OpsNeedSetOutputDtypeWhenRegisterPhiKernel.count(
......
...@@ -2650,7 +2650,6 @@ Scope* OperatorWithKernel::PrepareData( ...@@ -2650,7 +2650,6 @@ Scope* OperatorWithKernel::PrepareData(
input_names.size(), input_names.size(),
input_defs.size())); input_defs.size()));
for (size_t i = 0; i < input_defs.size(); ++i) { for (size_t i = 0; i < input_defs.size(); ++i) {
auto& in_def = input_defs.at(i);
std::string input_name = input_names[i]; std::string input_name = input_names[i];
auto iter = ctx->inputs.find(input_name); auto iter = ctx->inputs.find(input_name);
if (iter == ctx->inputs.end()) { if (iter == ctx->inputs.end()) {
...@@ -2659,6 +2658,15 @@ Scope* OperatorWithKernel::PrepareData( ...@@ -2659,6 +2658,15 @@ Scope* OperatorWithKernel::PrepareData(
auto& ins_vector = iter->second; auto& ins_vector = iter->second;
bool should_skip_input = bool should_skip_input =
no_buffer_ins && no_buffer_ins->count(input_name) > 0; no_buffer_ins && no_buffer_ins->count(input_name) > 0;
phi::TensorArgDef in_def = input_defs.at(i);
#ifdef PADDLE_WITH_CUSTOM_DEVICE
// When the backend of input tensor arg_def is CUSTOM, we need to set it
// to the actual backend by expected_kernel_key.
if (in_def.backend == phi::Backend::CUSTOM) {
in_def.SetBackend(expected_kernel_key.backend());
}
#endif
prepare_input_data(input_name, &ins_vector, &in_def, should_skip_input); prepare_input_data(input_name, &ins_vector, &in_def, should_skip_input);
} }
#ifdef PADDLE_WITH_MKLDNN #ifdef PADDLE_WITH_MKLDNN
......
...@@ -78,14 +78,31 @@ OpSupportedInfos(const std::string& place, ...@@ -78,14 +78,31 @@ OpSupportedInfos(const std::string& place,
} }
} }
#ifdef PADDLE_WITH_CUSTOM_DEVICE
auto is_custom_place = [&](const std::string& place) {
return is_target_place.count(place) && place != "CPU" && place != "GPU" &&
place != "XPU";
};
#endif
auto phi_kernels = phi::KernelFactory::Instance().kernels(); auto phi_kernels = phi::KernelFactory::Instance().kernels();
for (auto& kernel_pair : phi_kernels) { for (auto& kernel_pair : phi_kernels) {
auto op_type = phi::TransToFluidOpName(kernel_pair.first); auto op_type = phi::TransToFluidOpName(kernel_pair.first);
for (auto& info_pair : kernel_pair.second) { for (auto& info_pair : kernel_pair.second) {
framework::OpKernelType kernel_type = if (dtype != framework::TransToProtoVarType(info_pair.first.dtype()) ||
framework::TransPhiKernelKeyToOpKernelType(info_pair.first); all_ops.count(op_type) == 0) {
if (is_target_place[query_place](kernel_type.place_) && continue;
kernel_type.data_type_ == dtype && all_ops.count(op_type)) { }
#ifdef PADDLE_WITH_CUSTOM_DEVICE
if (info_pair.first.backend() == phi::Backend::CUSTOM) {
if (is_custom_place(query_place)) {
VLOG(4) << op_type << " " << supported_ops.size();
supported_ops.emplace(op_type);
}
continue;
}
#endif
if (is_target_place[query_place](
phi::TransToPhiPlace(info_pair.first.backend(), false))) {
VLOG(4) << op_type << " " << supported_ops.size(); VLOG(4) << op_type << " " << supported_ops.size();
supported_ops.emplace(op_type); supported_ops.emplace(op_type);
} }
......
...@@ -273,13 +273,16 @@ PD_REGISTER_GENERAL_KERNEL( ...@@ -273,13 +273,16 @@ PD_REGISTER_GENERAL_KERNEL(
ALL_DTYPE) {} ALL_DTYPE) {}
#endif #endif
#ifdef PADDLE_WITH_CUSTOM_DEVICE #ifdef PADDLE_WITH_CUSTOM_DEVICE
namespace paddle { PD_REGISTER_GENERAL_KERNEL(
namespace operators { feed_dense_tensor,
template void FeedDenseTensorKernel<phi::CustomContext>( Custom,
const phi::CustomContext& dev_ctx, ALL_LAYOUT,
const phi::ExtendedTensor& x, paddle::operators::FeedDenseTensorKernel<phi::CustomContext>,
int col, ALL_DTYPE) {}
phi::DenseTensor* out); PD_REGISTER_GENERAL_KERNEL(
} // namespace operators feed_strings,
} // namespace paddle Custom,
ALL_LAYOUT,
paddle::operators::FeedStringsKernel<phi::CustomContext>,
ALL_DTYPE) {}
#endif #endif
...@@ -87,11 +87,6 @@ void RegisterCustomDeviceCommonKernel(const std::string& dev_type) { ...@@ -87,11 +87,6 @@ void RegisterCustomDeviceCommonKernel(const std::string& dev_type) {
LoadCombineOpKernel<paddle::platform::CustomDeviceContext, int8_t>, LoadCombineOpKernel<paddle::platform::CustomDeviceContext, int8_t>,
paddle::operators:: paddle::operators::
LoadCombineOpKernel<paddle::platform::CustomDeviceContext, int64_t>); LoadCombineOpKernel<paddle::platform::CustomDeviceContext, int64_t>);
REGISTER_CUSTOM_DEVICE_GENERAL_KERNEL(
feed_dense_tensor,
device_type,
ALL_LAYOUT,
paddle::operators::FeedDenseTensorKernel<phi::CustomContext>);
#endif #endif
} }
......
...@@ -62,6 +62,19 @@ class TransformFlag { ...@@ -62,6 +62,19 @@ class TransformFlag {
bool trans_layout_ = true; bool trans_layout_ = true;
}; };
// Resolve the effective argument definition for a kernel input.
//
// Kernels registered for the generic CUSTOM backend declare their inputs
// with phi::Backend::CUSTOM as a placeholder. Before data preparation, that
// placeholder must be replaced with the backend the kernel was actually
// selected for (`kernel_backend`), so data transforms target a real device.
// For all other backends the definition is returned unchanged.
static inline phi::TensorArgDef GetKernelInputArgDef(
    const phi::TensorArgDef& input_def, phi::Backend kernel_backend) {
  phi::TensorArgDef actual_def = input_def;
#ifdef PADDLE_WITH_CUSTOM_DEVICE
  if (actual_def.backend == phi::Backend::CUSTOM) {
    // Substitute the placeholder with the concrete custom-device backend.
    actual_def.SetBackend(kernel_backend);
  }
#endif
  return actual_def;
}
std::shared_ptr<phi::DenseTensor> PrepareData( std::shared_ptr<phi::DenseTensor> PrepareData(
const Tensor& input, const Tensor& input,
const phi::TensorArgDef& target_args_def, const phi::TensorArgDef& target_args_def,
......
...@@ -701,7 +701,7 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d ...@@ -701,7 +701,7 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d
input_tensor_code = ( input_tensor_code = (
input_tensor_code input_tensor_code
+ f""" + f"""
{code_indent} auto {PREFIX_TENSOR_NAME}{input_name} = PrepareData({input_name}, kernel.InputAt({kernel_param.index(input_name)}), {trans_flag});""" {code_indent} auto {PREFIX_TENSOR_NAME}{input_name} = PrepareData({input_name}, GetKernelInputArgDef(kernel.InputAt({kernel_param.index(input_name)}), kernel_backend), {trans_flag});"""
) )
return input_tensor_code return input_tensor_code
...@@ -722,7 +722,7 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d ...@@ -722,7 +722,7 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d
input_tensor_code = ( input_tensor_code = (
input_tensor_code input_tensor_code
+ f""" + f"""
{code_indent} auto {PREFIX_TENSOR_NAME}{input_name} = PrepareDataForSelectedRows({input_name}, kernel.InputAt({kernel_param.index(input_name)}), {trans_flag}); {code_indent} auto {PREFIX_TENSOR_NAME}{input_name} = PrepareDataForSelectedRows({input_name}, GetKernelInputArgDef(kernel.InputAt({kernel_param.index(input_name)}), kernel_backend), {trans_flag});
""" """
) )
return input_tensor_code return input_tensor_code
...@@ -753,7 +753,7 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d ...@@ -753,7 +753,7 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d
input_tensor_code = ( input_tensor_code = (
input_tensor_code input_tensor_code
+ f""" + f"""
{code_indent} auto {PREFIX_TENSOR_NAME}{input_name}_vec = PrepareData({input_name}, kernel.InputAt({kernel_param.index(input_name)}), {trans_flag}); {code_indent} auto {PREFIX_TENSOR_NAME}{input_name}_vec = PrepareData({input_name}, GetKernelInputArgDef(kernel.InputAt({kernel_param.index(input_name)}), kernel_backend), {trans_flag});
{code_indent} paddle::optional<std::vector<const phi::DenseTensor*>> {PREFIX_TENSOR_NAME}{input_name}; {code_indent} paddle::optional<std::vector<const phi::DenseTensor*>> {PREFIX_TENSOR_NAME}{input_name};
{code_indent} if ({PREFIX_TENSOR_NAME}{input_name}_vec){{ {code_indent} if ({PREFIX_TENSOR_NAME}{input_name}_vec){{
{code_indent} {PREFIX_TENSOR_NAME}{input_name} = paddle::optional<std::vector<const phi::DenseTensor*>>({PREFIX_TENSOR_NAME}{input_name}_vec->size()); {code_indent} {PREFIX_TENSOR_NAME}{input_name} = paddle::optional<std::vector<const phi::DenseTensor*>>({PREFIX_TENSOR_NAME}{input_name}_vec->size());
...@@ -791,7 +791,7 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d ...@@ -791,7 +791,7 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d
input_tensor_code = ( input_tensor_code = (
input_tensor_code input_tensor_code
+ f""" + f"""
{code_indent} auto {PREFIX_TENSOR_NAME}{input_name}_vec = PrepareData({input_name}, kernel.InputAt({kernel_param.index(input_name)}), {trans_flag}); {code_indent} auto {PREFIX_TENSOR_NAME}{input_name}_vec = PrepareData({input_name}, GetKernelInputArgDef(kernel.InputAt({kernel_param.index(input_name)}), kernel_backend), {trans_flag});
{code_indent} std::vector<const phi::DenseTensor*> {PREFIX_TENSOR_NAME}{input_name}({PREFIX_TENSOR_NAME}{input_name}_vec->size()); {code_indent} std::vector<const phi::DenseTensor*> {PREFIX_TENSOR_NAME}{input_name}({PREFIX_TENSOR_NAME}{input_name}_vec->size());
{code_indent} for (size_t i = 0; i < {PREFIX_TENSOR_NAME}{input_name}.size(); ++i) {{ {code_indent} for (size_t i = 0; i < {PREFIX_TENSOR_NAME}{input_name}.size(); ++i) {{
{code_indent} {PREFIX_TENSOR_NAME}{input_name}[i] = &{PREFIX_TENSOR_NAME}{input_name}_vec->at(i); {code_indent} {PREFIX_TENSOR_NAME}{input_name}[i] = &{PREFIX_TENSOR_NAME}{input_name}_vec->at(i);
......
...@@ -134,6 +134,9 @@ inline std::ostream& operator<<(std::ostream& os, Backend backend) { ...@@ -134,6 +134,9 @@ inline std::ostream& operator<<(std::ostream& os, Backend backend) {
case Backend::IPU: case Backend::IPU:
os << "IPU"; os << "IPU";
break; break;
case Backend::CUSTOM:
os << "CUSTOM";
break;
default: { default: {
size_t device_type_id_ = static_cast<size_t>(backend) - size_t device_type_id_ = static_cast<size_t>(backend) -
static_cast<size_t>(Backend::NUM_BACKENDS); static_cast<size_t>(Backend::NUM_BACKENDS);
...@@ -181,6 +184,8 @@ inline Backend StringToBackend(const char* backend_cstr) { ...@@ -181,6 +184,8 @@ inline Backend StringToBackend(const char* backend_cstr) {
#endif #endif
} else if (s == std::string("IPU")) { } else if (s == std::string("IPU")) {
return Backend::IPU; return Backend::IPU;
} else if (s == std::string("Custom")) {
return Backend::CUSTOM;
} else { } else {
return static_cast<Backend>(static_cast<size_t>(Backend::NUM_BACKENDS) + return static_cast<Backend>(static_cast<size_t>(Backend::NUM_BACKENDS) +
phi::CustomRegisteredDeviceMap::Instance() phi::CustomRegisteredDeviceMap::Instance()
......
...@@ -99,6 +99,8 @@ phi::Place TransToPhiPlace(const Backend& backend, bool set_device_id) { ...@@ -99,6 +99,8 @@ phi::Place TransToPhiPlace(const Backend& backend, bool set_device_id) {
return phi::CustomPlace( return phi::CustomPlace(
device_type, device_type,
set_device_id ? phi::DeviceManager::GetDevice(device_type) : 0); set_device_id ? phi::DeviceManager::GetDevice(device_type) : 0);
} else if (backend == Backend::CUSTOM) {
return phi::CustomPlace();
} }
#endif #endif
PADDLE_THROW(phi::errors::Unimplemented( PADDLE_THROW(phi::errors::Unimplemented(
......
...@@ -93,6 +93,14 @@ const Kernel& KernelFactory::SelectKernel(const std::string& kernel_name, ...@@ -93,6 +93,14 @@ const Kernel& KernelFactory::SelectKernel(const std::string& kernel_name,
kernel_key.backend(), phi::DataLayout::ALL_LAYOUT, kernel_key.dtype()); kernel_key.backend(), phi::DataLayout::ALL_LAYOUT, kernel_key.dtype());
kernel_iter = iter->second.find(any_layout_kernel_key); kernel_iter = iter->second.find(any_layout_kernel_key);
} }
#if defined(PADDLE_WITH_CUSTOM_DEVICE)
if (kernel_iter == iter->second.end() &&
kernel_key.backend() > phi::Backend::NUM_BACKENDS) {
kernel_iter = iter->second.find({phi::Backend::CUSTOM,
phi::DataLayout::ALL_LAYOUT,
kernel_key.dtype()});
}
#endif
if (kernel_iter == iter->second.end()) { if (kernel_iter == iter->second.end()) {
return empty_kernel; return empty_kernel;
...@@ -220,6 +228,12 @@ KernelResult KernelFactory::SelectKernelOrThrowError( ...@@ -220,6 +228,12 @@ KernelResult KernelFactory::SelectKernelOrThrowError(
!phi::backends::xpu::is_xpu_support_op(TransToFluidOpName(kernel_name), !phi::backends::xpu::is_xpu_support_op(TransToFluidOpName(kernel_name),
kernel_key.dtype()) kernel_key.dtype())
#elif defined(PADDLE_WITH_CUSTOM_DEVICE) #elif defined(PADDLE_WITH_CUSTOM_DEVICE)
if (kernel_iter == iter->second.end() &&
kernel_key.backend() > phi::Backend::NUM_BACKENDS) {
kernel_iter = iter->second.find({phi::Backend::CUSTOM,
phi::DataLayout::ALL_LAYOUT,
kernel_key.dtype()});
}
if (FLAGS_enable_api_kernel_fallback && if (FLAGS_enable_api_kernel_fallback &&
(kernel_iter == iter->second.end() || (kernel_iter == iter->second.end() ||
phi::backends::custom_device::is_in_custom_black_list( phi::backends::custom_device::is_in_custom_black_list(
......
...@@ -61,15 +61,15 @@ struct KernelArgsParseFunctor<Return_ (*)(Args_...)> { ...@@ -61,15 +61,15 @@ struct KernelArgsParseFunctor<Return_ (*)(Args_...)> {
|| arg_type == std::type_index(typeid(const OneDNNContext&)) || arg_type == std::type_index(typeid(const OneDNNContext&))
#endif #endif
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
|| arg_type == std::type_index(typeid(const GPUContext&))) { || arg_type == std::type_index(typeid(const GPUContext&))
#elif defined(PADDLE_WITH_XPU) && !defined(PADDLE_WITH_XPU_KP) #elif defined(PADDLE_WITH_XPU) && !defined(PADDLE_WITH_XPU_KP)
|| arg_type == std::type_index(typeid(const XPUContext&))) { || arg_type == std::type_index(typeid(const XPUContext&))
#elif defined(PADDLE_WITH_XPU) && defined(PADDLE_WITH_XPU_KP) #elif defined(PADDLE_WITH_XPU) && defined(PADDLE_WITH_XPU_KP)
|| arg_type == std::type_index(typeid(const KPSContext&))) { || arg_type == std::type_index(typeid(const KPSContext&))
#elif defined(PADDLE_WITH_CUSTOM_DEVICE) #endif
#if defined(PADDLE_WITH_CUSTOM_DEVICE)
|| arg_type == std::type_index(typeid(const CustomContext&))) { || arg_type == std::type_index(typeid(const CustomContext&))) {
#else #else
) { ) {
#endif #endif
// do nothing, skip context arg now // do nothing, skip context arg now
......
...@@ -142,3 +142,34 @@ PD_REGISTER_KERNEL(empty_like, ...@@ -142,3 +142,34 @@ PD_REGISTER_KERNEL(empty_like,
kernel->InputAt(0).SetBackend(phi::Backend::ALL_BACKEND); kernel->InputAt(0).SetBackend(phi::Backend::ALL_BACKEND);
} }
#endif #endif
#ifdef PADDLE_WITH_CUSTOM_DEVICE
// Register `empty` once for the generic Custom backend so that every
// plugged-in custom device type shares this registration instead of
// requiring a per-device-type kernel.
PD_REGISTER_KERNEL(empty,
Custom,
ALL_LAYOUT,
phi::EmptyKernel,
float,
double,
int8_t,
uint8_t,
int16_t,
int,
int64_t,
bool,
phi::dtype::float16) {}
// Same generic-Custom registration for `empty_like`. The input arg_def is
// relaxed to ALL_BACKEND because the output placement, not the input's
// backend, determines where the kernel runs.
PD_REGISTER_KERNEL(empty_like,
Custom,
ALL_LAYOUT,
phi::EmptyLikeKernel,
float,
double,
int8_t,
uint8_t,
int16_t,
int,
int64_t,
bool,
phi::dtype::float16) {
kernel->InputAt(0).SetBackend(phi::Backend::ALL_BACKEND);
}
#endif
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册