From 782454bd1d31762994307a340f8d0a9503fea802 Mon Sep 17 00:00:00 2001 From: zyfncg Date: Thu, 9 Mar 2023 14:16:33 +0800 Subject: [PATCH] [PHI] Register custom kernel for all type of custom device (#51262) * register custom kernel for all type of custom device * fix bug * fix GetKernelInputArgDef * fix amp bug * fix TransToPhiPlace * adapt interpreter_util --- .../new_executor/interpreter/data_transfer.cc | 10 +++++- .../interpreter/interpreter_util.cc | 4 ++- paddle/fluid/framework/operator.cc | 10 +++++- paddle/fluid/imperative/amp_auto_cast.cc | 25 ++++++++++++--- paddle/fluid/operators/controlflow/feed_op.cc | 21 +++++++------ .../custom_device_common_op_registry.cc | 5 --- paddle/phi/api/lib/data_transform.h | 13 ++++++++ paddle/phi/api/yaml/generator/api_base.py | 8 ++--- paddle/phi/common/backend.h | 5 +++ paddle/phi/core/compat/convert_utils.cc | 2 ++ paddle/phi/core/kernel_factory.cc | 14 +++++++++ paddle/phi/core/kernel_registry.h | 10 +++--- paddle/phi/kernels/empty_kernel.cc | 31 +++++++++++++++++++ 13 files changed, 128 insertions(+), 30 deletions(-) diff --git a/paddle/fluid/framework/new_executor/interpreter/data_transfer.cc b/paddle/fluid/framework/new_executor/interpreter/data_transfer.cc index b31b81db9f8..6e866cdf8c5 100644 --- a/paddle/fluid/framework/new_executor/interpreter/data_transfer.cc +++ b/paddle/fluid/framework/new_executor/interpreter/data_transfer.cc @@ -665,9 +665,17 @@ void ApplyDataTransform(const OpKernelType& expected_kernel_key, bool should_skip_input = no_buffer_ins && no_buffer_ins->count(parameter_name) > 0; + phi::TensorArgDef in_def = input_defs.at(i); +#ifdef PADDLE_WITH_CUSTOM_DEVICE + // When the backend of input tensor arg_def is CUSTOM, we need to set it + // to the actual backend by expected_kernel_key. + if (in_def.backend == phi::Backend::CUSTOM) { + in_def.SetBackend(phi::TransToPhiBackend(expected_kernel_key.place_)); + } +#endif apply_data_transform_for_one_parameter(parameter_name, new_ins[parameter_name], - &input_defs.at(i), + &in_def, should_skip_input, &arguments); } diff --git a/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc b/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc index 27fdcefc730..fd5f4cfcf88 100644 --- a/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc +++ b/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc @@ -1090,7 +1090,9 @@ void FakeInitializeOutputsForFunctionKernel( if (out_tensor && !out_tensor->initialized()) { phi::TensorArgDef& tensor_arg_def = output_defs[start_idx + offset]; phi::DataType dtype = tensor_arg_def.dtype; - phi::Place place = phi::TransToPhiPlace(tensor_arg_def.backend); + phi::Place place = tensor_arg_def.backend == phi::Backend::CUSTOM + ? dev_ctx.GetPlace() + : phi::TransToPhiPlace(tensor_arg_def.backend); if (dtype == DataType::UNDEFINED || OpsNeedSetOutputDtypeWhenRegisterPhiKernel.count( diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index d662e7e8d83..2a6a25928ac 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -2650,7 +2650,6 @@ Scope* OperatorWithKernel::PrepareData( input_names.size(), input_defs.size())); for (size_t i = 0; i < input_defs.size(); ++i) { - auto& in_def = input_defs.at(i); std::string input_name = input_names[i]; auto iter = ctx->inputs.find(input_name); if (iter == ctx->inputs.end()) { @@ -2659,6 +2658,15 @@ Scope* OperatorWithKernel::PrepareData( auto& ins_vector = iter->second; bool should_skip_input = no_buffer_ins && no_buffer_ins->count(input_name) > 0; + + phi::TensorArgDef in_def = input_defs.at(i); +#ifdef PADDLE_WITH_CUSTOM_DEVICE + // When the backend of input tensor arg_def is CUSTOM, we need to set it + // to the actual backend by expected_kernel_key. + if (in_def.backend == phi::Backend::CUSTOM) { + in_def.SetBackend(expected_kernel_key.backend()); + } +#endif prepare_input_data(input_name, &ins_vector, &in_def, should_skip_input); } #ifdef PADDLE_WITH_MKLDNN diff --git a/paddle/fluid/imperative/amp_auto_cast.cc b/paddle/fluid/imperative/amp_auto_cast.cc index bf428ddf9b7..40e3b12cc4b 100644 --- a/paddle/fluid/imperative/amp_auto_cast.cc +++ b/paddle/fluid/imperative/amp_auto_cast.cc @@ -78,14 +78,31 @@ OpSupportedInfos(const std::string& place, } } +#ifdef PADDLE_WITH_CUSTOM_DEVICE + auto is_custom_place = [&](const std::string& place) { + return is_target_place.count(place) && place != "CPU" && place != "GPU" && + place != "XPU"; + }; +#endif auto phi_kernels = phi::KernelFactory::Instance().kernels(); for (auto& kernel_pair : phi_kernels) { auto op_type = phi::TransToFluidOpName(kernel_pair.first); for (auto& info_pair : kernel_pair.second) { - framework::OpKernelType kernel_type = - framework::TransPhiKernelKeyToOpKernelType(info_pair.first); - if (is_target_place[query_place](kernel_type.place_) && - kernel_type.data_type_ == dtype && all_ops.count(op_type)) { + if (dtype != framework::TransToProtoVarType(info_pair.first.dtype()) || + all_ops.count(op_type) == 0) { + continue; + } +#ifdef PADDLE_WITH_CUSTOM_DEVICE + if (info_pair.first.backend() == phi::Backend::CUSTOM) { + if (is_custom_place(query_place)) { + VLOG(4) << op_type << " " << supported_ops.size(); + supported_ops.emplace(op_type); + } + continue; + } +#endif + if (is_target_place[query_place]( + phi::TransToPhiPlace(info_pair.first.backend(), false))) { VLOG(4) << op_type << " " << supported_ops.size(); supported_ops.emplace(op_type); } diff --git a/paddle/fluid/operators/controlflow/feed_op.cc b/paddle/fluid/operators/controlflow/feed_op.cc index ef53bf5b261..4352d58047e 100644 --- a/paddle/fluid/operators/controlflow/feed_op.cc +++ b/paddle/fluid/operators/controlflow/feed_op.cc @@ -273,13 +273,16 @@ PD_REGISTER_GENERAL_KERNEL( ALL_DTYPE) {} #endif #ifdef PADDLE_WITH_CUSTOM_DEVICE -namespace paddle { -namespace operators { -template void FeedDenseTensorKernel( - const phi::CustomContext& dev_ctx, - const phi::ExtendedTensor& x, - int col, - phi::DenseTensor* out); -} // namespace operators -} // namespace paddle +PD_REGISTER_GENERAL_KERNEL( + feed_dense_tensor, + Custom, + ALL_LAYOUT, + paddle::operators::FeedDenseTensorKernel, + ALL_DTYPE) {} +PD_REGISTER_GENERAL_KERNEL( + feed_strings, + Custom, + ALL_LAYOUT, + paddle::operators::FeedStringsKernel, + ALL_DTYPE) {} #endif diff --git a/paddle/fluid/operators/custom_device_common_op_registry.cc b/paddle/fluid/operators/custom_device_common_op_registry.cc index b921fd82166..e59b308d935 100644 --- a/paddle/fluid/operators/custom_device_common_op_registry.cc +++ b/paddle/fluid/operators/custom_device_common_op_registry.cc @@ -87,11 +87,6 @@ void RegisterCustomDeviceCommonKernel(const std::string& dev_type) { LoadCombineOpKernel, paddle::operators:: LoadCombineOpKernel); - REGISTER_CUSTOM_DEVICE_GENERAL_KERNEL( - feed_dense_tensor, - device_type, - ALL_LAYOUT, - paddle::operators::FeedDenseTensorKernel); #endif } diff --git a/paddle/phi/api/lib/data_transform.h b/paddle/phi/api/lib/data_transform.h index 7a97bb01f61..c8d30a7df07 100644 --- a/paddle/phi/api/lib/data_transform.h +++ b/paddle/phi/api/lib/data_transform.h @@ -62,6 +62,19 @@ class TransformFlag { bool trans_layout_ = true; }; +static inline phi::TensorArgDef GetKernelInputArgDef( + const phi::TensorArgDef& input_def, phi::Backend kernel_backend) { + phi::TensorArgDef input_actual_def = input_def; +#ifdef PADDLE_WITH_CUSTOM_DEVICE + // When the backend of input tensor arg_def is CUSTOM, we need to set it to + // the actual backend by expected_kernel_key. + if (input_actual_def.backend == phi::Backend::CUSTOM) { + input_actual_def.SetBackend(kernel_backend); + } +#endif + return input_actual_def; +} + std::shared_ptr PrepareData( const Tensor& input, const phi::TensorArgDef& target_args_def, diff --git a/paddle/phi/api/yaml/generator/api_base.py b/paddle/phi/api/yaml/generator/api_base.py index 4b12fedc375..3f682ecfec1 100644 --- a/paddle/phi/api/yaml/generator/api_base.py +++ b/paddle/phi/api/yaml/generator/api_base.py @@ -701,7 +701,7 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d input_tensor_code = ( input_tensor_code + f""" -{code_indent} auto {PREFIX_TENSOR_NAME}{input_name} = PrepareData({input_name}, kernel.InputAt({kernel_param.index(input_name)}), {trans_flag});""" +{code_indent} auto {PREFIX_TENSOR_NAME}{input_name} = PrepareData({input_name}, GetKernelInputArgDef(kernel.InputAt({kernel_param.index(input_name)}), kernel_backend), {trans_flag});""" ) return input_tensor_code @@ -722,7 +722,7 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d input_tensor_code = ( input_tensor_code + f""" -{code_indent} auto {PREFIX_TENSOR_NAME}{input_name} = PrepareDataForSelectedRows({input_name}, kernel.InputAt({kernel_param.index(input_name)}), {trans_flag}); +{code_indent} auto {PREFIX_TENSOR_NAME}{input_name} = PrepareDataForSelectedRows({input_name}, GetKernelInputArgDef(kernel.InputAt({kernel_param.index(input_name)}), kernel_backend), {trans_flag}); """ ) return input_tensor_code @@ -753,7 +753,7 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d input_tensor_code = ( input_tensor_code + f""" -{code_indent} auto {PREFIX_TENSOR_NAME}{input_name}_vec = PrepareData({input_name}, kernel.InputAt({kernel_param.index(input_name)}), {trans_flag}); +{code_indent} auto {PREFIX_TENSOR_NAME}{input_name}_vec = PrepareData({input_name}, GetKernelInputArgDef(kernel.InputAt({kernel_param.index(input_name)}), kernel_backend), {trans_flag}); {code_indent} paddle::optional> {PREFIX_TENSOR_NAME}{input_name}; {code_indent} if ({PREFIX_TENSOR_NAME}{input_name}_vec){{ {code_indent} {PREFIX_TENSOR_NAME}{input_name} = paddle::optional>({PREFIX_TENSOR_NAME}{input_name}_vec->size()); @@ -791,7 +791,7 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d input_tensor_code = ( input_tensor_code + f""" -{code_indent} auto {PREFIX_TENSOR_NAME}{input_name}_vec = PrepareData({input_name}, kernel.InputAt({kernel_param.index(input_name)}), {trans_flag}); +{code_indent} auto {PREFIX_TENSOR_NAME}{input_name}_vec = PrepareData({input_name}, GetKernelInputArgDef(kernel.InputAt({kernel_param.index(input_name)}), kernel_backend), {trans_flag}); {code_indent} std::vector {PREFIX_TENSOR_NAME}{input_name}({PREFIX_TENSOR_NAME}{input_name}_vec->size()); {code_indent} for (size_t i = 0; i < {PREFIX_TENSOR_NAME}{input_name}.size(); ++i) {{ {code_indent} {PREFIX_TENSOR_NAME}{input_name}[i] = &{PREFIX_TENSOR_NAME}{input_name}_vec->at(i); diff --git a/paddle/phi/common/backend.h b/paddle/phi/common/backend.h index de3502e5e54..2cbcb1d0cc1 100644 --- a/paddle/phi/common/backend.h +++ b/paddle/phi/common/backend.h @@ -134,6 +134,9 @@ inline std::ostream& operator<<(std::ostream& os, Backend backend) { case Backend::IPU: os << "IPU"; break; + case Backend::CUSTOM: + os << "CUSTOM"; + break; default: { size_t device_type_id_ = static_cast(backend) - static_cast(Backend::NUM_BACKENDS); @@ -181,6 +184,8 @@ inline Backend StringToBackend(const char* backend_cstr) { #endif } else if (s == std::string("IPU")) { return Backend::IPU; + } else if (s == std::string("Custom")) { + return Backend::CUSTOM; } else { return static_cast(static_cast(Backend::NUM_BACKENDS) + phi::CustomRegisteredDeviceMap::Instance() diff --git a/paddle/phi/core/compat/convert_utils.cc b/paddle/phi/core/compat/convert_utils.cc index 149c62f1167..825bd65be86 100644 --- a/paddle/phi/core/compat/convert_utils.cc +++ b/paddle/phi/core/compat/convert_utils.cc @@ -99,6 +99,8 @@ phi::Place TransToPhiPlace(const Backend& backend, bool set_device_id) { return phi::CustomPlace( device_type, set_device_id ? phi::DeviceManager::GetDevice(device_type) : 0); + } else if (backend == Backend::CUSTOM) { + return phi::CustomPlace(); } #endif PADDLE_THROW(phi::errors::Unimplemented( diff --git a/paddle/phi/core/kernel_factory.cc b/paddle/phi/core/kernel_factory.cc index 6af7ac7b9b7..b199230da9f 100644 --- a/paddle/phi/core/kernel_factory.cc +++ b/paddle/phi/core/kernel_factory.cc @@ -93,6 +93,14 @@ const Kernel& KernelFactory::SelectKernel(const std::string& kernel_name, kernel_key.backend(), phi::DataLayout::ALL_LAYOUT, kernel_key.dtype()); kernel_iter = iter->second.find(any_layout_kernel_key); } +#if defined(PADDLE_WITH_CUSTOM_DEVICE) + if (kernel_iter == iter->second.end() && + kernel_key.backend() > phi::Backend::NUM_BACKENDS) { + kernel_iter = iter->second.find({phi::Backend::CUSTOM, + phi::DataLayout::ALL_LAYOUT, + kernel_key.dtype()}); + } +#endif if (kernel_iter == iter->second.end()) { return empty_kernel; @@ -220,6 +228,12 @@ KernelResult KernelFactory::SelectKernelOrThrowError( !phi::backends::xpu::is_xpu_support_op(TransToFluidOpName(kernel_name), kernel_key.dtype()) #elif defined(PADDLE_WITH_CUSTOM_DEVICE) + if (kernel_iter == iter->second.end() && + kernel_key.backend() > phi::Backend::NUM_BACKENDS) { + kernel_iter = iter->second.find({phi::Backend::CUSTOM, + phi::DataLayout::ALL_LAYOUT, + kernel_key.dtype()}); + } if (FLAGS_enable_api_kernel_fallback && (kernel_iter == iter->second.end() || phi::backends::custom_device::is_in_custom_black_list( diff --git a/paddle/phi/core/kernel_registry.h b/paddle/phi/core/kernel_registry.h index 4d984acd0c3..eb433f39f0b 100644 --- a/paddle/phi/core/kernel_registry.h +++ b/paddle/phi/core/kernel_registry.h @@ -61,15 +61,15 @@ struct KernelArgsParseFunctor { || arg_type == std::type_index(typeid(const OneDNNContext&)) #endif #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - || arg_type == std::type_index(typeid(const GPUContext&))) { + || arg_type == std::type_index(typeid(const GPUContext&)) #elif defined(PADDLE_WITH_XPU) && !defined(PADDLE_WITH_XPU_KP) - || arg_type == std::type_index(typeid(const XPUContext&))) { + || arg_type == std::type_index(typeid(const XPUContext&)) #elif defined(PADDLE_WITH_XPU) && defined(PADDLE_WITH_XPU_KP) - || arg_type == std::type_index(typeid(const KPSContext&))) { -#elif defined(PADDLE_WITH_CUSTOM_DEVICE) + || arg_type == std::type_index(typeid(const KPSContext&)) +#endif +#if defined(PADDLE_WITH_CUSTOM_DEVICE) || arg_type == std::type_index(typeid(const CustomContext&))) { #else - ) { #endif // do nothing, skip context arg now diff --git a/paddle/phi/kernels/empty_kernel.cc b/paddle/phi/kernels/empty_kernel.cc index 6f7500b41f5..f2782a63c90 100644 --- a/paddle/phi/kernels/empty_kernel.cc +++ b/paddle/phi/kernels/empty_kernel.cc @@ -142,3 +142,34 @@ PD_REGISTER_KERNEL(empty_like, kernel->InputAt(0).SetBackend(phi::Backend::ALL_BACKEND); } #endif + +#ifdef PADDLE_WITH_CUSTOM_DEVICE +PD_REGISTER_KERNEL(empty, + Custom, + ALL_LAYOUT, + phi::EmptyKernel, + float, + double, + int8_t, + uint8_t, + int16_t, + int, + int64_t, + bool, + phi::dtype::float16) {} +PD_REGISTER_KERNEL(empty_like, + Custom, + ALL_LAYOUT, + phi::EmptyLikeKernel, + float, + double, + int8_t, + uint8_t, + int16_t, + int, + int64_t, + bool, + phi::dtype::float16) { + kernel->InputAt(0).SetBackend(phi::Backend::ALL_BACKEND); +} +#endif -- GitLab