From 60c4c9cd91770cdbc3b92bacd0fe42c658a7195d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E6=98=8E=E5=86=AC?= <78149749+winter-wang@users.noreply.github.com> Date: Wed, 30 Mar 2022 10:23:42 +0800 Subject: [PATCH] [Infrt] add infer shape cache for kernel. (#41104) --- paddle/infrt/host_context/kernel_registry.cc | 28 +++++------ paddle/infrt/host_context/kernel_registry.h | 11 +++-- .../host_context/mlir_to_runtime_translate.cc | 6 +-- paddle/infrt/kernel/phi/CMakeLists.txt | 1 - .../phi/infershaped/phi_kernel_launcher.h | 34 +++++++------ paddle/infrt/kernel/phi/registry.cc | 49 +++++++++---------- paddle/infrt/kernel/tensor_kernels.cc | 8 +-- tools/infrt/get_phi_kernel_info.py | 2 +- 8 files changed, 70 insertions(+), 69 deletions(-) diff --git a/paddle/infrt/host_context/kernel_registry.cc b/paddle/infrt/host_context/kernel_registry.cc index 4209b2a964..5693e973a3 100644 --- a/paddle/infrt/host_context/kernel_registry.cc +++ b/paddle/infrt/host_context/kernel_registry.cc @@ -24,30 +24,30 @@ namespace host_context { struct KernelRegistry::Impl { std::unordered_map>> + std::pair>> data; }; KernelRegistry::KernelRegistry() : impl_(std::make_unique()) {} -void KernelRegistry::AddKernel(const std::string &key, - KernelImplementation fn) { - CHECK(!impl_->data.count(key)) << "kernel [" << key - << "] is registered twice"; - impl_->data.emplace( - key, std::make_pair(std::move(fn), std::vector{})); -} - const std::vector &KernelRegistry::GetAttrNameList( const std::string &key) const { CHECK(impl_->data.count(key)); return impl_->data[key].second; } -void KernelRegistry::AddKernelWithAttrs( - const std::string &key, - KernelImplementation fn, - std::vector &&attr_order) { +void KernelRegistry::AddKernel(const std::string &key, + KernelImplementation fn, + const std::vector &attr_order) { + CHECK(!impl_->data.count(key)) << "kernel [" << key + << "] is registered twice"; + impl_->data.emplace( + key, std::make_pair([fn]() { return fn; }, std::move(attr_order))); +} + +void KernelRegistry::AddKernel(const std::string &key, + KernelLauncher fn, + const std::vector &attr_order) { CHECK(!impl_->data.count(key)) << "kernel [" << key << "] is registered twice"; impl_->data.emplace(key, @@ -56,7 +56,7 @@ void KernelRegistry::AddKernelWithAttrs( KernelImplementation KernelRegistry::GetKernel(const std::string &key) const { auto it = impl_->data.find(key); - return it != impl_->data.end() ? it->second.first : KernelImplementation{}; + return it != impl_->data.end() ? it->second.first() : KernelImplementation{}; } std::vector KernelRegistry::GetKernelList() const { diff --git a/paddle/infrt/host_context/kernel_registry.h b/paddle/infrt/host_context/kernel_registry.h index a146b2b3c4..a9f2b407bd 100644 --- a/paddle/infrt/host_context/kernel_registry.h +++ b/paddle/infrt/host_context/kernel_registry.h @@ -25,6 +25,7 @@ namespace host_context { class KernelFrame; using KernelImplementation = std::function; +using KernelLauncher = std::function; /** * Hold the kernels registered in the system. @@ -33,10 +34,12 @@ class KernelRegistry { public: KernelRegistry(); - void AddKernel(const std::string &key, KernelImplementation fn); - void AddKernelWithAttrs(const std::string &key, - KernelImplementation fn, - std::vector &&attrs_order); + void AddKernel(const std::string &key, + KernelImplementation fn, + const std::vector &attrs_order = {}); + void AddKernel(const std::string &key, + KernelLauncher fn, + const std::vector &attrs_order = {}); KernelImplementation GetKernel(const std::string &key) const; const std::vector &GetAttrNameList( diff --git a/paddle/infrt/host_context/mlir_to_runtime_translate.cc b/paddle/infrt/host_context/mlir_to_runtime_translate.cc index 007730151e..05bb28b7c5 100644 --- a/paddle/infrt/host_context/mlir_to_runtime_translate.cc +++ b/paddle/infrt/host_context/mlir_to_runtime_translate.cc @@ -360,8 +360,7 @@ bool MlirToRuntimeTranslator::EmitGeneralOp( if (attrs.size()) { if (attr_names.empty()) { LOG(WARNING) << "The kernel `" << kernel_name - << "` has not been registered with " - "`KernelRegistry::AddKernelWithAttrs()`."; + << "` has not been registered with attributes order "; } else { CHECK_EQ(attr_names.size(), attrs.size()) << "The number of kernel `" << kernel_name @@ -380,8 +379,7 @@ bool MlirToRuntimeTranslator::EmitGeneralOp( } } LOG(WARNING) << "The attribute `" << attr << "` of kernel `" << kernel_name - << "` is not properly registered with " - "`KernelRegistry::AddKernelWithAttrs()`."; + << "` is not properly register"; return -1; }; diff --git a/paddle/infrt/kernel/phi/CMakeLists.txt b/paddle/infrt/kernel/phi/CMakeLists.txt index 50f61c7ba6..22a59ab2fa 100644 --- a/paddle/infrt/kernel/phi/CMakeLists.txt +++ b/paddle/infrt/kernel/phi/CMakeLists.txt @@ -29,7 +29,6 @@ add_custom_target(infrt_register_phi_kernel cc_library(infrt_naive SRCS infershaped/infershaped_kernel_launcher.cc infershaped/infershaped_kernel_launchers.cc DEPS phi wrapped_infermeta) -add_dependencies(infrt_naive infrt_register_phi_kernel) cc_test_tiny(test_infrt_infershape_launchers SRCS infershaped/infershape_launchers_test.cc DEPS infrt) diff --git a/paddle/infrt/kernel/phi/infershaped/phi_kernel_launcher.h b/paddle/infrt/kernel/phi/infershaped/phi_kernel_launcher.h index 34ef4460fc..d870278472 100644 --- a/paddle/infrt/kernel/phi/infershaped/phi_kernel_launcher.h +++ b/paddle/infrt/kernel/phi/infershaped/phi_kernel_launcher.h @@ -17,6 +17,7 @@ #include #include "paddle/infrt/backends/host/phi_context.h" +#include "paddle/infrt/host_context/kernel_registry.h" #include "paddle/infrt/host_context/kernel_utils.h" #include "paddle/infrt/kernel/phi/infershaped/infershaped_kernel_launcher.h" #include "paddle/infrt/kernel/phi/infershaped/infershaped_utils.h" @@ -36,31 +37,36 @@ template -void KernelLauncherFunc(host_context::KernelFrame* frame) { +::infrt::host_context::KernelImplementation KernelLauncherFunc() { InferShapedKernelLauncher launcher(FuncArgStatics::arg_size); static const uint16_t num_input_tensors{InferShapeHelper::count}; static const bool turn_on_infer_shape_cache{true}; + return [=](host_context::KernelFrame* frame) mutable { #ifndef NDEBUG - LOG(INFO) << "Kernel.frame: " << frame->DumpArgTypes(); + LOG(INFO) << "Kernel.frame: " << frame->DumpArgTypes(); #endif - // Build the infershape KernelFrame if needed. - // TODO(Superjomn) add unlikely here. - if (launcher.infershape_kernel_frame_builder.IsEmpty()) { - launcher.CreateKernelFrameForInferShape(frame); + // Build the infershape KernelFrame if needed. + // TODO(Superjomn) add unlikely here. + if (launcher.infershape_kernel_frame_builder.IsEmpty()) { + launcher.CreateKernelFrameForInferShape(frame); #ifndef NDEBUG - LOG(INFO) << "infershape.frame: " - << launcher.infershape_kernel_frame_builder.DumpArgTypes(); + LOG(INFO) << "infershape.frame: " + << launcher.infershape_kernel_frame_builder.DumpArgTypes(); #endif - } - if (turn_on_infer_shape_cache) { - if (launcher.IsShapeChanged(num_input_tensors)) { + } + if (turn_on_infer_shape_cache) { + if (launcher.IsShapeChanged(num_input_tensors)) { + ::infrt::host_context::KernelImpl::Invoke( + &launcher.infershape_kernel_frame_builder); + launcher.BuildInferShapeCache(num_input_tensors); + } + } else { ::infrt::host_context::KernelImpl::Invoke( &launcher.infershape_kernel_frame_builder); - launcher.BuildInferShapeCache(num_input_tensors); } - } - ::infrt::host_context::KernelImpl::Invoke(frame); + ::infrt::host_context::KernelImpl::Invoke(frame); + }; } } // namespace kernel diff --git a/paddle/infrt/kernel/phi/registry.cc b/paddle/infrt/kernel/phi/registry.cc index 0427a2c1e5..0477881125 100644 --- a/paddle/infrt/kernel/phi/registry.cc +++ b/paddle/infrt/kernel/phi/registry.cc @@ -34,45 +34,40 @@ namespace kernel { void RegisterPhiKernels(host_context::KernelRegistry* registry) { registry->AddKernel("phi_dt.create_context.cpu", INFRT_KERNEL(infrt::kernel::phi::CreateCPUContext)); - registry->AddKernelWithAttrs( - "phi_dt.create_dense_tensor.cpu", - INFRT_KERNEL(infrt::kernel::phi::CreateDenseTensor), - {"dims", "lod", "layout", "precision"}); + registry->AddKernel("phi_dt.create_dense_tensor.cpu", + INFRT_KERNEL(infrt::kernel::phi::CreateDenseTensor), + {"dims", "lod", "layout", "precision"}); - registry->AddKernelWithAttrs( + registry->AddKernel( "phi_dt.create_inited_dense_tensor.cpu.f32", INFRT_KERNEL(infrt::kernel::phi::CreateInitedDenseTensorF32), {"dims", "lod", "layout", "value"}); - registry->AddKernelWithAttrs( - "phi_dt.fill_dense_tensor.f32", - INFRT_KERNEL(infrt::kernel::phi::FillDenseTensorF32), - {"value"}); + registry->AddKernel("phi_dt.fill_dense_tensor.f32", + INFRT_KERNEL(infrt::kernel::phi::FillDenseTensorF32), + {"value"}); registry->AddKernel("phi_dt.print_tensor", INFRT_KERNEL(infrt::kernel::phi::PrintDenseTensor)); #ifdef INFRT_WITH_GPU registry->AddKernel("phi_dt.create_context.gpu", INFRT_KERNEL(infrt::kernel::phi::CreateGPUContext)); - registry->AddKernelWithAttrs( - "phi_dt.create_dense_tensor.gpu", - INFRT_KERNEL(infrt::kernel::phi::CreateGPUDenseTensor), - {"dims", "lod", "layout", "precision"}); - registry->AddKernelWithAttrs("phi_dt.memcpy.gpu", - INFRT_KERNEL(infrt::kernel::phi::GpuMemCpy), - {"d2h"}); + registry->AddKernel("phi_dt.create_dense_tensor.gpu", + INFRT_KERNEL(infrt::kernel::phi::CreateGPUDenseTensor), + {"dims", "lod", "layout", "precision"}); + registry->AddKernel("phi_dt.memcpy.gpu", + INFRT_KERNEL(infrt::kernel::phi::GpuMemCpy), + {"d2h"}); #endif - registry->AddKernelWithAttrs("phi_dt.load_params", - INFRT_KERNEL(infrt::kernel::phi::LoadParams), - {"path"}); - registry->AddKernelWithAttrs( - "phi_dt.load_combined_params", - INFRT_KERNEL(infrt::kernel::phi::LoadCombinedParams), - {"model_path", "params_path"}); - registry->AddKernelWithAttrs( - "phi_dt.tensor_map_get_tensor", - INFRT_KERNEL(infrt::kernel::phi::TensorMapGetTensor), - {"name"}); + registry->AddKernel("phi_dt.load_params", + INFRT_KERNEL(infrt::kernel::phi::LoadParams), + {"path"}); + registry->AddKernel("phi_dt.load_combined_params", + INFRT_KERNEL(infrt::kernel::phi::LoadCombinedParams), + {"model_path", "params_path"}); + registry->AddKernel("phi_dt.tensor_map_get_tensor", + INFRT_KERNEL(infrt::kernel::phi::TensorMapGetTensor), + {"name"}); registry->AddKernel("phi_dt.tensor_map_get_size", INFRT_KERNEL(infrt::kernel::phi::TensorMapGetSize)); } diff --git a/paddle/infrt/kernel/tensor_kernels.cc b/paddle/infrt/kernel/tensor_kernels.cc index 407ae16c19..65e137472b 100644 --- a/paddle/infrt/kernel/tensor_kernels.cc +++ b/paddle/infrt/kernel/tensor_kernels.cc @@ -129,9 +129,9 @@ void NaiveMatmul(const DenseHostTensor &x, /// ===== Kernel end ==== void RegisterTensorKernels(host_context::KernelRegistry *registry) { - registry->AddKernelWithAttrs("dt.create_uninit_tensor.f32", - INFRT_KERNEL(CreateUninitTensor), - {"shape"}); + registry->AddKernel("dt.create_uninit_tensor.f32", + INFRT_KERNEL(CreateUninitTensor), + {"shape"}); registry->AddKernel("dt.print_tensor", INFRT_KERNEL(PrintTensor)); registry->AddKernel("dt.fill_tensor_with_constant.f32", INFRT_KERNEL(FillTensorWithConstant)); @@ -146,7 +146,7 @@ void RegisterTensorKernels(host_context::KernelRegistry *registry) { // TensorList related methods. #ifdef INFRT_WITH_PHI - registry->AddKernelWithAttrs( + registry->AddKernel( "dt.tensor_list_get_tensor", INFRT_KERNEL(TensorListGetTensor), {"id"}); registry->AddKernel("dt.tensor_list_get_size", INFRT_KERNEL(TensorListGetSize)); diff --git a/tools/infrt/get_phi_kernel_info.py b/tools/infrt/get_phi_kernel_info.py index 3fb40706e2..c4c02d67cf 100644 --- a/tools/infrt/get_phi_kernel_info.py +++ b/tools/infrt/get_phi_kernel_info.py @@ -287,7 +287,7 @@ def gen_register_code_info(item: List[str], attr_data: Dict[str, List[str]]): attr_names = ', '.join( ["\"" + a + "\"" for a in attr_data[ir_name]]) res += f""" -registry->AddKernelWithAttrs("{ir_name}",""" +registry->AddKernel("{ir_name}",""" res += f""" &KernelLauncherFunc