From c39aa18e0d3fe4eddd72ff1d07839655a8af8dbb Mon Sep 17 00:00:00 2001 From: Aganlengzi Date: Tue, 8 Mar 2022 10:48:33 +0800 Subject: [PATCH] [custom kernel]Upgrade support for multiple libs (#40223) * [custom kernel]Upgrade support for multi libs * upgrade phi_custom_kernel deps --- paddle/fluid/framework/CMakeLists.txt | 2 +- paddle/fluid/inference/api/CMakeLists.txt | 2 +- paddle/fluid/platform/CMakeLists.txt | 2 +- paddle/fluid/platform/init.cc | 2 +- paddle/phi/core/CMakeLists.txt | 2 +- paddle/phi/core/custom_kernel.cc | 71 ++++++++------------- paddle/phi/core/custom_kernel.h | 14 ++-- paddle/phi/core/kernel_registry.h | 3 +- paddle/phi/kernels/CMakeLists.txt | 2 +- paddle/phi/kernels/sparse/CMakeLists.txt | 2 +- paddle/phi/tests/core/CMakeLists.txt | 2 +- paddle/phi/tests/core/test_custom_kernel.cc | 4 +- paddle/testing/CMakeLists.txt | 2 +- 13 files changed, 45 insertions(+), 65 deletions(-) diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index e486799495c..aa92a3b2226 100755 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -443,7 +443,7 @@ cc_library(custom_operator SRCS custom_operator.cc DEPS tensor attribute framewo #cc_binary(test_executor SRCS test_executor.cc DEPS executor op_registry ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS} ) #cc_binary(new_executor SRCS new_exec_test.cc DEPS operator op_registry executor ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS} profiler) -set(FLUID_FRAMEWORK_MODULES proto_desc memory lod_tensor executor data_feed_proto layer dynamic_loader custom_operator phi_custom_kernel) +set(FLUID_FRAMEWORK_MODULES proto_desc memory lod_tensor executor data_feed_proto layer dynamic_loader custom_operator) cc_library(paddle_framework DEPS ${FLUID_FRAMEWORK_MODULES}) diff --git a/paddle/fluid/inference/api/CMakeLists.txt b/paddle/fluid/inference/api/CMakeLists.txt index 6eeb5d64253..1f83e606c3f 100755 --- a/paddle/fluid/inference/api/CMakeLists.txt +++ 
b/paddle/fluid/inference/api/CMakeLists.txt @@ -31,7 +31,7 @@ cc_library(paddle_infer_contrib SRCS paddle_infer_contrib.cc DEPS zero_copy_tens cc_library(paddle_pass_builder SRCS paddle_pass_builder.cc) set(paddle_inference_api_deps lod_tensor scope reset_tensor_array - analysis_config paddle_infer_contrib zero_copy_tensor trainer_desc_proto custom_operator phi_custom_kernel) + analysis_config paddle_infer_contrib zero_copy_tensor trainer_desc_proto custom_operator) if(WITH_CRYPTO) list(APPEND paddle_inference_api_deps paddle_crypto) diff --git a/paddle/fluid/platform/CMakeLists.txt b/paddle/fluid/platform/CMakeLists.txt index 5a47443fd0b..04c8a329e5e 100644 --- a/paddle/fluid/platform/CMakeLists.txt +++ b/paddle/fluid/platform/CMakeLists.txt @@ -117,7 +117,7 @@ endif() cc_library(cudnn_workspace_helper SRCS cudnn_workspace_helper.cc DEPS boost) # seperate init from device_context to avoid cycle dependencies -cc_library(init SRCS init.cc DEPS device_context phi_custom_kernel) +cc_library(init SRCS init.cc DEPS device_context custom_kernel) # memcpy depends on device_context, here add deps individually for # avoiding cycle dependencies diff --git a/paddle/fluid/platform/init.cc b/paddle/fluid/platform/init.cc index cf85dede8e8..293a71dbd96 100644 --- a/paddle/fluid/platform/init.cc +++ b/paddle/fluid/platform/init.cc @@ -154,8 +154,8 @@ void LoadCustomDevice(const std::string &library_dir) { "Fail to open library: %s with error: %s", lib_path, dlerror())); phi::LoadCustomRuntimeLib(lib_path, dso_handle); - phi::LoadCustomKernelLib(lib_path, dso_handle); } + phi::CustomKernelMap::Instance().RegisterCustomKernels(); LOG(INFO) << "Finished in LoadCustomDevice with libs_path: [" << library_dir << "]"; } diff --git a/paddle/phi/core/CMakeLists.txt b/paddle/phi/core/CMakeLists.txt index 424c4ce2ebc..b4a6b54d0fe 100644 --- a/paddle/phi/core/CMakeLists.txt +++ b/paddle/phi/core/CMakeLists.txt @@ -25,7 +25,7 @@ cc_library(infermeta_utils SRCS infermeta_utils.cc DEPS 
meta_tensor) cc_library(selected_rows SRCS selected_rows_impl.cc DEPS dense_tensor phi_enforce ddim memcpy) cc_library(phi_device_context SRCS device_context.cc DEPS dense_tensor selected_rows) -cc_library(phi_custom_kernel SRCS custom_kernel.cc DEPS kernel_factory convert_utils op_registry phi_tensor_raw) +cc_library(custom_kernel SRCS custom_kernel.cc DEPS kernel_factory) # Will remove once we implemented MKLDNN_Tensor if(WITH_MKLDNN) diff --git a/paddle/phi/core/custom_kernel.cc b/paddle/phi/core/custom_kernel.cc index a333874d03e..bc317da8d98 100644 --- a/paddle/phi/core/custom_kernel.cc +++ b/paddle/phi/core/custom_kernel.cc @@ -12,21 +12,29 @@ // See the License for the specific language governing permissions and // limitations under the License. -#if defined _WIN32 || defined __APPLE__ -#else -#define _LINUX -#endif - #include "paddle/phi/core/custom_kernel.h" namespace phi { -void RegisterCustomKernels(const CustomKernelMap& custom_kernel_map) { - auto& kernel_info_map = custom_kernel_map.GetMap(); - VLOG(3) << "Size of custom_kernel_map: " << kernel_info_map.size(); +void CustomKernelMap::RegisterCustomKernel(const std::string& name, + const KernelKey& key, + const Kernel& kernel) { + PADDLE_ENFORCE_EQ(kernels_[name].find(key), + kernels_[name].end(), + phi::errors::AlreadyExists( + "The custom kernel [%s:%s] has been already existed in " + "CustomKernelMap, please check if any duplicate kernel " + "info in your lib(s) before load again.", + name, + key)); + kernels_[name][key] = kernel; +} + +void CustomKernelMap::RegisterCustomKernels() { + VLOG(3) << "Size of custom_kernel_map: " << kernels_.size(); auto& kernels = KernelFactory::Instance().kernels(); - for (auto& pair : kernel_info_map) { + for (auto& pair : kernels_) { PADDLE_ENFORCE_NE( kernels.find(pair.first), kernels.end(), @@ -38,8 +46,8 @@ void RegisterCustomKernels(const CustomKernelMap& custom_kernel_map) { PADDLE_ENFORCE_EQ( kernels[pair.first].find(info_pair.first), 
kernels[pair.first].end(), - phi::errors::InvalidArgument( - "The operator <%s>'s kernel: %s has been already existed " + phi::errors::AlreadyExists( + "The kernel [%s:%s] has been already existed " "in Paddle, please contribute PR if it is necessary " "to optimize the kernel code. Custom kernel does NOT support " "to replace existing kernel in Paddle.", @@ -48,43 +56,14 @@ void RegisterCustomKernels(const CustomKernelMap& custom_kernel_map) { kernels[pair.first][info_pair.first] = info_pair.second; - VLOG(3) << "Successed in registering operator <" << pair.first - << ">'s kernel: " << info_pair.first - << " to Paddle. It will be used like native ones."; + VLOG(3) << "Successed in registering kernel [" << pair.first << ":" + << info_pair.first + << "] to Paddle. It will be used like native ones."; } + kernels_[pair.first].clear(); } + LOG(INFO) << "Successed in loading custom kernels."; + kernels_.clear(); } -void LoadCustomKernelLib(const std::string& dso_lib_path, void* dso_handle) { -#ifdef _LINUX - typedef phi::CustomKernelMap& get_custom_kernel_map_t(); - auto* func = reinterpret_cast( - dlsym(dso_handle, "PD_GetCustomKernelMap")); - - if (func == nullptr) { - LOG(WARNING) << "Skipped lib [" << dso_lib_path << "]: fail to find " - << "PD_GetCustomKernelMap symbol in this lib."; - return; - } - auto& custom_kernel_map = func(); - phi::RegisterCustomKernels(custom_kernel_map); - LOG(INFO) << "Successed in loading custom kernels in lib: " << dso_lib_path; -#else - VLOG(3) << "Unsupported: Custom kernel is only implemented on Linux."; -#endif - return; -} } // namespace phi - -#ifdef __cplusplus -extern "C" { -#endif - -// C-API to get global CustomKernelMap. 
-phi::CustomKernelMap& PD_GetCustomKernelMap() { - return phi::CustomKernelMap::Instance(); -} - -#ifdef __cplusplus -} // end extern "C" -#endif diff --git a/paddle/phi/core/custom_kernel.h b/paddle/phi/core/custom_kernel.h index ffd12b9dd03..5ba14de6a61 100644 --- a/paddle/phi/core/custom_kernel.h +++ b/paddle/phi/core/custom_kernel.h @@ -29,6 +29,12 @@ class CustomKernelMap { return g_custom_kernel_info_map; } + void RegisterCustomKernel(const std::string& kernel_name, + const KernelKey& kernel_key, + const Kernel& kernel); + + void RegisterCustomKernels(); + KernelNameMap& Kernels() { return kernels_; } const KernelNameMap& GetMap() const { return kernels_; } @@ -40,12 +46,4 @@ class CustomKernelMap { KernelNameMap kernels_; }; -/** - * Note: - * Used to register custom kernels to KernelFactory. - */ -void RegisterCustomKernels(const CustomKernelMap& custom_kernel_map); - -// Load custom kernel lib and register -void LoadCustomKernelLib(const std::string& dso_lib_path, void* dso_handle); } // namespace phi diff --git a/paddle/phi/core/kernel_registry.h b/paddle/phi/core/kernel_registry.h index 6a0c7bbc9b7..d9ed68593cd 100644 --- a/paddle/phi/core/kernel_registry.h +++ b/paddle/phi/core/kernel_registry.h @@ -210,7 +210,8 @@ struct KernelRegistrar { if (reg_type == RegType::INNER) { KernelFactory::Instance().kernels()[kernel_name][kernel_key] = kernel; } else { - CustomKernelMap::Instance().Kernels()[kernel_name][kernel_key] = kernel; + CustomKernelMap::Instance().RegisterCustomKernel( + kernel_name, kernel_key, kernel); } } }; diff --git a/paddle/phi/kernels/CMakeLists.txt b/paddle/phi/kernels/CMakeLists.txt index 16fae8d879c..58ea231beef 100644 --- a/paddle/phi/kernels/CMakeLists.txt +++ b/paddle/phi/kernels/CMakeLists.txt @@ -10,7 +10,7 @@ add_subdirectory(funcs) set_property(GLOBAL PROPERTY PHI_KERNELS "") # [ 1. 
Common kernel compilation dependencies ] -set(COMMON_KERNEL_DEPS dense_tensor sparse_coo_tensor sparse_csr_tensor kernel_context kernel_factory arg_map_context convert_utils lod_utils) +set(COMMON_KERNEL_DEPS dense_tensor sparse_coo_tensor sparse_csr_tensor kernel_context kernel_factory arg_map_context convert_utils lod_utils custom_kernel) set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} eigen_function blas math_function im2col vol2col concat_and_split_functor softmax) # remove this dep after removing fluid deps on tensor creation set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} phi_api_utils) diff --git a/paddle/phi/kernels/sparse/CMakeLists.txt b/paddle/phi/kernels/sparse/CMakeLists.txt index a319e9a13c3..eaea6d95216 100644 --- a/paddle/phi/kernels/sparse/CMakeLists.txt +++ b/paddle/phi/kernels/sparse/CMakeLists.txt @@ -1,3 +1,3 @@ -set(SPARSE_KERNEL_DEPS dense_tensor sparse_coo_tensor sparse_csr_tensor kernel_context kernel_factory arg_map_context convert_utils lod_utils math_function) +set(SPARSE_KERNEL_DEPS dense_tensor sparse_coo_tensor sparse_csr_tensor kernel_context kernel_factory arg_map_context convert_utils lod_utils math_function custom_kernel) register_kernels(DEPS ${SPARSE_KERNEL_DEPS} SUB_DIR "sparse_kernel") diff --git a/paddle/phi/tests/core/CMakeLists.txt b/paddle/phi/tests/core/CMakeLists.txt index 5356bac9fbd..de9bd7a4d47 100644 --- a/paddle/phi/tests/core/CMakeLists.txt +++ b/paddle/phi/tests/core/CMakeLists.txt @@ -1,4 +1,4 @@ -cc_test(test_custom_kernel SRCS test_custom_kernel.cc DEPS phi_custom_kernel) +cc_test(test_custom_kernel SRCS test_custom_kernel.cc DEPS custom_kernel) cc_test(test_dense_tensor SRCS test_dense_tensor.cc DEPS dense_tensor) cc_test(test_intrusive_ptr SRCS test_intrusive_ptr.cc) cc_test(test_type_info SRCS test_type_info.cc) diff --git a/paddle/phi/tests/core/test_custom_kernel.cc b/paddle/phi/tests/core/test_custom_kernel.cc index a4e89231e14..6fe34a6891a 100644 --- a/paddle/phi/tests/core/test_custom_kernel.cc +++ 
b/paddle/phi/tests/core/test_custom_kernel.cc @@ -172,7 +172,9 @@ TEST(CustomKernel, custom_kernel_dot) { fake_dot_kernels.end()); // register - phi::RegisterCustomKernels(phi::CustomKernelMap::Instance()); + phi::CustomKernelMap::Instance().RegisterCustomKernels(); + + EXPECT_EQ(0, static_cast(custom_fake_dot_kernels.size())); EXPECT_TRUE(fake_dot_kernels.find( phi::KernelKey(backend, layout, phi::DataType::FLOAT32)) != diff --git a/paddle/testing/CMakeLists.txt b/paddle/testing/CMakeLists.txt index eace7c41f4a..0cc68bf3161 100644 --- a/paddle/testing/CMakeLists.txt +++ b/paddle/testing/CMakeLists.txt @@ -1,5 +1,5 @@ # for paddle test case if(WITH_TESTING) - cc_library(paddle_gtest_main SRCS paddle_gtest_main.cc DEPS init device_context memory gtest gflags) + cc_library(paddle_gtest_main SRCS paddle_gtest_main.cc DEPS init device_context memory gtest gflags proto_desc) endif() -- GitLab