From 7e4ed848378f076c7e9e61be3115d335859a9e81 Mon Sep 17 00:00:00 2001 From: ronnywang <524019753@qq.com> Date: Fri, 18 Feb 2022 13:02:04 +0800 Subject: [PATCH] [CustomDevice]Improved custom device initialization (#39634) --- paddle/fluid/framework/custom_kernel.cc | 53 ++----------------- paddle/fluid/framework/custom_kernel.h | 5 +- .../platform/device/custom/custom_device.cc | 42 ++++++++------- .../device/custom/custom_device_test.cc | 4 +- .../fluid/platform/device/device_manager.cc | 14 +---- paddle/fluid/platform/device/device_manager.h | 10 ++-- paddle/fluid/platform/init.cc | 42 ++++++++++----- paddle/pten/api/ext/op_kernel_info.h | 4 -- paddle/pten/api/lib/op_kernel_info.cc | 6 --- 9 files changed, 65 insertions(+), 115 deletions(-) diff --git a/paddle/fluid/framework/custom_kernel.cc b/paddle/fluid/framework/custom_kernel.cc index 6bcae738cc..9f0cf4b301 100644 --- a/paddle/fluid/framework/custom_kernel.cc +++ b/paddle/fluid/framework/custom_kernel.cc @@ -354,25 +354,15 @@ void RegisterKernelWithMetaInfoMap( } } -void LoadCustomKernelLib(const std::string& dso_lib_path) { +void LoadCustomKernelLib(const std::string& dso_lib_path, void* dso_handle) { #ifdef _LINUX - void* dso_handle = nullptr; - int dynload_flags = RTLD_NOW | RTLD_LOCAL; - dso_handle = dlopen(dso_lib_path.c_str(), dynload_flags); - - // MUST valid dso_lib_path - PADDLE_ENFORCE_NOT_NULL( - dso_handle, - platform::errors::InvalidArgument( - "Fail to open library: %s with error: %s", dso_lib_path, dlerror())); - typedef OpKernelInfoMap& get_op_kernel_info_map_t(); auto* func = reinterpret_cast( dlsym(dso_handle, "PD_GetOpKernelInfoMap")); if (func == nullptr) { - LOG(INFO) << "Skipped lib [" << dso_lib_path << "]: fail to find " - << "PD_GetOpKernelInfoMap symbol in this lib."; + LOG(WARNING) << "Skipped lib [" << dso_lib_path << "]: fail to find " + << "PD_GetOpKernelInfoMap symbol in this lib."; return; } auto& op_kernel_info_map = func(); @@ -384,42 +374,5 @@ void LoadCustomKernelLib(const std::string& dso_lib_path) { return; } -// List all libs with given path -std::vector ListAllLib(const std::string& libs_path) { - DIR* dir = nullptr; - dir = opendir(libs_path.c_str()); - - // MUST valid libs_path - PADDLE_ENFORCE_NOT_NULL(dir, platform::errors::InvalidArgument( - "Fail to open path: %s", libs_path)); - - dirent* ptr = nullptr; - std::vector libs; - std::regex express(".*\\.so"); - std::match_results results; - while ((ptr = readdir(dir)) != nullptr) { - std::string filename(ptr->d_name); - if (std::regex_match(filename.begin(), filename.end(), results, express)) { - libs.emplace_back(libs_path + '/' + filename); - LOG(INFO) << "Found lib [" << filename << "]"; - } else { - VLOG(3) << "Skipped file [" << filename << "] without .so postfix"; - } - } - closedir(dir); - return libs; -} - -// Load custom kernels with given path -void LoadCustomKernel(const std::string& libs_path) { - VLOG(3) << "Try loading custom libs from: [" << libs_path << "]"; - std::vector libs = ListAllLib(libs_path); - for (auto& lib_path : libs) { - LoadCustomKernelLib(lib_path); - } - LOG(INFO) << "Finished in LoadCustomKernel with libs_path: [" << libs_path - << "]"; -} - } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/custom_kernel.h b/paddle/fluid/framework/custom_kernel.h index 0c12bdfa8c..981ef1f464 100644 --- a/paddle/fluid/framework/custom_kernel.h +++ b/paddle/fluid/framework/custom_kernel.h @@ -19,10 +19,7 @@ limitations under the License. */ namespace paddle { namespace framework { -// Load custom kernel lib from giwen path -void LoadCustomKernel(const std::string& libs_path); - -void LoadCustomKernelLib(const std::string& dso_lib_path); +void LoadCustomKernelLib(const std::string& dso_lib_path, void* dso_handle); // Load custom kernel api: register kernel after user compiled void LoadOpKernelInfoAndRegister(const std::string& dso_name); diff --git a/paddle/fluid/platform/device/custom/custom_device.cc b/paddle/fluid/platform/device/custom/custom_device.cc index c5b98d3e22..09f0421a87 100644 --- a/paddle/fluid/platform/device/custom/custom_device.cc +++ b/paddle/fluid/platform/device/custom/custom_device.cc @@ -621,28 +621,26 @@ bool ValidCustomCustomRuntimeParams(const CustomRuntimeParams* params) { typedef bool (*RegisterDevicePluginFn)(CustomRuntimeParams* runtime_params); -bool LoadCustomRuntimeLib(const CustomRuntimeParams& runtime_params, +void LoadCustomRuntimeLib(const CustomRuntimeParams& runtime_params, std::unique_ptr device_interface, - void* dso_handle) { + const std::string& dso_lib_path, void* dso_handle) { if (ValidCustomCustomRuntimeParams(&runtime_params)) { auto device = std::make_unique(runtime_params.device_type, 255, true, std::move(device_interface), dso_handle); if (false == DeviceManager::Register(std::move(device))) { - LOG(WARNING) << "Skip this library. Register failed!!! there may be a " + LOG(WARNING) << "Skipped lib [" << dso_lib_path + << "]. Register failed!!! there may be a " "Custom Runtime with the same name."; - return false; } } else { - LOG(WARNING) - << "Skip this library. Wrong parameters!!! please check the version " - "compatibility between PaddlePaddle and Custom Runtime."; - return false; + LOG(WARNING) << "Skipped lib [" << dso_lib_path + << "]. Wrong parameters!!! please check the version " + "compatibility between PaddlePaddle and Custom Runtime."; } - return true; } -bool LoadCustomRuntimeLib(void* dso_handle) { +void LoadCustomRuntimeLib(const std::string& dso_lib_path, void* dso_handle) { CustomRuntimeParams runtime_params; std::memset(&runtime_params, 0, sizeof(CustomRuntimeParams)); runtime_params.size = sizeof(CustomRuntimeParams); @@ -653,19 +651,23 @@ bool LoadCustomRuntimeLib(void* dso_handle) { RegisterDevicePluginFn init_plugin_fn = reinterpret_cast(dlsym(dso_handle, "InitPlugin")); - if (!init_plugin_fn) { - LOG(WARNING) << "Skip this library. InitPlugin symbol not found."; - return false; + + if (init_plugin_fn == nullptr) { + LOG(WARNING) << "Skipped lib [" << dso_lib_path << "]: fail to find " + << "InitPlugin symbol in this lib."; + return; } + init_plugin_fn(&runtime_params); if (runtime_params.device_type == nullptr) { - LOG(WARNING) - << "Skip this library. InitPlugin failed!!! please check the version " - "compatibility between PaddlePaddle and Custom Runtime."; - return false; - } - return LoadCustomRuntimeLib(runtime_params, std::move(device_interface), - dso_handle); + LOG(WARNING) << "Skipped lib [" << dso_lib_path + << "]: InitPlugin failed, please check the version " + "compatibility between PaddlePaddle and Custom Runtime."; + return; + } + LoadCustomRuntimeLib(runtime_params, std::move(device_interface), + dso_lib_path, dso_handle); + LOG(INFO) << "Successed in loading custom runtime in lib: " << dso_lib_path; } } // namespace platform diff --git a/paddle/fluid/platform/device/custom/custom_device_test.cc b/paddle/fluid/platform/device/custom/custom_device_test.cc index 6a874ea221..4fa60edb7d 100644 --- a/paddle/fluid/platform/device/custom/custom_device_test.cc +++ b/paddle/fluid/platform/device/custom/custom_device_test.cc @@ -30,8 +30,8 @@ void RegisterDevice() { runtime_params.interface->size = sizeof(C_DeviceInterface); InitFakeCPUDevice(&runtime_params); - EXPECT_TRUE(paddle::platform::LoadCustomRuntimeLib( - runtime_params, std::move(device_interface), nullptr)); + paddle::platform::LoadCustomRuntimeLib( + runtime_params, std::move(device_interface), "", nullptr); } void InitDevice() { diff --git a/paddle/fluid/platform/device/device_manager.cc b/paddle/fluid/platform/device/device_manager.cc index 38dcb721b1..fed97c0be0 100644 --- a/paddle/fluid/platform/device/device_manager.cc +++ b/paddle/fluid/platform/device/device_manager.cc @@ -389,15 +389,14 @@ std::vector ListAllLibraries(const std::string& library_dir) { dir = opendir(library_dir.c_str()); if (dir == nullptr) { - VLOG(4) << "open CustomDevice library_dir: " << library_dir << " failed"; + VLOG(4) << "Failed to open path: " << library_dir; } else { while ((ptr = readdir(dir)) != nullptr) { std::string filename(ptr->d_name); if (std::regex_match(filename.begin(), filename.end(), results, express)) { libraries.push_back(library_dir + '/' + filename); - VLOG(4) << "found CustomDevice library: " << libraries.back() - << std::endl; + VLOG(4) << "Found lib: " << libraries.back(); } } closedir(dir); @@ -406,15 +405,6 @@ std::vector ListAllLibraries(const std::string& library_dir) { return libraries; } -bool LoadCustomDevice(const std::string& library_dir) { - std::vector libs = ListAllLibraries(library_dir); - for (const auto& lib_path : libs) { - auto dso_handle = dlopen(lib_path.c_str(), RTLD_NOW); - LoadCustomRuntimeLib(dso_handle); - } - return true; -} - } // namespace platform } // namespace paddle #endif diff --git a/paddle/fluid/platform/device/device_manager.h b/paddle/fluid/platform/device/device_manager.h index ad910605d9..ac6a9ef915 100644 --- a/paddle/fluid/platform/device/device_manager.h +++ b/paddle/fluid/platform/device/device_manager.h @@ -162,13 +162,13 @@ class DeviceManager { device_map_; }; -bool LoadCustomRuntimeLib(void* dso_handle); +std::vector ListAllLibraries(const std::string& library_dir); -bool LoadCustomRuntimeLib(const CustomRuntimeParams& runtime_params, - std::unique_ptr device_interface, - void* dso_handle); +void LoadCustomRuntimeLib(const std::string& dso_lib_path, void* dso_handle); -bool LoadCustomDevice(const std::string& library_path); +void LoadCustomRuntimeLib(const CustomRuntimeParams& runtime_params, + std::unique_ptr device_interface, + const std::string& dso_lib_path, void* dso_handle); class Registrar { public: diff --git a/paddle/fluid/platform/init.cc b/paddle/fluid/platform/init.cc index 5d0fccf9e9..71fd0d2014 100644 --- a/paddle/fluid/platform/init.cc +++ b/paddle/fluid/platform/init.cc @@ -141,6 +141,25 @@ void InitCupti() { } #endif +#ifdef PADDLE_WITH_CUSTOM_DEVICE +void LoadCustomDevice(const std::string &library_dir) { + LOG(INFO) << "Try loading custom device libs from: [" << library_dir << "]"; + std::vector libs = platform::ListAllLibraries(library_dir); + for (const auto &lib_path : libs) { + auto dso_handle = dlopen(lib_path.c_str(), RTLD_NOW); + PADDLE_ENFORCE_NOT_NULL( + dso_handle, + platform::errors::InvalidArgument( + "Fail to open library: %s with error: %s", lib_path, dlerror())); + + platform::LoadCustomRuntimeLib(lib_path, dso_handle); + framework::LoadCustomKernelLib(lib_path, dso_handle); + } + LOG(INFO) << "Finished in LoadCustomDevice with libs_path: [" << library_dir + << "]"; +} +#endif + void InitDevices() { // CUPTI attribute should be set before any CUDA context is created (see CUPTI // documentation about CUpti_ActivityAttribute). @@ -227,6 +246,7 @@ void InitDevices(const std::vector devices) { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) places.emplace_back(platform::CUDAPinnedPlace()); #endif +#ifdef PADDLE_WITH_CUSTOM_DEVICE const char *custom_kernel_root_p = std::getenv("CUSTOM_DEVICE_ROOT"); if (!custom_kernel_root_p) { VLOG(3) << "Env [CUSTOM_DEVICE_ROOT] is not set."; @@ -234,24 +254,22 @@ void InitDevices(const std::vector devices) { std::string custom_kernel_root(custom_kernel_root_p); if (!custom_kernel_root.empty()) { LOG(INFO) << "ENV [CUSTOM_DEVICE_ROOT]=" << custom_kernel_root; - framework::LoadCustomKernel(custom_kernel_root); -#ifdef PADDLE_WITH_CUSTOM_DEVICE - if (platform::LoadCustomDevice(custom_kernel_root)) { - auto device_types = platform::DeviceManager::GetAllCustomDeviceTypes(); - for (auto &dev_type : device_types) { - VLOG(1) << "Device type: " << dev_type << ", visible devices count: " - << platform::DeviceManager::GetDeviceCount(dev_type); - for (size_t i = 0; - i < platform::DeviceManager::GetDeviceCount(dev_type); i++) { - places.push_back(platform::CustomPlace(dev_type, i)); - } + LoadCustomDevice(custom_kernel_root); + + auto device_types = platform::DeviceManager::GetAllCustomDeviceTypes(); + for (auto &dev_type : device_types) { + auto device_count = platform::DeviceManager::GetDeviceCount(dev_type); + LOG(INFO) << "CustomDevice: " << dev_type + << ", visible devices count: " << device_count; + for (size_t i = 0; i < device_count; i++) { + places.push_back(platform::CustomPlace(dev_type, i)); } } -#endif } else { VLOG(3) << "ENV [CUSTOM_DEVICE_ROOT] is empty."; } } +#endif platform::DeviceContextPool::Init(places); #ifndef PADDLE_WITH_MKLDNN diff --git a/paddle/pten/api/ext/op_kernel_info.h b/paddle/pten/api/ext/op_kernel_info.h index ebecfaf924..30e0c8e6d1 100644 --- a/paddle/pten/api/ext/op_kernel_info.h +++ b/paddle/pten/api/ext/op_kernel_info.h @@ -633,10 +633,6 @@ class PADDLE_API OpKernelInfoBuilder { // Call after PD_REGISTER_KERNEL(...) void RegisterAllCustomKernel(); -// Using this api to load compiled custom kernel's dynamic library and -// register custom kernels -void LoadCustomKernelLib(const std::string& dso_name); - //////////////// Custom kernel register macro ///////////////////// // Refer to paddle/pten/core/kernel_registry.h, we can not use // PT_REGISTER_KERNEL directly, common macros and functions are diff --git a/paddle/pten/api/lib/op_kernel_info.cc b/paddle/pten/api/lib/op_kernel_info.cc index db474d457c..f7d75e2a8b 100644 --- a/paddle/pten/api/lib/op_kernel_info.cc +++ b/paddle/pten/api/lib/op_kernel_info.cc @@ -92,12 +92,6 @@ void RegisterAllCustomKernel() { framework::RegisterKernelWithMetaInfoMap(op_kernel_info_map); } -// Using this api to load compiled custom kernel's dynamic library and -// register custom kernels -void LoadCustomKernelLib(const std::string& dso_name) { - framework::LoadCustomKernelLib(dso_name); -} - } // namespace paddle #ifdef __cplusplus -- GitLab