未验证 提交 2d7e7759 编写于 作者: C Chen Weihang 提交者: GitHub

[Cherry-pick] Fix Cudnn lib load problem & polish install error message (#25706)

* Add default cudnn lib path (#25175)

* add default cudnn lib path, test=develop

* change default path in func, test=develop

* move to linux branch, test=develop

* fix var error in other plat, test=develop

* Refactor dynamic dso search functions (#25214)

* refactor dynamic dso search func, test=develop

* polish details, test=develop

* polish detail based review comments, test=develop

* revert string type change, test=develop

* Polish install error hint message (#25531)

* polish install error hint msg, test=develop

* fix variable error, test=develop

* polish hint message again
上级 30119490
...@@ -16,6 +16,7 @@ limitations under the License. */ ...@@ -16,6 +16,7 @@ limitations under the License. */
#include <memory> #include <memory>
#include <mutex> // NOLINT #include <mutex> // NOLINT
#include <string> #include <string>
#include <vector>
#include "gflags/gflags.h" #include "gflags/gflags.h"
#include "glog/logging.h" #include "glog/logging.h"
...@@ -59,6 +60,9 @@ struct PathNode { ...@@ -59,6 +60,9 @@ struct PathNode {
static constexpr char cupti_lib_path[] = CUPTI_LIB_PATH; static constexpr char cupti_lib_path[] = CUPTI_LIB_PATH;
// NOTE: Default CUDA library directory on Linux. It is passed as an extra
// search path when loading libcudnn.so, so that cuDNN installed into the
// default CUDA installation path can still be found.
static constexpr char linux_cudnn_lib_path[] = "/usr/local/cuda/lib64";
static PathNode s_py_site_pkg_path; static PathNode s_py_site_pkg_path;
#if defined(_WIN32) && defined(PADDLE_WITH_CUDA) #if defined(_WIN32) && defined(PADDLE_WITH_CUDA)
...@@ -89,14 +93,29 @@ void SetPaddleLibPath(const std::string& py_site_pkg_path) { ...@@ -89,14 +93,29 @@ void SetPaddleLibPath(const std::string& py_site_pkg_path) {
VLOG(3) << "Set paddle lib path : " << py_site_pkg_path; VLOG(3) << "Set paddle lib path : " << py_site_pkg_path;
} }
// Tries to open the library `dso_name` located inside the directory
// `spec_path`, passing `dynload_flags` to dlopen.
//
// Returns the handle produced by dlopen, or nullptr when `spec_path` is empty
// (no lookup is attempted in that case) or when dlopen fails.
static inline void* GetDsoHandleFromSpecificPath(const std::string& spec_path,
                                                 const std::string& dso_name,
                                                 int dynload_flags) {
  // Nothing to search when no directory was given.
  if (spec_path.empty()) {
    return nullptr;
  }

  // search xxx.so from custom path
  VLOG(3) << "Try to find library: " << dso_name
          << " from specific path: " << spec_path;
  const std::string full_path = join(spec_path, dso_name);
  return dlopen(full_path.c_str(), dynload_flags);
}
static inline void* GetDsoHandleFromDefaultPath(const std::string& dso_path, static inline void* GetDsoHandleFromDefaultPath(const std::string& dso_path,
int dynload_flags) { int dynload_flags) {
VLOG(3) << "Try to find library: " << dso_path
<< " from default system path.";
// default search from LD_LIBRARY_PATH/DYLD_LIBRARY_PATH // default search from LD_LIBRARY_PATH/DYLD_LIBRARY_PATH
// and /usr/local/lib path // and /usr/local/lib path
void* dso_handle = dlopen(dso_path.c_str(), dynload_flags); void* dso_handle = dlopen(dso_path.c_str(), dynload_flags);
VLOG(3) << "Try to find library: " << dso_path
<< " from default system path.";
// TODO(chenweihang): This path is used to search which libs?
// DYLD_LIBRARY_PATH is disabled after Mac OS 10.11 to // DYLD_LIBRARY_PATH is disabled after Mac OS 10.11 to
// bring System Integrity Projection (SIP), if dso_handle // bring System Integrity Projection (SIP), if dso_handle
// is null, search from default package path in Mac OS. // is null, search from default package path in Mac OS.
...@@ -104,80 +123,79 @@ static inline void* GetDsoHandleFromDefaultPath(const std::string& dso_path, ...@@ -104,80 +123,79 @@ static inline void* GetDsoHandleFromDefaultPath(const std::string& dso_path,
if (nullptr == dso_handle) { if (nullptr == dso_handle) {
dso_handle = dso_handle =
dlopen(join("/usr/local/cuda/lib/", dso_path).c_str(), dynload_flags); dlopen(join("/usr/local/cuda/lib/", dso_path).c_str(), dynload_flags);
if (nullptr == dso_handle) {
if (dso_path == "libcudnn.dylib") {
LOG(WARNING) << "Note: [Recommend] copy cudnn into /usr/local/cuda/ \n "
"For instance, sudo tar -xzf "
"cudnn-7.5-osx-x64-v5.0-ga.tgz -C /usr/local \n sudo "
"chmod a+r /usr/local/cuda/include/cudnn.h "
"/usr/local/cuda/lib/libcudnn*";
}
}
} }
#endif #endif
if (nullptr == dso_handle) {
LOG(WARNING) << "Can not find library: " << dso_path
<< ". The process maybe hang. Please try to add the lib path "
"to LD_LIBRARY_PATH.";
}
return dso_handle; return dso_handle;
} }
static inline void* GetDsoHandleFromSearchPath(const std::string& search_root, /*
const std::string& dso_name, * We define three priorities for dynamic library search:
bool throw_on_error = true) { *
* First: Search for the path specified by the user
* Second: Search the system default path
* Third: Search for a special path corresponding to
* a specific library to adapt to changes and easy to expand.
*/
static inline void* GetDsoHandleFromSearchPath(
const std::string& config_path, const std::string& dso_name,
bool throw_on_error = true,
const std::vector<std::string>& extra_paths = std::vector<std::string>(),
const std::string& warning_msg = std::string()) {
#if !defined(_WIN32) #if !defined(_WIN32)
int dynload_flags = RTLD_LAZY | RTLD_LOCAL; int dynload_flags = RTLD_LAZY | RTLD_LOCAL;
#else #else
int dynload_flags = 0; int dynload_flags = 0;
#endif // !_WIN32 #endif // !_WIN32
void* dso_handle = nullptr; // 1. search in user config path by FLAGS
void* dso_handle =
GetDsoHandleFromSpecificPath(config_path, dso_name, dynload_flags);
// 2. search in system default path
if (nullptr == dso_handle) {
dso_handle = GetDsoHandleFromDefaultPath(dso_name, dynload_flags);
}
// 3. search in extra paths
if (nullptr == dso_handle) {
for (auto path : extra_paths) {
dso_handle = GetDsoHandleFromSpecificPath(path, dso_name, dynload_flags);
}
}
std::string dlPath = dso_name; // 4. [If Failed] logging warning if exists
if (search_root.empty()) { if (nullptr == dso_handle && !warning_msg.empty()) {
dso_handle = GetDsoHandleFromDefaultPath(dlPath, dynload_flags); LOG(WARNING) << warning_msg;
} else { }
// search xxx.so from custom path
dlPath = join(search_root, dso_name); // 5. [If Failed] logging or throw error info
dso_handle = dlopen(dlPath.c_str(), dynload_flags); if (nullptr == dso_handle) {
auto error_msg =
"The third-party dynamic library (%s) that Paddle depends on is not "
"configured correctly. (error code is %s)\n"
" Suggestions:\n"
" 1. Check if the third-party dynamic library (e.g. CUDA, CUDNN) "
"is installed correctly and its version is matched with paddlepaddle "
"you installed.\n"
" 2. Configure third-party dynamic library environment variables as "
"follows:\n"
" - Linux: set LD_LIBRARY_PATH by `export LD_LIBRARY_PATH=...`\n"
" - Windows: set PATH by `set PATH=XXX;%PATH%`\n"
" - Mac: set DYLD_LIBRARY_PATH by `export DYLD_LIBRARY_PATH=...` "
"[Note: After Mac OS 10.11, using the DYLD_LIBRARY_PATH is "
"impossible unless System Integrity Protection (SIP) is disabled.]";
#if !defined(_WIN32) #if !defined(_WIN32)
auto errorno = dlerror(); auto errorno = dlerror();
#else #else
auto errorno = GetLastError(); auto errorno = GetLastError();
#endif // !_WIN32 #endif // !_WIN32
// if not found, search from default path if (throw_on_error) {
if (nullptr == dso_handle) { // NOTE: Special error report case, no need to change its format
LOG(WARNING) << "Failed to find dynamic library: " << dlPath << " (" PADDLE_THROW(
<< errorno << ")"; platform::errors::PreconditionNotMet(error_msg, dso_name, errorno));
if (dlPath.find("nccl") != std::string::npos) { } else {
std::cout LOG(WARNING) << string::Sprintf(error_msg, dso_name, errorno);
<< "You may need to install 'nccl2' from NVIDIA official website: "
<< "https://developer.nvidia.com/nccl/nccl-download"
<< "before install PaddlePaddle" << std::endl;
}
dlPath = dso_name;
dso_handle = GetDsoHandleFromDefaultPath(dlPath, dynload_flags);
} }
} }
auto error_msg =
"Failed to find dynamic library: %s ( %s ) \n Please specify "
"its path correctly using following ways: \n Method. set "
"environment variable LD_LIBRARY_PATH on Linux or "
"DYLD_LIBRARY_PATH on Mac OS. \n For instance, issue command: "
"export LD_LIBRARY_PATH=... \n Note: After Mac OS 10.11, "
"using the DYLD_LIBRARY_PATH is impossible unless System "
"Integrity Protection (SIP) is disabled.";
#if !defined(_WIN32)
auto errorno = dlerror();
#else
auto errorno = GetLastError();
#endif // !_WIN32
if (throw_on_error) {
PADDLE_ENFORCE(nullptr != dso_handle, error_msg, dlPath, errorno);
} else if (nullptr == dso_handle) {
LOG(WARNING) << string::Sprintf(error_msg, dlPath, errorno);
}
return dso_handle; return dso_handle;
} }
...@@ -194,23 +212,29 @@ void* GetCublasDsoHandle() { ...@@ -194,23 +212,29 @@ void* GetCublasDsoHandle() {
void* GetCUDNNDsoHandle() { void* GetCUDNNDsoHandle() {
#if defined(__APPLE__) || defined(__OSX__) #if defined(__APPLE__) || defined(__OSX__)
return GetDsoHandleFromSearchPath(FLAGS_cudnn_dir, "libcudnn.dylib", false); std::string mac_warn_meg(
"Note: [Recommend] copy cudnn into /usr/local/cuda/ \n "
"For instance, sudo tar -xzf "
"cudnn-7.5-osx-x64-v5.0-ga.tgz -C /usr/local \n sudo "
"chmod a+r /usr/local/cuda/include/cudnn.h "
"/usr/local/cuda/lib/libcudnn*");
return GetDsoHandleFromSearchPath(FLAGS_cudnn_dir, "libcudnn.dylib", false,
{}, mac_warn_meg);
#elif defined(_WIN32) && defined(PADDLE_WITH_CUDA) #elif defined(_WIN32) && defined(PADDLE_WITH_CUDA)
return GetDsoHandleFromSearchPath(FLAGS_cudnn_dir, win_cudnn_lib); return GetDsoHandleFromSearchPath(FLAGS_cudnn_dir, win_cudnn_lib);
#else #else
return GetDsoHandleFromSearchPath(FLAGS_cudnn_dir, "libcudnn.so", false); return GetDsoHandleFromSearchPath(FLAGS_cudnn_dir, "libcudnn.so", false,
{linux_cudnn_lib_path});
#endif #endif
} }
void* GetCUPTIDsoHandle() { void* GetCUPTIDsoHandle() {
std::string cupti_path = cupti_lib_path;
if (!FLAGS_cupti_dir.empty()) {
cupti_path = FLAGS_cupti_dir;
}
#if defined(__APPLE__) || defined(__OSX__) #if defined(__APPLE__) || defined(__OSX__)
return GetDsoHandleFromSearchPath(cupti_path, "libcupti.dylib", false); return GetDsoHandleFromSearchPath(FLAGS_cupti_dir, "libcupti.dylib", false,
{cupti_lib_path});
#else #else
return GetDsoHandleFromSearchPath(cupti_path, "libcupti.so", false); return GetDsoHandleFromSearchPath(FLAGS_cupti_dir, "libcupti.so", false,
{cupti_lib_path});
#endif #endif
} }
...@@ -255,10 +279,16 @@ void* GetWarpCTCDsoHandle() { ...@@ -255,10 +279,16 @@ void* GetWarpCTCDsoHandle() {
} }
void* GetNCCLDsoHandle() { void* GetNCCLDsoHandle() {
std::string warning_msg(
"You may need to install 'nccl2' from NVIDIA official website: "
"https://developer.nvidia.com/nccl/nccl-download"
"before install PaddlePaddle.");
#if defined(__APPLE__) || defined(__OSX__) #if defined(__APPLE__) || defined(__OSX__)
return GetDsoHandleFromSearchPath(FLAGS_nccl_dir, "libnccl.dylib"); return GetDsoHandleFromSearchPath(FLAGS_nccl_dir, "libnccl.dylib", true, {},
warning_msg);
#else #else
return GetDsoHandleFromSearchPath(FLAGS_nccl_dir, "libnccl.so"); return GetDsoHandleFromSearchPath(FLAGS_nccl_dir, "libnccl.so", true, {},
warning_msg);
#endif #endif
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册