未验证 提交 2d7e7759 编写于 作者: C Chen Weihang 提交者: GitHub

[Cherry-pick] Fix Cudnn lib load problem & polish install error message (#25706)

* Add default cudnn lib path (#25175)

* add default cudnn lib path, test=develop

* change default path in func, test=develop

* move to linux branch, test=develop

* fix var error in other plat, test=develop

* Refactor dynamic dso search functions (#25214)

* refactor dynamic dso search func, test=develop

* polish details, test=develop

* polish detail based review comments, test=develop

* revert string type change, test=develop

* Polish install error hint message (#25531)

* polish install error hint msg, test=develop

* fix variable error, test=develop

* polish hint messgae again
上级 30119490
......@@ -16,6 +16,7 @@ limitations under the License. */
#include <memory>
#include <mutex> // NOLINT
#include <string>
#include <vector>
#include "gflags/gflags.h"
#include "glog/logging.h"
......@@ -59,6 +60,9 @@ struct PathNode {
static constexpr char cupti_lib_path[] = CUPTI_LIB_PATH;
// NOTE: In order to adapt to the default installation path of cuda on linux
static constexpr char linux_cudnn_lib_path[] = "/usr/local/cuda/lib64";
static PathNode s_py_site_pkg_path;
#if defined(_WIN32) && defined(PADDLE_WITH_CUDA)
......@@ -89,14 +93,29 @@ void SetPaddleLibPath(const std::string& py_site_pkg_path) {
VLOG(3) << "Set paddle lib path : " << py_site_pkg_path;
}
static inline void* GetDsoHandleFromSpecificPath(const std::string& spec_path,
const std::string& dso_name,
int dynload_flags) {
void* dso_handle = nullptr;
if (!spec_path.empty()) {
// search xxx.so from custom path
VLOG(3) << "Try to find library: " << dso_name
<< " from specific path: " << spec_path;
std::string dso_path = join(spec_path, dso_name);
dso_handle = dlopen(dso_path.c_str(), dynload_flags);
}
return dso_handle;
}
static inline void* GetDsoHandleFromDefaultPath(const std::string& dso_path,
int dynload_flags) {
VLOG(3) << "Try to find library: " << dso_path
<< " from default system path.";
// default search from LD_LIBRARY_PATH/DYLD_LIBRARY_PATH
// and /usr/local/lib path
void* dso_handle = dlopen(dso_path.c_str(), dynload_flags);
VLOG(3) << "Try to find library: " << dso_path
<< " from default system path.";
// TODO(chenweihang): This path is used to search which libs?
// DYLD_LIBRARY_PATH is disabled after Mac OS 10.11 to
// bring System Integrity Projection (SIP), if dso_handle
// is null, search from default package path in Mac OS.
......@@ -104,80 +123,79 @@ static inline void* GetDsoHandleFromDefaultPath(const std::string& dso_path,
if (nullptr == dso_handle) {
dso_handle =
dlopen(join("/usr/local/cuda/lib/", dso_path).c_str(), dynload_flags);
if (nullptr == dso_handle) {
if (dso_path == "libcudnn.dylib") {
LOG(WARNING) << "Note: [Recommend] copy cudnn into /usr/local/cuda/ \n "
"For instance, sudo tar -xzf "
"cudnn-7.5-osx-x64-v5.0-ga.tgz -C /usr/local \n sudo "
"chmod a+r /usr/local/cuda/include/cudnn.h "
"/usr/local/cuda/lib/libcudnn*";
}
}
}
#endif
if (nullptr == dso_handle) {
LOG(WARNING) << "Can not find library: " << dso_path
<< ". The process maybe hang. Please try to add the lib path "
"to LD_LIBRARY_PATH.";
}
return dso_handle;
}
static inline void* GetDsoHandleFromSearchPath(const std::string& search_root,
const std::string& dso_name,
bool throw_on_error = true) {
/*
* We define three priorities for dynamic library search:
*
* First: Search for the path specified by the user
* Second: Search the system default path
* Third: Search for a special path corresponding to
* a specific library to adapt to changes and easy to expand.
*/
static inline void* GetDsoHandleFromSearchPath(
const std::string& config_path, const std::string& dso_name,
bool throw_on_error = true,
const std::vector<std::string>& extra_paths = std::vector<std::string>(),
const std::string& warning_msg = std::string()) {
#if !defined(_WIN32)
int dynload_flags = RTLD_LAZY | RTLD_LOCAL;
#else
int dynload_flags = 0;
#endif // !_WIN32
void* dso_handle = nullptr;
// 1. search in user config path by FLAGS
void* dso_handle =
GetDsoHandleFromSpecificPath(config_path, dso_name, dynload_flags);
// 2. search in system default path
if (nullptr == dso_handle) {
dso_handle = GetDsoHandleFromDefaultPath(dso_name, dynload_flags);
}
// 3. search in extra paths
if (nullptr == dso_handle) {
for (auto path : extra_paths) {
dso_handle = GetDsoHandleFromSpecificPath(path, dso_name, dynload_flags);
}
}
std::string dlPath = dso_name;
if (search_root.empty()) {
dso_handle = GetDsoHandleFromDefaultPath(dlPath, dynload_flags);
} else {
// search xxx.so from custom path
dlPath = join(search_root, dso_name);
dso_handle = dlopen(dlPath.c_str(), dynload_flags);
// 4. [If Failed] logging warning if exists
if (nullptr == dso_handle && !warning_msg.empty()) {
LOG(WARNING) << warning_msg;
}
// 5. [If Failed] logging or throw error info
if (nullptr == dso_handle) {
auto error_msg =
"The third-party dynamic library (%s) that Paddle depends on is not "
"configured correctly. (error code is %s)\n"
" Suggestions:\n"
" 1. Check if the third-party dynamic library (e.g. CUDA, CUDNN) "
"is installed correctly and its version is matched with paddlepaddle "
"you installed.\n"
" 2. Configure third-party dynamic library environment variables as "
"follows:\n"
" - Linux: set LD_LIBRARY_PATH by `export LD_LIBRARY_PATH=...`\n"
" - Windows: set PATH by `set PATH=XXX;%PATH%`\n"
" - Mac: set DYLD_LIBRARY_PATH by `export DYLD_LIBRARY_PATH=...` "
"[Note: After Mac OS 10.11, using the DYLD_LIBRARY_PATH is "
"impossible unless System Integrity Protection (SIP) is disabled.]";
#if !defined(_WIN32)
auto errorno = dlerror();
#else
auto errorno = GetLastError();
#endif // !_WIN32
// if not found, search from default path
if (nullptr == dso_handle) {
LOG(WARNING) << "Failed to find dynamic library: " << dlPath << " ("
<< errorno << ")";
if (dlPath.find("nccl") != std::string::npos) {
std::cout
<< "You may need to install 'nccl2' from NVIDIA official website: "
<< "https://developer.nvidia.com/nccl/nccl-download"
<< "before install PaddlePaddle" << std::endl;
}
dlPath = dso_name;
dso_handle = GetDsoHandleFromDefaultPath(dlPath, dynload_flags);
if (throw_on_error) {
// NOTE: Special error report case, no need to change its format
PADDLE_THROW(
platform::errors::PreconditionNotMet(error_msg, dso_name, errorno));
} else {
LOG(WARNING) << string::Sprintf(error_msg, dso_name, errorno);
}
}
auto error_msg =
"Failed to find dynamic library: %s ( %s ) \n Please specify "
"its path correctly using following ways: \n Method. set "
"environment variable LD_LIBRARY_PATH on Linux or "
"DYLD_LIBRARY_PATH on Mac OS. \n For instance, issue command: "
"export LD_LIBRARY_PATH=... \n Note: After Mac OS 10.11, "
"using the DYLD_LIBRARY_PATH is impossible unless System "
"Integrity Protection (SIP) is disabled.";
#if !defined(_WIN32)
auto errorno = dlerror();
#else
auto errorno = GetLastError();
#endif // !_WIN32
if (throw_on_error) {
PADDLE_ENFORCE(nullptr != dso_handle, error_msg, dlPath, errorno);
} else if (nullptr == dso_handle) {
LOG(WARNING) << string::Sprintf(error_msg, dlPath, errorno);
}
return dso_handle;
}
......@@ -194,23 +212,29 @@ void* GetCublasDsoHandle() {
void* GetCUDNNDsoHandle() {
#if defined(__APPLE__) || defined(__OSX__)
return GetDsoHandleFromSearchPath(FLAGS_cudnn_dir, "libcudnn.dylib", false);
std::string mac_warn_meg(
"Note: [Recommend] copy cudnn into /usr/local/cuda/ \n "
"For instance, sudo tar -xzf "
"cudnn-7.5-osx-x64-v5.0-ga.tgz -C /usr/local \n sudo "
"chmod a+r /usr/local/cuda/include/cudnn.h "
"/usr/local/cuda/lib/libcudnn*");
return GetDsoHandleFromSearchPath(FLAGS_cudnn_dir, "libcudnn.dylib", false,
{}, mac_warn_meg);
#elif defined(_WIN32) && defined(PADDLE_WITH_CUDA)
return GetDsoHandleFromSearchPath(FLAGS_cudnn_dir, win_cudnn_lib);
#else
return GetDsoHandleFromSearchPath(FLAGS_cudnn_dir, "libcudnn.so", false);
return GetDsoHandleFromSearchPath(FLAGS_cudnn_dir, "libcudnn.so", false,
{linux_cudnn_lib_path});
#endif
}
void* GetCUPTIDsoHandle() {
std::string cupti_path = cupti_lib_path;
if (!FLAGS_cupti_dir.empty()) {
cupti_path = FLAGS_cupti_dir;
}
#if defined(__APPLE__) || defined(__OSX__)
return GetDsoHandleFromSearchPath(cupti_path, "libcupti.dylib", false);
return GetDsoHandleFromSearchPath(FLAGS_cupti_dir, "libcupti.dylib", false,
{cupti_lib_path});
#else
return GetDsoHandleFromSearchPath(cupti_path, "libcupti.so", false);
return GetDsoHandleFromSearchPath(FLAGS_cupti_dir, "libcupti.so", false,
{cupti_lib_path});
#endif
}
......@@ -255,10 +279,16 @@ void* GetWarpCTCDsoHandle() {
}
void* GetNCCLDsoHandle() {
std::string warning_msg(
"You may need to install 'nccl2' from NVIDIA official website: "
"https://developer.nvidia.com/nccl/nccl-download"
"before install PaddlePaddle.");
#if defined(__APPLE__) || defined(__OSX__)
return GetDsoHandleFromSearchPath(FLAGS_nccl_dir, "libnccl.dylib");
return GetDsoHandleFromSearchPath(FLAGS_nccl_dir, "libnccl.dylib", true, {},
warning_msg);
#else
return GetDsoHandleFromSearchPath(FLAGS_nccl_dir, "libnccl.so");
return GetDsoHandleFromSearchPath(FLAGS_nccl_dir, "libnccl.so", true, {},
warning_msg);
#endif
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册