安装 1.4.1 版本后运行预测时出现找不到 libnccl.so 的问题
Created by: littlepan0413
之前安装的是 1.3.0.post97 版本，预测没有问题，但是这个版本有 bug，导致功能无法使用。卸载之后重新安装了 1.4.1.post97 版本，然后运行时报错：
Failed to find dynamic library: libnccl.so ( libnccl.so: cannot open shared object file: No such file or directory )
完整报错信息 W0521 17:05:35.625020 21264 dynamic_loader.cc:107] Can not find library: libnccl.so. Please try to add the lib path to LD_LIBRARY_PATH. terminate called after throwing an instance of 'paddle::platform::EnforceNotMet' what(): Failed to find dynamic library: libnccl.so ( libnccl.so: cannot open shared object file: No such file or directory ) Please specify its path correctly using following ways: Method. set environment variable LD_LIBRARY_PATH on Linux or DYLD_LIBRARY_PATH on Mac OS. For instance, issue command: export LD_LIBRARY_PATH=... Note: After Mac OS 10.11, using the DYLD_LIBRARY_PATH is impossible unless System Integrity Protection (SIP) is disabled. at [/paddle/paddle/fluid/platform/dynload/dynamic_loader.cc:163] PaddlePaddle Call Stacks: 0 0x7f9925d177f0p void paddle::platform::EnforceNotMet::Init<char const*>(char const*, char const*, int) + 352 1 0x7f9925d17b69p paddle::platform::EnforceNotMet::EnforceNotMet(std::exception_ptr::exception_ptr, char const*, int) + 137 2 0x7f9927a46a25p paddle::platform::dynload::GetNCCLDsoHandle() + 1813 3 0x7f99276019a9p void std::once_call_impl<std::Bind_simple<paddle::platform::dynload::DynLoad__ncclCommDestroy::operator()<ncclComm*>(ncclComm*)::{lambda()#1 (closed)} ()> >() + 9 4 0x7f997948fbe0p pthread_once + 80 5 0x7f9927a21463p paddle::platform::CUDADeviceContext::~CUDADeviceContext() + 259 6 0x7f9927a216b1p paddle::platform::CUDADeviceContext::~CUDADeviceContext() + 17 7 0x7f9925ec8c45p std::Hashtable<int, std::pair<int const, paddle::platform::NCCLContext>, std::allocator<std::pair<int const, paddle::platform::NCCLContext> >, std::detail::Select1st, std::equal_to, std::hash, std::detail::Mod_range_hashing, std::detail::Default_ranged_hash, std::detail::Prime_rehash_policy, std::detail::Hashtable_traits<false, false, true> >::clear() + 53 8 0x7f9925eca3b9p paddle::platform::NCCLContextMap::NCCLContextMap(std::vector<boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, 
paddle::platform::CUDAPinnedPlace, boost::detail::variant::void, boost::detail::variant::void, boost::detail::variant::void, boost::detail::variant::void, boost::detail::variant::void, boost::detail::variant::void, boost::detail::variant::void, boost::detail::variant::void, boost::detail::variant::void, boost::detail::variant::void, boost::detail::variant::void, boost::detail::variant::void, boost::detail::variant::void, boost::detail::variant::void, boost::detail::variant::void, boost::detail::variant::void, boost::detail::variant::void>, std::allocator<boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, paddle::platform::CUDAPinnedPlace, boost::detail::variant::void, boost::detail::variant::void, boost::detail::variant::void, boost::detail::variant::void, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> > > const&, ncclUniqueId*, unsigned long, unsigned long) + 2441 9 0x7f9925ec6090p paddle::framework::ParallelExecutor::ParallelExecutor(std::vector<boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, paddle::platform::CUDAPinnedPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_>, 
std::allocator<boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, paddle::platform::CUDAPinnedPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> > > const&, std::vector<std::string, std::allocatorstd::string > const&, std::string const&, paddle::framework::Scope*, std::vector<paddle::framework::Scope*, std::allocatorpaddle::framework::Scope* > const&, paddle::framework::details::ExecutionStrategy const&, paddle::framework::details::BuildStrategy const&, paddle::framework::ir::Graph*) + 2816 10 0x7f9925dae7e8p 11 0x7f9925d4a8dep 12 0x7f9979729973p PyObject_Call + 67 13 0x7f997973880dp 14 0x7f9979729973p PyObject_Call + 67 15 0x7f9979782764p 16 0x7f997977f01bp 17 0x7f9979729973p PyObject_Call + 67 18 0x7f99797bfd49p PyEval_EvalFrameEx + 15289 19 0x7f99797c56c9p PyEval_EvalCodeEx + 2025 20 0x7f99797c2b98p PyEval_EvalFrameEx + 27144 21 0x7f99797c56c9p PyEval_EvalCodeEx + 2025 22 0x7f99797c2b98p PyEval_EvalFrameEx + 27144 23 0x7f99797c56c9p PyEval_EvalCodeEx + 2025 24 0x7f997974e567p 25 0x7f9979729973p PyObject_Call + 67 26 0x7f997973880dp 27 0x7f9979729973p PyObject_Call + 67 28 0x7f9979782764p 29 0x7f997977f01bp 30 0x7f9979729973p PyObject_Call + 67 31 0x7f99797bfd49p PyEval_EvalFrameEx + 15289 32 0x7f99797c56c9p PyEval_EvalCodeEx + 2025 33 0x7f99797c2b98p PyEval_EvalFrameEx + 27144 34 0x7f99797c56c9p PyEval_EvalCodeEx + 2025 35 0x7f99797c58eap PyEval_EvalCode + 26 36 0x7f99797debadp 37 0x7f99797dfd28p PyRun_FileExFlags + 120 38 0x7f99797e0f48p PyRun_SimpleFileExFlags + 232 
39 0x7f99797f316cp Py_Main + 2988 40 0x7f99789dab45p __libc_start_main + 245 41 0x7f99798c307fp