diff --git a/paddle/platform/dynload/CMakeLists.txt b/paddle/platform/dynload/CMakeLists.txt index ceb66f84b6b01892cbaf61c79a47ae60d2589164..4c8be33480e80e586a4bbbdb2664eb4cb2db8f0b 100644 --- a/paddle/platform/dynload/CMakeLists.txt +++ b/paddle/platform/dynload/CMakeLists.txt @@ -1,2 +1,2 @@ cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags) -nv_library(dynload_cuda SRCS cublas.cc cudnn.cc curand.cc DEPS dynamic_loader) +nv_library(dynload_cuda SRCS cublas.cc cudnn.cc curand.cc nccl.cc DEPS dynamic_loader) diff --git a/paddle/platform/dynload/dynamic_loader.cc b/paddle/platform/dynload/dynamic_loader.cc index ae9a0a982c73de05821579d22b7f9ad99f24a92b..5c2ee2e5fc22d409eb7c90e6be21c919f47daed3 100644 --- a/paddle/platform/dynload/dynamic_loader.cc +++ b/paddle/platform/dynload/dynamic_loader.cc @@ -35,6 +35,11 @@ DEFINE_string(warpctc_dir, "", "Specify path for loading libwarpctc.so."); DEFINE_string(lapack_dir, "", "Specify path for loading liblapack.so."); +DEFINE_string(nccl_dir, "", + "Specify path for loading nccl library, such as libcublas, " + "libcurand. For instance, /usr/local/cuda/lib64. If default, " + "dlopen will search cuda from LD_LIBRARY_PATH"); + namespace paddle { namespace platform { namespace dynload { @@ -157,6 +162,14 @@ void GetLapackDsoHandle(void** dso_handle) { #endif } +void GetNcclDsoHandle(void** dso_handle) { +#if defined(__APPLE__) || defined(__OSX__) + GetDsoHandleFromSearchPath(FLAGS_nccl_dir, "libnccl.dylib", dso_handle); +#else + GetDsoHandleFromSearchPath(FLAGS_nccl_dir, "libnccl.so", dso_handle); +#endif +} + } // namespace dynload } // namespace platform } // namespace paddle diff --git a/paddle/platform/dynload/dynamic_loader.h b/paddle/platform/dynload/dynamic_loader.h index a99b05443feb909f10b2c56f4d8bdf3c6fa11e3f..b9483890be19858367464edf63cbe28e23e9ab3e 100644 --- a/paddle/platform/dynload/dynamic_loader.h +++ b/paddle/platform/dynload/dynamic_loader.h @@ -58,6 +58,14 @@ void GetWarpCTCDsoHandle(void** dso_handle); */ void GetLapackDsoHandle(void** dso_handle); +/** + * @brief load the DSO of NVIDIA nccl + * + * @param **dso_handle dso handler + * + */ +void GetNcclDsoHandle(void** dso_handle); + } // namespace dynload } // namespace platform } // namespace paddle diff --git a/paddle/platform/dynload/nccl.cc b/paddle/platform/dynload/nccl.cc new file mode 100644 index 0000000000000000000000000000000000000000..8f92b8d94d56047b7d3fb43b15e3c06575c8d57b --- /dev/null +++ b/paddle/platform/dynload/nccl.cc @@ -0,0 +1,30 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/platform/dynload/nccl.h" + +namespace paddle { +namespace platform { +namespace dynload { + +std::once_flag nccl_dso_flag; +void *nccl_dso_handle; + +#define DEFINE_WRAP(__name) DynLoad__##__name __name + +NCCL_RAND_ROUTINE_EACH(DEFINE_WRAP); + +} // namespace dynload +} // namespace platform +} // namespace paddle diff --git a/paddle/platform/dynload/nccl.h b/paddle/platform/dynload/nccl.h new file mode 100644 index 0000000000000000000000000000000000000000..ad050da4ad32e5d2015fcbcaaffaa428ba0c3c42 --- /dev/null +++ b/paddle/platform/dynload/nccl.h @@ -0,0 +1,72 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +#include +#include +#include +#include "paddle/platform/dynload/dynamic_loader.h" + +namespace paddle { +namespace platform { +namespace dynload { + +extern std::once_flag nccl_dso_flag; +extern void* nccl_dso_handle; + +#ifdef PADDLE_USE_DSO +#define DECLARE_DYNAMIC_LOAD_NCCL_WRAP(__name) \ + struct DynLoad__##__name { \ + template \ + ncclResult_t operator()(Args... args) { \ + typedef ncclResult_t (*ncclFunc)(Args...); \ + std::call_once(nccl_dso_flag, \ + paddle::platform::dynload::GetNcclDsoHandle, \ + &nccl_dso_handle); \ + void* p_##__name = dlsym(nccl_dso_handle, #__name); \ + return reinterpret_cast(p_##__name)(args...); \ + } \ + }; \ + extern DynLoad__##__name __name +#else +#define DECLARE_DYNAMIC_LOAD_NCCL_WRAP(__name) \ + struct DynLoad__##__name { \ + template \ + ncclResult_t operator()(Args... args) { \ + return __name(args...); \ + } \ + }; \ + extern DynLoad__##__name __name +#endif + +#define NCCL_RAND_ROUTINE_EACH(__macro) \ + __macro(ncclCommInitAll); \ + __macro(ncclGetUniqueId); \ + __macro(ncclCommInitRank); \ + __macro(ncclCommDestroy); \ + __macro(ncclCommCount); \ + __macro(ncclCommCuDevice); \ + __macro(ncclCommUserRank); \ + __macro(ncclAllReduce); \ + __macro(ncclBcast); \ + __macro(ncclAllGather); \ + __macro(ncclReduce); \ + __macro(ncclGetErrorString); + +NCCL_RAND_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_NCCL_WRAP); + +} // namespace dynload +} // namespace platform +} // namespace paddle diff --git a/paddle/platform/enforce.h b/paddle/platform/enforce.h index cd906c3fa9375cd6edaed0377a596771e25043d4..2f9e7466f1f6de5ad0252ae4f89ee8a3805b1f2e 100644 --- a/paddle/platform/enforce.h +++ b/paddle/platform/enforce.h @@ -34,6 +34,7 @@ limitations under the License. */ #include "paddle/platform/dynload/cublas.h" #include "paddle/platform/dynload/cudnn.h" #include "paddle/platform/dynload/curand.h" +#include "paddle/platform/dynload/nccl.h" #include #include @@ -172,6 +173,17 @@ inline typename std::enable_if::type throw_on_error( throw std::runtime_error(err + string::Sprintf(args...)); } +template +inline typename std::enable_if::type throw_on_error( + ncclResult_t stat, const Args&... args) { + if (stat == ncclSuccess) { + return; + } else { + throw std::runtime_error(platform::dynload::ncclGetErrorString(stat) + + string::Sprintf(args...)); + } +} + #endif // PADDLE_ONLY_CPU template