diff --git a/cmake/external/warpctc.cmake b/cmake/external/warpctc.cmake index 9a9a20f897e09b823dfb19ff841c3f2aeb3f9fe6..a631ad14b18310598f7eea3a51839d61a9e456ff 100644 --- a/cmake/external/warpctc.cmake +++ b/cmake/external/warpctc.cmake @@ -62,7 +62,8 @@ ExternalProject_Add( ) MESSAGE(STATUS "warp-ctc library: ${WARPCTC_LIBRARIES}") -INCLUDE_DIRECTORIES(${WARPCTC_INCLUDE_DIR}) +INCLUDE_DIRECTORIES(${WARPCTC_INCLUDE_DIR}) # For warpctc code to include its headers. +INCLUDE_DIRECTORIES(${THIRD_PARTY_PATH}/install) # For Paddle code to include warpctc headers. ADD_LIBRARY(warpctc SHARED IMPORTED GLOBAL) SET_PROPERTY(TARGET warpctc PROPERTY IMPORTED_LOCATION ${WARPCTC_LIBRARIES}) diff --git a/paddle/fluid/framework/details/CMakeLists.txt b/paddle/fluid/framework/details/CMakeLists.txt index bf1a705ef50b663efa53393ead1f81fd6bcf8c48..89b5c6847f15b3f2a270fe1e7db9e590549e8982 100644 --- a/paddle/fluid/framework/details/CMakeLists.txt +++ b/paddle/fluid/framework/details/CMakeLists.txt @@ -16,6 +16,6 @@ else() endif() cc_library(multi_devices_graph_builder SRCS multi_devices_graph_builder.cc DEPS ssa_graph_builder computation_op_handle scale_loss_grad_op_handle ${multi_devices_graph_builder_deps}) -cc_library(ssa_graph_executor SRCS ssa_graph_executor.cc DEPS ssa_graph) +cc_library(ssa_graph_executor SRCS ssa_graph_executor.cc DEPS ssa_graph framework_proto) cc_library(threaded_ssa_graph_executor SRCS threaded_ssa_graph_executor.cc DEPS fetch_op_handle ssa_graph_executor scope simple_threadpool device_context) diff --git a/paddle/fluid/platform/dynload/cublas.h b/paddle/fluid/platform/dynload/cublas.h index 3b8d192b6c6356d3ab21868d3d74957700b4cdc1..a41018d350e89881888d5e31089c2b9ecd76f6c0 100644 --- a/paddle/fluid/platform/dynload/cublas.h +++ b/paddle/fluid/platform/dynload/cublas.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ #pragma once @@ -35,18 +35,18 @@ extern void *cublas_dso_handle; * note: default dynamic linked libs */ #ifdef PADDLE_USE_DSO -#define DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP(__name) \ - struct DynLoad__##__name { \ - template \ - inline cublasStatus_t operator()(Args... args) { \ - typedef cublasStatus_t (*cublasFunc)(Args...); \ - std::call_once(cublas_dso_flag, \ - paddle::platform::dynload::GetCublasDsoHandle, \ - &cublas_dso_handle); \ - void *p_##__name = dlsym(cublas_dso_handle, #__name); \ - return reinterpret_cast(p_##__name)(args...); \ - } \ - }; \ +#define DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP(__name) \ + struct DynLoad__##__name { \ + template \ + inline cublasStatus_t operator()(Args... args) { \ + typedef cublasStatus_t (*cublasFunc)(Args...); \ + std::call_once(cublas_dso_flag, []() { \ + cublas_dso_handle = paddle::platform::dynload::GetCublasDsoHandle(); \ + }); \ + void *p_##__name = dlsym(cublas_dso_handle, #__name); \ + return reinterpret_cast(p_##__name)(args...); \ + } \ + }; \ extern DynLoad__##__name __name #else #define DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP(__name) \ diff --git a/paddle/fluid/platform/dynload/cudnn.cc b/paddle/fluid/platform/dynload/cudnn.cc index c65b060ab46cfcd38292be66dd5f2123f88bae63..f3cd3b2bbedef7c9140c2acddea0732972ff7fa0 100644 --- a/paddle/fluid/platform/dynload/cudnn.cc +++ b/paddle/fluid/platform/dynload/cudnn.cc @@ -44,7 +44,8 @@ CUDNN_DNN_ROUTINE_EACH_R7(DEFINE_WRAP); #ifdef PADDLE_USE_DSO bool HasCUDNN() { - std::call_once(cudnn_dso_flag, GetCUDNNDsoHandle, &cudnn_dso_handle); + std::call_once(cudnn_dso_flag, + []() { cudnn_dso_handle = GetCUDNNDsoHandle(); }); return cudnn_dso_handle != nullptr; } diff --git a/paddle/fluid/platform/dynload/cudnn.h b/paddle/fluid/platform/dynload/cudnn.h index 49a54d8478e9a4e507d31a67b924802def356bfa..24475b62ca2825c45ff7edb39328dece3b822b25 100644 --- a/paddle/fluid/platform/dynload/cudnn.h +++ b/paddle/fluid/platform/dynload/cudnn.h @@ -30,19 +30,19 @@ extern bool HasCUDNN(); #ifdef PADDLE_USE_DSO extern void EnforceCUDNNLoaded(const char* fn_name); -#define DECLARE_DYNAMIC_LOAD_CUDNN_WRAP(__name) \ - struct DynLoad__##__name { \ - template \ - auto operator()(Args... args) -> decltype(__name(args...)) { \ - using cudnn_func = decltype(__name(args...)) (*)(Args...); \ - std::call_once(cudnn_dso_flag, \ - paddle::platform::dynload::GetCUDNNDsoHandle, \ - &cudnn_dso_handle); \ - EnforceCUDNNLoaded(#__name); \ - void* p_##__name = dlsym(cudnn_dso_handle, #__name); \ - return reinterpret_cast(p_##__name)(args...); \ - } \ - }; \ +#define DECLARE_DYNAMIC_LOAD_CUDNN_WRAP(__name) \ + struct DynLoad__##__name { \ + template \ + auto operator()(Args... args) -> decltype(__name(args...)) { \ + using cudnn_func = decltype(__name(args...)) (*)(Args...); \ + std::call_once(cudnn_dso_flag, []() { \ + cudnn_dso_handle = paddle::platform::dynload::GetCUDNNDsoHandle(); \ + }); \ + EnforceCUDNNLoaded(#__name); \ + void* p_##__name = dlsym(cudnn_dso_handle, #__name); \ + return reinterpret_cast(p_##__name)(args...); \ + } \ + }; \ extern struct DynLoad__##__name __name #else diff --git a/paddle/fluid/platform/dynload/cupti.h b/paddle/fluid/platform/dynload/cupti.h index c1bf88f8cb690861b97686d99d36410143445243..d0d676b9d8ac462900b48246bec43166d04ef97b 100644 --- a/paddle/fluid/platform/dynload/cupti.h +++ b/paddle/fluid/platform/dynload/cupti.h @@ -11,14 +11,15 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - #pragma once #ifdef PADDLE_WITH_CUPTI + #include #include #include -#include +#include // NOLINT + #include "paddle/fluid/platform/dynload/dynamic_loader.h" namespace paddle { @@ -36,18 +37,18 @@ extern void *cupti_dso_handle; * note: default dynamic linked libs */ #ifdef PADDLE_USE_DSO -#define DECLARE_DYNAMIC_LOAD_CUPTI_WRAP(__name) \ - struct DynLoad__##__name { \ - template \ - inline CUptiResult CUPTIAPI operator()(Args... args) { \ - typedef CUptiResult CUPTIAPI (*cuptiFunc)(Args...); \ - std::call_once(cupti_dso_flag, \ - paddle::platform::dynload::GetCUPTIDsoHandle, \ - &cupti_dso_handle); \ - void *p_##__name = dlsym(cupti_dso_handle, #__name); \ - return reinterpret_cast(p_##__name)(args...); \ - } \ - }; \ +#define DECLARE_DYNAMIC_LOAD_CUPTI_WRAP(__name) \ + struct DynLoad__##__name { \ + template \ + inline CUptiResult CUPTIAPI operator()(Args... args) { \ + typedef CUptiResult CUPTIAPI (*cuptiFunc)(Args...); \ + std::call_once(cupti_dso_flag, []() { \ + cupti_dso_handle = paddle::platform::dynload::GetCUPTIDsoHandle(); \ + }); \ + void *p_##__name = dlsym(cupti_dso_handle, #__name); \ + return reinterpret_cast(p_##__name)(args...); \ + } \ + }; \ extern DynLoad__##__name __name #else #define DECLARE_DYNAMIC_LOAD_CUPTI_WRAP(__name) \ diff --git a/paddle/fluid/platform/dynload/curand.h b/paddle/fluid/platform/dynload/curand.h index 1b3ff962d6edceb37deb94cc7daead7346d25352..4697fb6cd96770127206bdabeea77e43eb09d1f5 100644 --- a/paddle/fluid/platform/dynload/curand.h +++ b/paddle/fluid/platform/dynload/curand.h @@ -11,12 +11,13 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - #pragma once #include #include -#include + +#include // NOLINT + #include "paddle/fluid/platform/dynload/dynamic_loader.h" namespace paddle { @@ -25,18 +26,18 @@ namespace dynload { extern std::once_flag curand_dso_flag; extern void *curand_dso_handle; #ifdef PADDLE_USE_DSO -#define DECLARE_DYNAMIC_LOAD_CURAND_WRAP(__name) \ - struct DynLoad__##__name { \ - template \ - curandStatus_t operator()(Args... args) { \ - typedef curandStatus_t (*curandFunc)(Args...); \ - std::call_once(curand_dso_flag, \ - paddle::platform::dynload::GetCurandDsoHandle, \ - &curand_dso_handle); \ - void *p_##__name = dlsym(curand_dso_handle, #__name); \ - return reinterpret_cast(p_##__name)(args...); \ - } \ - }; \ +#define DECLARE_DYNAMIC_LOAD_CURAND_WRAP(__name) \ + struct DynLoad__##__name { \ + template \ + curandStatus_t operator()(Args... args) { \ + typedef curandStatus_t (*curandFunc)(Args...); \ + std::call_once(curand_dso_flag, []() { \ + curand_dso_handle = paddle::platform::dynload::GetCurandDsoHandle(); \ + }); \ + void *p_##__name = dlsym(curand_dso_handle, #__name); \ + return reinterpret_cast(p_##__name)(args...); \ + } \ + }; \ extern DynLoad__##__name __name #else #define DECLARE_DYNAMIC_LOAD_CURAND_WRAP(__name) \ diff --git a/paddle/fluid/platform/dynload/dynamic_loader.cc b/paddle/fluid/platform/dynload/dynamic_loader.cc index e590e81bab51fd9fe12309335522614263d8e21d..3c1ccc7445ed27c711ab250aa223c66ae0da45dc 100644 --- a/paddle/fluid/platform/dynload/dynamic_loader.cc +++ b/paddle/fluid/platform/dynload/dynamic_loader.cc @@ -11,12 +11,14 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - #include "paddle/fluid/platform/dynload/dynamic_loader.h" + #include + #include -#include +#include // NOLINT #include + #include "gflags/gflags.h" #include "glog/logging.h" #include "paddle/fluid/platform/dynload/cupti_lib_path.h" @@ -65,22 +67,21 @@ static inline std::string join(const std::string& part1, return ret; } -static inline void GetDsoHandleFromDefaultPath(std::string& dso_path, - void** dso_handle, - int dynload_flags) { +static inline void* GetDsoHandleFromDefaultPath(const std::string& dso_path, + int dynload_flags) { VLOG(3) << "Try to find library: " << dso_path << " from default system path."; // default search from LD_LIBRARY_PATH/DYLD_LIBRARY_PATH - *dso_handle = dlopen(dso_path.c_str(), dynload_flags); + void* dso_handle = dlopen(dso_path.c_str(), dynload_flags); // DYLD_LIBRARY_PATH is disabled after Mac OS 10.11 to // bring System Integrity Projection (SIP), if dso_handle // is null, search from default package path in Mac OS. #if defined(__APPLE__) || defined(__OSX__) - if (nullptr == *dso_handle) { - dso_path = join("/usr/local/cuda/lib/", dso_path); - *dso_handle = dlopen(dso_path.c_str(), dynload_flags); - if (nullptr == *dso_handle) { + if (nullptr == dso_handle) { + dso_handle = + dlopen(join("/usr/local/cuda/lib/", dso_path).c_str(), dynload_flags); + if (nullptr == dso_handle) { if (dso_path == "libcudnn.dylib") { LOG(WARNING) << "Note: [Recommend] copy cudnn into /usr/local/cuda/ \n " "For instance, sudo tar -xzf " @@ -91,28 +92,29 @@ static inline void GetDsoHandleFromDefaultPath(std::string& dso_path, } } #endif + + return dso_handle; } -static inline void GetDsoHandleFromSearchPath(const std::string& search_root, - const std::string& dso_name, - void** dso_handle, - bool throw_on_error = true) { +static inline void* GetDsoHandleFromSearchPath(const std::string& search_root, + const std::string& dso_name, + bool throw_on_error = true) { int dynload_flags = RTLD_LAZY | RTLD_LOCAL; - *dso_handle = nullptr; + void* dso_handle = nullptr; std::string dlPath = dso_name; if (search_root.empty()) { - GetDsoHandleFromDefaultPath(dlPath, dso_handle, dynload_flags); + dso_handle = GetDsoHandleFromDefaultPath(dlPath, dynload_flags); } else { // search xxx.so from custom path dlPath = join(search_root, dso_name); - *dso_handle = dlopen(dlPath.c_str(), dynload_flags); + dso_handle = dlopen(dlPath.c_str(), dynload_flags); // if not found, search from default path - if (nullptr == *dso_handle) { + if (nullptr == dso_handle) { LOG(WARNING) << "Failed to find dynamic library: " << dlPath << " (" << dlerror() << ")"; dlPath = dso_name; - GetDsoHandleFromDefaultPath(dlPath, dso_handle, dynload_flags); + dso_handle = GetDsoHandleFromDefaultPath(dlPath, dynload_flags); } } auto error_msg = @@ -124,70 +126,71 @@ static inline void GetDsoHandleFromSearchPath(const std::string& search_root, "using the DYLD_LIBRARY_PATH is impossible unless System " "Integrity Protection (SIP) is disabled."; if (throw_on_error) { - PADDLE_ENFORCE(nullptr != *dso_handle, error_msg, dlPath, dlerror()); - } else if (nullptr == *dso_handle) { + PADDLE_ENFORCE(nullptr != dso_handle, error_msg, dlPath, dlerror()); + } else if (nullptr == dso_handle) { LOG(WARNING) << string::Sprintf(error_msg, dlPath, dlerror()); } + + return dso_handle; } -void GetCublasDsoHandle(void** dso_handle) { +void* GetCublasDsoHandle() { #if defined(__APPLE__) || defined(__OSX__) - GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcublas.dylib", dso_handle); + return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcublas.dylib"); #else - GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcublas.so", dso_handle); + return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcublas.so"); #endif } -void GetCUDNNDsoHandle(void** dso_handle) { +void* GetCUDNNDsoHandle() { #if defined(__APPLE__) || defined(__OSX__) - GetDsoHandleFromSearchPath(FLAGS_cudnn_dir, "libcudnn.dylib", dso_handle, - false); + return GetDsoHandleFromSearchPath(FLAGS_cudnn_dir, "libcudnn.dylib", false); #else - GetDsoHandleFromSearchPath(FLAGS_cudnn_dir, "libcudnn.so", dso_handle, false); + return GetDsoHandleFromSearchPath(FLAGS_cudnn_dir, "libcudnn.so", false); #endif } -void GetCUPTIDsoHandle(void** dso_handle) { +void* GetCUPTIDsoHandle() { std::string cupti_path = cupti_lib_path; if (!FLAGS_cupti_dir.empty()) { cupti_path = FLAGS_cupti_dir; } #if defined(__APPLE__) || defined(__OSX__) - GetDsoHandleFromSearchPath(cupti_path, "libcupti.dylib", dso_handle, false); + return GetDsoHandleFromSearchPath(cupti_path, "libcupti.dylib", false); #else - GetDsoHandleFromSearchPath(cupti_path, "libcupti.so", dso_handle, false); + return GetDsoHandleFromSearchPath(cupti_path, "libcupti.so", false); #endif } -void GetCurandDsoHandle(void** dso_handle) { +void* GetCurandDsoHandle() { #if defined(__APPLE__) || defined(__OSX__) - GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcurand.dylib", dso_handle); + return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcurand.dylib"); #else - GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcurand.so", dso_handle); + return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcurand.so"); #endif } -void GetWarpCTCDsoHandle(void** dso_handle) { +void* GetWarpCTCDsoHandle() { #if defined(__APPLE__) || defined(__OSX__) - GetDsoHandleFromSearchPath(FLAGS_warpctc_dir, "libwarpctc.dylib", dso_handle); + return GetDsoHandleFromSearchPath(FLAGS_warpctc_dir, "libwarpctc.dylib"); #else - GetDsoHandleFromSearchPath(FLAGS_warpctc_dir, "libwarpctc.so", dso_handle); + return GetDsoHandleFromSearchPath(FLAGS_warpctc_dir, "libwarpctc.so"); #endif } -void GetLapackDsoHandle(void** dso_handle) { +void* GetLapackDsoHandle() { #if defined(__APPLE__) || defined(__OSX__) - GetDsoHandleFromSearchPath(FLAGS_lapack_dir, "liblapacke.dylib", dso_handle); + return GetDsoHandleFromSearchPath(FLAGS_lapack_dir, "liblapacke.dylib"); #else - GetDsoHandleFromSearchPath(FLAGS_lapack_dir, "liblapacke.so", dso_handle); + return GetDsoHandleFromSearchPath(FLAGS_lapack_dir, "liblapacke.so"); #endif } -void GetNCCLDsoHandle(void** dso_handle) { +void* GetNCCLDsoHandle() { #if defined(__APPLE__) || defined(__OSX__) - GetDsoHandleFromSearchPath(FLAGS_nccl_dir, "libnccl.dylib", dso_handle); + return GetDsoHandleFromSearchPath(FLAGS_nccl_dir, "libnccl.dylib"); #else - GetDsoHandleFromSearchPath(FLAGS_nccl_dir, "libnccl.so", dso_handle); + return GetDsoHandleFromSearchPath(FLAGS_nccl_dir, "libnccl.so"); #endif } diff --git a/paddle/fluid/platform/dynload/dynamic_loader.h b/paddle/fluid/platform/dynload/dynamic_loader.h index b5b9c4af916241c1c7361b506f74563ebcf69b9a..4c85093a43e0e8d75b64c5b29d1ec68db1b44909 100644 --- a/paddle/fluid/platform/dynload/dynamic_loader.h +++ b/paddle/fluid/platform/dynload/dynamic_loader.h @@ -18,55 +18,13 @@ namespace paddle { namespace platform { namespace dynload { -/** - * @brief load the DSO of CUBLAS - * - * @param **dso_handle dso handler - * - */ -void GetCublasDsoHandle(void** dso_handle); - -/** - * @brief load the DSO of CUDNN - * - * @param **dso_handle dso handler - * - */ -void GetCUDNNDsoHandle(void** dso_handle); - -void GetCUPTIDsoHandle(void** dso_handle); - -/** - * @brief load the DSO of CURAND - * - * @param **dso_handle dso handler - * - */ -void GetCurandDsoHandle(void** dso_handle); - -/** - * @brief load the DSO of warp-ctc - * - * @param **dso_handle dso handler - * - */ -void GetWarpCTCDsoHandle(void** dso_handle); - -/** - * @brief load the DSO of lapack - * - * @param **dso_handle dso handler - * - */ -void GetLapackDsoHandle(void** dso_handle); - -/** - * @brief load the DSO of NVIDIA nccl - * - * @param **dso_handle dso handler - * - */ -void GetNCCLDsoHandle(void** dso_handle); +void* GetCublasDsoHandle(); +void* GetCUDNNDsoHandle(); +void* GetCUPTIDsoHandle(); +void* GetCurandDsoHandle(); +void* GetWarpCTCDsoHandle(); +void* GetLapackDsoHandle(); +void* GetNCCLDsoHandle(); } // namespace dynload } // namespace platform diff --git a/paddle/fluid/platform/dynload/nccl.cc b/paddle/fluid/platform/dynload/nccl.cc index 3edc70c46d03ddcc751e865676928c47fcb48e69..2c40c48ee08497f9a2a414687b9c51d87ba574aa 100644 --- a/paddle/fluid/platform/dynload/nccl.cc +++ b/paddle/fluid/platform/dynload/nccl.cc @@ -25,11 +25,6 @@ void *nccl_dso_handle; NCCL_RAND_ROUTINE_EACH(DEFINE_WRAP); -void LoadNCCLDSO() { - platform::call_once(nccl_dso_flag, - [] { GetNCCLDsoHandle(&nccl_dso_handle); }); -} - } // namespace dynload } // namespace platform } // namespace paddle diff --git a/paddle/fluid/platform/dynload/nccl.h b/paddle/fluid/platform/dynload/nccl.h index dc78bcb44d3316a1ecee0c8d70dcb4777a9e2de4..d21e29df3cf9b2d78920d8bac41209d200b5ba3a 100644 --- a/paddle/fluid/platform/dynload/nccl.h +++ b/paddle/fluid/platform/dynload/nccl.h @@ -11,12 +11,13 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - #pragma once #include #include -#include + +#include // NOLINT + #include "paddle/fluid/platform/call_once.h" #include "paddle/fluid/platform/dynload/dynamic_loader.h" @@ -28,18 +29,19 @@ extern std::once_flag nccl_dso_flag; extern void* nccl_dso_handle; #ifdef PADDLE_USE_DSO -extern void LoadNCCLDSO(); -#define DECLARE_DYNAMIC_LOAD_NCCL_WRAP(__name) \ - struct DynLoad__##__name { \ - template \ - auto operator()(Args... args) -> decltype(__name(args...)) { \ - using nccl_func = decltype(__name(args...)) (*)(Args...); \ - paddle::platform::dynload::LoadNCCLDSO(); \ - void* p_##__name = dlsym(nccl_dso_handle, #__name); \ - return reinterpret_cast(p_##__name)(args...); \ - } \ - }; \ +#define DECLARE_DYNAMIC_LOAD_NCCL_WRAP(__name) \ + struct DynLoad__##__name { \ + template \ + auto operator()(Args... args) -> decltype(__name(args...)) { \ + using nccl_func = decltype(__name(args...)) (*)(Args...); \ + std::call_once(nccl_dso_flag, []() { \ + nccl_dso_handle = paddle::platform::dynload::GetNCCLDsoHandle(); \ + }); \ + void* p_##__name = dlsym(nccl_dso_handle, #__name); \ + return reinterpret_cast(p_##__name)(args...); \ + } \ + }; \ extern DynLoad__##__name __name #else #define DECLARE_DYNAMIC_LOAD_NCCL_WRAP(__name) \ diff --git a/paddle/fluid/platform/dynload/warpctc.h b/paddle/fluid/platform/dynload/warpctc.h index f5ded0eb6b1107c886641e848f5040a7a2d806a5..7fa468370463a51c486b80317f401612930bc72e 100644 --- a/paddle/fluid/platform/dynload/warpctc.h +++ b/paddle/fluid/platform/dynload/warpctc.h @@ -15,9 +15,10 @@ limitations under the License. */ #pragma once #include -#include -#include "ctc.h" +#include // NOLINT + #include "paddle/fluid/platform/dynload/dynamic_loader.h" +#include "warpctc/include/ctc.h" namespace paddle { namespace platform { @@ -31,18 +32,18 @@ extern void* warpctc_dso_handle; * (for each function) to dynamic load warpctc routine * via operator overloading. */ -#define DYNAMIC_LOAD_WARPCTC_WRAP(__name) \ - struct DynLoad__##__name { \ - template \ - auto operator()(Args... args) -> decltype(__name(args...)) { \ - using warpctcFunc = decltype(__name(args...)) (*)(Args...); \ - std::call_once(warpctc_dso_flag, \ - paddle::platform::dynload::GetWarpCTCDsoHandle, \ - &warpctc_dso_handle); \ - void* p_##_name = dlsym(warpctc_dso_handle, #__name); \ - return reinterpret_cast(p_##_name)(args...); \ - } \ - }; \ +#define DYNAMIC_LOAD_WARPCTC_WRAP(__name) \ + struct DynLoad__##__name { \ + template \ + auto operator()(Args... args) -> decltype(__name(args...)) { \ + using warpctcFunc = decltype(__name(args...)) (*)(Args...); \ + std::call_once(warpctc_dso_flag, []() { \ + warpctc_dso_handle = paddle::platform::dynload::GetWarpCTCDsoHandle(); \ + }); \ + void* p_##_name = dlsym(warpctc_dso_handle, #__name); \ + return reinterpret_cast(p_##_name)(args...); \ + } \ + }; \ extern DynLoad__##__name __name #define DECLARE_DYNAMIC_LOAD_WARPCTC_WRAP(__name) \