From 3424a4c0d898820442b648c31b510403fea7c994 Mon Sep 17 00:00:00 2001 From: gangliao Date: Thu, 3 Nov 2016 22:39:39 -0700 Subject: [PATCH] Fix bug and redundant code in hl_dso_loader.cc (#306) --- paddle/cuda/src/hl_cuda_cudnn.cc | 73 ++++++++----------------------- paddle/cuda/src/hl_cuda_device.cc | 31 +++---------- paddle/cuda/src/hl_dso_loader.cc | 4 +- 3 files changed, 27 insertions(+), 81 deletions(-) diff --git a/paddle/cuda/src/hl_cuda_cudnn.cc b/paddle/cuda/src/hl_cuda_cudnn.cc index 7810d0d1005..92b28e4345c 100644 --- a/paddle/cuda/src/hl_cuda_cudnn.cc +++ b/paddle/cuda/src/hl_cuda_cudnn.cc @@ -41,65 +41,28 @@ void* cudnn_dso_handle = nullptr; #ifdef PADDLE_USE_DSO -#define DYNAMIC_LOAD_CUDNN_WRAP(__name) \ - struct DynLoad__##__name { \ - template <typename... Args> \ - cudnnStatus_t operator()(Args... args) { \ - typedef cudnnStatus_t (*cudnnFunc)(Args...); \ - std::call_once(cudnn_dso_flag, GetCudnnDsoHandle, \ - &cudnn_dso_handle); \ - void* p_##__name = dlsym(cudnn_dso_handle, #__name); \ - return reinterpret_cast<cudnnFunc>(p_##__name)(args...); \ - } \ +#define DYNAMIC_LOAD_CUDNN_WRAP(__name) \ + struct DynLoad__##__name { \ + template <typename... Args> \ + auto operator()(Args... args) -> decltype(__name(args...)) { \ + using cudnn_func = decltype(__name(args...))(*)(Args...); \ + std::call_once(cudnn_dso_flag, GetCudnnDsoHandle, \ + &cudnn_dso_handle); \ + void* p_##__name = dlsym(cudnn_dso_handle, #__name); \ + return reinterpret_cast<cudnn_func>(p_##__name)(args...); \ + } \ } __name; /* struct DynLoad__##__name */ -struct DynLoad__cudnnGetVersion { - template <typename... Args> - size_t operator()(Args... args) { - typedef size_t (*cudnnFunc)(Args...); - std::call_once(cudnn_dso_flag, GetCudnnDsoHandle, - &cudnn_dso_handle); - void* p_name = dlsym(cudnn_dso_handle, "cudnnGetVersion"); - return reinterpret_cast<cudnnFunc>(p_name)(args...); - } -} cudnnGetVersion; /* struct DynLoad__##__name */ - -struct DynLoad__cudnnGetErrorString { - template <typename... Args> - const char* operator()(Args...
args) { - typedef const char* (*cudnnFunc)(Args...); - std::call_once(cudnn_dso_flag, GetCudnnDsoHandle, - &cudnn_dso_handle); - void* p_name = dlsym(cudnn_dso_handle, "cudnnGetErrorString"); - return reinterpret_cast<cudnnFunc>(p_name)(args...); - } -} cudnnGetErrorString; /* struct DynLoad__##__name */ - - #else -#define DYNAMIC_LOAD_CUDNN_WRAP(__name) \ - struct DynLoad__##__name { \ - template <typename... Args> \ - cudnnStatus_t operator()(Args... args) { \ - return __name(args...); \ - } \ +#define DYNAMIC_LOAD_CUDNN_WRAP(__name) \ + struct DynLoad__##__name { \ + template <typename... Args> \ + auto operator()(Args... args) -> decltype(__name(args...)) { \ + return __name(args...); \ + } \ } __name; /* struct DynLoad__##__name */ -struct DynLoad__cudnnGetVersion { - template <typename... Args> - size_t operator()(Args... args) { - return cudnnGetVersion(args...); - } -} cudnnGetVersion; /* struct DynLoad__##__name */ - -struct DynLoad__cudnnGetErrorString { - template <typename... Args> - const char* operator()(Args... args) { - return cudnnGetErrorString(args...); - } -} cudnnGetErrorString; /* struct DynLoad__##__name */ - #endif /** @@ -133,7 +96,9 @@ struct DynLoad__cudnnGetErrorString { __macro(cudnnPoolingForward) \ __macro(cudnnPoolingBackward) \ __macro(cudnnSoftmaxBackward) \ - __macro(cudnnSoftmaxForward) + __macro(cudnnSoftmaxForward) \ + __macro(cudnnGetVersion) \ + __macro(cudnnGetErrorString) CUDNN_DNN_ROUTINE_EACH(DYNAMIC_LOAD_CUDNN_WRAP) #define CUDNN_DNN_ROUTINE_EACH_R2(__macro) \ diff --git a/paddle/cuda/src/hl_cuda_device.cc b/paddle/cuda/src/hl_cuda_device.cc index e9fe9f1c117..3ea2c91bd5a 100644 --- a/paddle/cuda/src/hl_cuda_device.cc +++ b/paddle/cuda/src/hl_cuda_device.cc @@ -85,44 +85,24 @@ void* cudart_dso_handle = nullptr; #define DYNAMIC_LOAD_CUDART_WRAP(__name) \ struct DynLoad__##__name { \ template <typename... Args> \ - cudaError_t operator()(Args... args) { \ - typedef cudaError_t (*cudartFunc)(Args...); \ + auto operator()(Args...
args) -> decltype(__name(args...)) { \ + using cudart_func = decltype(__name(args...))(*)(Args...); \ std::call_once(cudart_dso_flag, GetCudartDsoHandle, \ &cudart_dso_handle); \ void* p_##__name = dlsym(cudart_dso_handle, #__name); \ - return reinterpret_cast<cudartFunc>(p_##__name)(args...); \ + return reinterpret_cast<cudart_func>(p_##__name)(args...); \ } \ } __name; /* struct DynLoad__##__name */ #else #define DYNAMIC_LOAD_CUDART_WRAP(__name) \ struct DynLoad__##__name { \ template <typename... Args> \ - cudaError_t operator()(Args... args) { \ + auto operator()(Args... args) -> decltype(__name(args...)) { \ return __name(args...); \ } \ } __name; /* struct DynLoad__##__name */ #endif -#ifdef PADDLE_USE_DSO - struct DynLoad__cudaGetErrorString { - template <typename... Args> - const char* operator()(Args... args) { - typedef const char* (*cudaFunc)(Args...); - std::call_once(cudart_dso_flag, GetCudartDsoHandle, - &cudart_dso_handle); - void* p_func = dlsym(cudart_dso_handle, "cudaGetErrorString"); - return reinterpret_cast<cudaFunc>(p_func)(args...); - } - } cudaGetErrorString; /* struct DynLoad__cudaGetErrorString */ -#else -struct DynLoad__cudaGetErrorString { - template <typename... Args> - const char* operator()(Args...
args) { - return cudaGetErrorString(args...); - } -} cudaGetErrorString; /* struct DynLoad__cudaGetErrorString */ -#endif - /* include all needed cuda functions in HPPL */ #define CUDA_ROUTINE_EACH(__macro) \ __macro(cudaMalloc) \ @@ -152,7 +132,8 @@ struct DynLoad__cudaGetErrorString { __macro(cudaSetDeviceFlags) \ __macro(cudaGetLastError) \ __macro(cudaFuncSetCacheConfig) \ - __macro(cudaRuntimeGetVersion) + __macro(cudaRuntimeGetVersion) \ + __macro(cudaGetErrorString) CUDA_ROUTINE_EACH(DYNAMIC_LOAD_CUDART_WRAP) diff --git a/paddle/cuda/src/hl_dso_loader.cc b/paddle/cuda/src/hl_dso_loader.cc index 91c60d85a1e..c0b5d6e357f 100644 --- a/paddle/cuda/src/hl_dso_loader.cc +++ b/paddle/cuda/src/hl_dso_loader.cc @@ -49,14 +49,14 @@ static inline std::string join(const std::string& part1, const std::string& part static inline void GetDsoHandleFromDefaultPath( std::string& dso_path, void** dso_handle, int dynload_flags) { LOG(INFO) << "Try to find cuda library: " << dso_path - << "from default system path."; + << " from default system path."; // default search from LD_LIBRARY_PATH/DYLD_LIBRARY_PATH *dso_handle = dlopen(dso_path.c_str(), dynload_flags); // DYLD_LIBRARY_PATH is disabled after Mac OS 10.11 to // bring System Integrity Projection (SIP), if dso_handle // is null, search from default package path in Mac OS. - #if defined(__APPLE__) or defined(__OSX__) + #if defined(__APPLE__) || defined(__OSX__) if (nullptr == *dso_handle) { dso_path = join("/usr/local/cuda/lib/", dso_path); *dso_handle = dlopen(dso_path.c_str(), dynload_flags); -- GitLab