From d5cab4f07c3e63970642cd428a9a33b284ddf4f1 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Fri, 15 Dec 2017 14:22:30 +0800 Subject: [PATCH] Fix compile on CUDA9.1 & MacOS (#6642) --- paddle/math/float16.h | 4 ++-- paddle/platform/dynload/nccl.cc | 5 +++++ paddle/platform/dynload/nccl.h | 24 ++++++++++++------------ paddle/platform/nccl_test.cu | 4 ++-- paddle/platform/variant.h | 13 +++++++++++++ python/.gitignore | 1 + 6 files changed, 35 insertions(+), 16 deletions(-) diff --git a/paddle/math/float16.h b/paddle/math/float16.h index 76ad3a01239..efebbce5040 100644 --- a/paddle/math/float16.h +++ b/paddle/math/float16.h @@ -79,7 +79,7 @@ public: #ifdef PADDLE_CUDA_FP16 HOSTDEVICE inline explicit float16(const half& h) { #if CUDA_VERSION >= 9000 - x = reinterpret_cast<__half_raw*>(&h)->x; + x = reinterpret_cast<__half_raw*>(const_cast(&h))->x; #else x = h.x; #endif // CUDA_VERSION >= 9000 @@ -145,7 +145,7 @@ public: #ifdef PADDLE_CUDA_FP16 HOSTDEVICE inline float16& operator=(const half& rhs) { #if CUDA_VERSION >= 9000 - x = reinterpret_cast<__half_raw*>(&rhs)->x; + x = reinterpret_cast<__half_raw*>(const_cast(&rhs))->x; #else x = rhs.x; #endif diff --git a/paddle/platform/dynload/nccl.cc b/paddle/platform/dynload/nccl.cc index 8f92b8d94d5..91168f37eff 100644 --- a/paddle/platform/dynload/nccl.cc +++ b/paddle/platform/dynload/nccl.cc @@ -25,6 +25,11 @@ void *nccl_dso_handle; NCCL_RAND_ROUTINE_EACH(DEFINE_WRAP); +void LoadNCCLDSO() { + platform::call_once(nccl_dso_flag, + [] { GetNCCLDsoHandle(&nccl_dso_handle); }); +} + } // namespace dynload } // namespace platform } // namespace paddle diff --git a/paddle/platform/dynload/nccl.h b/paddle/platform/dynload/nccl.h index 981b2ab258a..11007c1031c 100644 --- a/paddle/platform/dynload/nccl.h +++ b/paddle/platform/dynload/nccl.h @@ -28,18 +28,18 @@ extern std::once_flag nccl_dso_flag; extern void* nccl_dso_handle; #ifdef PADDLE_USE_DSO -#define DECLARE_DYNAMIC_LOAD_NCCL_WRAP(__name) \ - struct DynLoad__##__name { \ - template \ - auto operator()(Args... args) -> decltype(__name(args...)) { \ - using nccl_func = decltype(__name(args...)) (*)(Args...); \ - platform::call_once(nccl_dso_flag, \ - paddle::platform::dynload::GetNCCLDsoHandle, \ - &nccl_dso_handle); \ - void* p_##__name = dlsym(nccl_dso_handle, #__name); \ - return reinterpret_cast(p_##__name)(args...); \ - } \ - }; \ +extern void LoadNCCLDSO(); + +#define DECLARE_DYNAMIC_LOAD_NCCL_WRAP(__name) \ + struct DynLoad__##__name { \ + template \ + auto operator()(Args... args) -> decltype(__name(args...)) { \ + using nccl_func = decltype(__name(args...)) (*)(Args...); \ + paddle::platform::dynload::LoadNCCLDSO(); \ + void* p_##__name = dlsym(nccl_dso_handle, #__name); \ + return reinterpret_cast(p_##__name)(args...); \ + } \ + }; \ extern DynLoad__##__name __name #else #define DECLARE_DYNAMIC_LOAD_NCCL_WRAP(__name) \ diff --git a/paddle/platform/nccl_test.cu b/paddle/platform/nccl_test.cu index c99dae68bef..94ab360a196 100644 --- a/paddle/platform/nccl_test.cu +++ b/paddle/platform/nccl_test.cu @@ -31,7 +31,7 @@ namespace platform { TEST(NCCL, init) { std::vector comms; comms.resize(dev_count); - PADDLE_ENFORCE(dynload::ncclCommInitAll(comms.data(), dev_count, nullptr)); + dynload::ncclCommInitAll(comms.data(), dev_count, nullptr); for (int i = 0; i < dev_count; ++i) { dynload::ncclCommDestroy(comms[i]); } @@ -62,7 +62,7 @@ TEST(NCCL, all_reduce) { std::vector comms; comms.resize(dev_count); VLOG(1) << "Initializing ncclComm"; - PADDLE_ENFORCE(dynload::ncclCommInitAll(comms.data(), dev_count, nullptr)); + dynload::ncclCommInitAll(comms.data(), dev_count, nullptr); VLOG(1) << "ncclComm initialized"; VLOG(1) << "Creating thread data"; std::vector>> data; diff --git a/paddle/platform/variant.h b/paddle/platform/variant.h index 619897ca19e..284b4c42ac0 100644 --- a/paddle/platform/variant.h +++ b/paddle/platform/variant.h @@ -14,6 +14,19 @@ #pragma once +#ifdef __CUDACC__ +#ifdef __CUDACC_VER_MAJOR__ +// CUDA 9 define `__CUDACC_VER__` as a warning message, manually define +// __CUDACC_VER__ instead. +#undef __CUDACC_VER__ + +#define __CUDACC_VER__ \ + (__CUDACC_VER_MAJOR__ * 10000 + __CUDACC_VER_MINOR__ * 100 + \ + __CUDACC_VER_BUILD__) +#endif + +#endif + #include #ifdef PADDLE_WITH_CUDA diff --git a/python/.gitignore b/python/.gitignore index cc7d0ece4ac..1ba1d4c9b03 100644 --- a/python/.gitignore +++ b/python/.gitignore @@ -2,6 +2,7 @@ build dist paddle.egg-info +paddlepaddle_gpu.egg-info .idea paddle/proto/*.py paddle/proto/*.pyc -- GitLab