diff --git a/paddle/math/float16.h b/paddle/math/float16.h
index 76ad3a01239e409caeefc36a3d562ed5e388dc92..efebbce50405018c6b7ce2049f8d55c33680469f 100644
--- a/paddle/math/float16.h
+++ b/paddle/math/float16.h
@@ -79,7 +79,7 @@ public:
 #ifdef PADDLE_CUDA_FP16
   HOSTDEVICE inline explicit float16(const half& h) {
 #if CUDA_VERSION >= 9000
-    x = reinterpret_cast<__half_raw*>(&h)->x;
+    x = reinterpret_cast<__half_raw*>(const_cast<half*>(&h))->x;
 #else
     x = h.x;
 #endif  // CUDA_VERSION >= 9000
@@ -145,7 +145,7 @@ public:
 #ifdef PADDLE_CUDA_FP16
   HOSTDEVICE inline float16& operator=(const half& rhs) {
 #if CUDA_VERSION >= 9000
-    x = reinterpret_cast<__half_raw*>(&rhs)->x;
+    x = reinterpret_cast<__half_raw*>(const_cast<half*>(&rhs))->x;
 #else
     x = rhs.x;
 #endif
diff --git a/paddle/platform/dynload/nccl.cc b/paddle/platform/dynload/nccl.cc
index 8f92b8d94d56047b7d3fb43b15e3c06575c8d57b..91168f37effff3f8b864b6bb2ede070cb0a976fa 100644
--- a/paddle/platform/dynload/nccl.cc
+++ b/paddle/platform/dynload/nccl.cc
@@ -25,6 +25,11 @@ void *nccl_dso_handle;
 
 NCCL_RAND_ROUTINE_EACH(DEFINE_WRAP);
 
+void LoadNCCLDSO() {
+  platform::call_once(nccl_dso_flag,
+                      [] { GetNCCLDsoHandle(&nccl_dso_handle); });
+}
+
 }  // namespace dynload
 }  // namespace platform
 }  // namespace paddle
diff --git a/paddle/platform/dynload/nccl.h b/paddle/platform/dynload/nccl.h
index 981b2ab258a34ce92f02ee12b5957f88ba61d1c0..11007c1031c6d224c475e9ef4f11e7797decd78e 100644
--- a/paddle/platform/dynload/nccl.h
+++ b/paddle/platform/dynload/nccl.h
@@ -28,18 +28,18 @@ extern std::once_flag nccl_dso_flag;
 extern void* nccl_dso_handle;
 
 #ifdef PADDLE_USE_DSO
-#define DECLARE_DYNAMIC_LOAD_NCCL_WRAP(__name)                         \
-  struct DynLoad__##__name {                                           \
-    template <typename... Args>                                        \
-    auto operator()(Args... args) -> decltype(__name(args...)) {       \
-      using nccl_func = decltype(__name(args...)) (*)(Args...);        \
-      platform::call_once(nccl_dso_flag,                               \
-                          paddle::platform::dynload::GetNCCLDsoHandle, \
-                          &nccl_dso_handle);                           \
-      void* p_##__name = dlsym(nccl_dso_handle, #__name);              \
-      return reinterpret_cast<nccl_func>(p_##__name)(args...);         \
-    }                                                                  \
-  };                                                                   \
+extern void LoadNCCLDSO();
+
+#define DECLARE_DYNAMIC_LOAD_NCCL_WRAP(__name)                   \
+  struct DynLoad__##__name {                                     \
+    template <typename... Args>                                  \
+    auto operator()(Args... args) -> decltype(__name(args...)) { \
+      using nccl_func = decltype(__name(args...)) (*)(Args...);  \
+      paddle::platform::dynload::LoadNCCLDSO();                  \
+      void* p_##__name = dlsym(nccl_dso_handle, #__name);        \
+      return reinterpret_cast<nccl_func>(p_##__name)(args...);   \
+    }                                                             \
+  };                                                              \
   extern DynLoad__##__name __name
 #else
 #define DECLARE_DYNAMIC_LOAD_NCCL_WRAP(__name) \
diff --git a/paddle/platform/nccl_test.cu b/paddle/platform/nccl_test.cu
index c99dae68bef67c58d3efea42fef45e84bb3d9255..94ab360a1967d22e73bc6aefc0301487537c97f7 100644
--- a/paddle/platform/nccl_test.cu
+++ b/paddle/platform/nccl_test.cu
@@ -31,7 +31,7 @@ namespace platform {
 TEST(NCCL, init) {
   std::vector<ncclComm_t> comms;
   comms.resize(dev_count);
-  PADDLE_ENFORCE(dynload::ncclCommInitAll(comms.data(), dev_count, nullptr));
+  dynload::ncclCommInitAll(comms.data(), dev_count, nullptr);
   for (int i = 0; i < dev_count; ++i) {
     dynload::ncclCommDestroy(comms[i]);
   }
@@ -62,7 +62,7 @@ TEST(NCCL, all_reduce) {
   std::vector<ncclComm_t> comms;
   comms.resize(dev_count);
   VLOG(1) << "Initializing ncclComm";
-  PADDLE_ENFORCE(dynload::ncclCommInitAll(comms.data(), dev_count, nullptr));
+  dynload::ncclCommInitAll(comms.data(), dev_count, nullptr);
   VLOG(1) << "ncclComm initialized";
   VLOG(1) << "Creating thread data";
   std::vector<std::unique_ptr<PerThreadData<float>>> data;
diff --git a/paddle/platform/variant.h b/paddle/platform/variant.h
index 619897ca19eb2e6f4dbfd9160edf8c4bc58c89a9..284b4c42ac068b23737c12d3f147bbceb135dacc 100644
--- a/paddle/platform/variant.h
+++ b/paddle/platform/variant.h
@@ -14,6 +14,19 @@
 
 #pragma once
 
+#ifdef __CUDACC__
+#ifdef __CUDACC_VER_MAJOR__
+// CUDA 9 defines `__CUDACC_VER__` as a warning message; manually define
+// __CUDACC_VER__ from the major/minor/build macros instead.
+#undef __CUDACC_VER__
+
+#define __CUDACC_VER__                                          \
+  (__CUDACC_VER_MAJOR__ * 10000 + __CUDACC_VER_MINOR__ * 100 + \
+   __CUDACC_VER_BUILD__)
+#endif
+
+#endif
+
 #include <boost/config.hpp>
 
 #ifdef PADDLE_WITH_CUDA
diff --git a/python/.gitignore b/python/.gitignore
index cc7d0ece4acaba2a3fa38a89110587fe8dffb992..1ba1d4c9b0301ed920f5303089e75dd3a8e4e3fa 100644
--- a/python/.gitignore
+++ b/python/.gitignore
@@ -2,6 +2,7 @@
 build
 dist
 paddle.egg-info
+paddlepaddle_gpu.egg-info
 .idea
 paddle/proto/*.py
 paddle/proto/*.pyc