From 033ebe7e652aed9de07ea525abe3d15ba659a1b9 Mon Sep 17 00:00:00 2001
From: sneaxiy <32832641+sneaxiy@users.noreply.github.com>
Date: Thu, 9 Dec 2021 14:51:21 +0800
Subject: [PATCH] Refine CUDA atomicAdd for FP16 by CUDA primitive methods
 (#37895)

* fix cuda atomicAdd for FP16

* try to fix ci
---
 paddle/fluid/platform/device/gpu/gpu_primitives.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/paddle/fluid/platform/device/gpu/gpu_primitives.h b/paddle/fluid/platform/device/gpu/gpu_primitives.h
index d443e78ed87..3e070da546b 100644
--- a/paddle/fluid/platform/device/gpu/gpu_primitives.h
+++ b/paddle/fluid/platform/device/gpu/gpu_primitives.h
@@ -101,6 +101,20 @@ inline static __device__ uint32_t add_to_high_half(uint32_t val, float x) {
   return (val & 0xFFFFu) | (static_cast<uint32_t>(high_half.x) << 16);
 }
 
+#if CUDA_VERSION >= 10000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 700
+static __device__ __forceinline__ float16 CUDAFP16ToPDFP16(__half x) {
+  return *reinterpret_cast<float16 *>(&x);
+}
+
+static __device__ __forceinline__ __half PDFP16ToCUDAFP16(float16 x) {
+  return *reinterpret_cast<__half *>(&x);
+}
+
+CUDA_ATOMIC_WRAPPER(Add, float16) {
+  return CUDAFP16ToPDFP16(
+      atomicAdd(reinterpret_cast<__half *>(address), PDFP16ToCUDAFP16(val)));
+}
+#else
 CUDA_ATOMIC_WRAPPER(Add, float16) {
   // the concrete packed float16 value may exist in the lower or the higher
   // 16 bits of the 32-bit address.
@@ -133,6 +147,7 @@ CUDA_ATOMIC_WRAPPER(Add, float16) {
   }
 }
 #endif
+#endif
 
 CUDA_ATOMIC_WRAPPER(Add, complex<float>) {
   float *real = reinterpret_cast<float *>(address);
--
GitLab
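
For context on the primitive this patch switches to: since CUDA 10.0, cuda_fp16.h exposes a native atomicAdd(__half *, __half) overload on devices of compute capability 7.0 and above, which is what the new #if branch forwards to; older GPUs keep the 32-bit CAS emulation in the #else branch. Below is a minimal standalone sketch (not Paddle code; the kernel and variable names are illustrative) showing the native overload in isolation. It assumes compilation with something like nvcc -arch=sm_70 sketch.cu.

// sketch.cu -- minimal demo of native FP16 atomicAdd (hypothetical file name).
// Requires CUDA >= 10.0 and a GPU of compute capability >= 7.0.
#include <cuda_fp16.h>
#include <cuda_runtime.h>
#include <cstdio>

__global__ void SumIntoOneCell(__half *dst, const __half *src, int n) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) {
    // On sm_70+ this lowers to a hardware FP16 atomic; pre-Volta code
    // paths (like the #else branch in the patch) must emulate it with a
    // 32-bit atomicCAS loop on the containing word.
    atomicAdd(dst, src[i]);
  }
}

int main() {
  const int n = 1024;  // 1024 ones sum exactly in FP16 (integers <= 2048 are representable)
  __half *dst, *src;
  cudaMallocManaged(&dst, sizeof(__half));
  cudaMallocManaged(&src, n * sizeof(__half));
  *dst = __float2half(0.f);
  for (int i = 0; i < n; ++i) src[i] = __float2half(1.f);
  SumIntoOneCell<<<(n + 255) / 256, 256>>>(dst, src, n);
  cudaDeviceSynchronize();
  printf("sum = %f\n", __half2float(*dst));  // expect 1024.000000
  cudaFree(src);
  cudaFree(dst);
  return 0;
}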