diff --git a/paddle/fluid/platform/cuda_primitives.h b/paddle/fluid/platform/cuda_primitives.h
index f7c77071b122366ffc79cc5449815ad5937bea8f..a5dd19d4363d6a8fa99cf48ef2969186de605127 100644
--- a/paddle/fluid/platform/cuda_primitives.h
+++ b/paddle/fluid/platform/cuda_primitives.h
@@ -134,7 +134,33 @@ USE_CUDA_ATOMIC(Max, int);
 USE_CUDA_ATOMIC(Max, unsigned int);
 // CUDA API uses unsigned long long int, we cannot use uint64_t here.
 // It because unsigned long long int is not necessarily uint64_t
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
 USE_CUDA_ATOMIC(Max, unsigned long long int);  // NOLINT
+#else
+// Software fallback used when __CUDA_ARCH__ is undefined or below 350:
+// emulates a 64-bit atomic max with a compare-and-swap loop.
+// Returns the value sampled from *address before this call's update, or
+// the sampled value unchanged when it is already >= val.
+// NOTE(review): assumes CUDA_ATOMIC_WRAPPER declares a non-void return type
+// (its definition is outside this hunk) -- confirm.
+CUDA_ATOMIC_WRAPPER(Max, unsigned long long int) {  // NOLINT
+  unsigned long long int old = *address;  // NOLINT
+  unsigned long long int assumed;         // NOLINT
+
+  do {
+    assumed = old;
+    if (assumed >= val) {
+      // Stored value is already >= val; nothing to write.
+      break;
+    }
+
+    // Retry whenever *address no longer holds the value we sampled.
+    old = atomicCAS(address, assumed, val);
+  } while (assumed != old);
+
+  return old;
+}
+#endif
 
 CUDA_ATOMIC_WRAPPER(Max, int64_t) {
   // Here, we check long long int must be int64_t.
@@ -187,7 +213,33 @@ USE_CUDA_ATOMIC(Min, int);
 USE_CUDA_ATOMIC(Min, unsigned int);
 // CUDA API uses unsigned long long int, we cannot use uint64_t here.
 // It because unsigned long long int is not necessarily uint64_t
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
 USE_CUDA_ATOMIC(Min, unsigned long long int);  // NOLINT
+#else
+// Software fallback used when __CUDA_ARCH__ is undefined or below 350:
+// emulates a 64-bit atomic min with a compare-and-swap loop.
+// Returns the value sampled from *address before this call's update, or
+// the sampled value unchanged when it is already <= val.
+// NOTE(review): assumes CUDA_ATOMIC_WRAPPER declares a non-void return type
+// (its definition is outside this hunk) -- confirm.
+CUDA_ATOMIC_WRAPPER(Min, unsigned long long int) {  // NOLINT
+  unsigned long long int old = *address;  // NOLINT
+  unsigned long long int assumed;         // NOLINT
+
+  do {
+    assumed = old;
+    if (assumed <= val) {
+      // Stored value is already <= val; nothing to write.
+      break;
+    }
+
+    // Retry whenever *address no longer holds the value we sampled.
+    old = atomicCAS(address, assumed, val);
+  } while (assumed != old);
+
+  return old;
+}
+#endif
 
 CUDA_ATOMIC_WRAPPER(Min, int64_t) {
   // Here, we check long long int must be int64_t.