未验证 提交 597345d1 编写于 作者: Z Zhong Hui 提交者: GitHub

fix cuda atomic for ARCH<350 for the atomic_max

fix cuda atomic for ARCH<350 for the atomic_max
上级 dd04b160
...@@ -134,7 +134,26 @@ USE_CUDA_ATOMIC(Max, int); ...@@ -134,7 +134,26 @@ USE_CUDA_ATOMIC(Max, int);
USE_CUDA_ATOMIC(Max, unsigned int); USE_CUDA_ATOMIC(Max, unsigned int);
// CUDA API uses unsigned long long int, we cannot use uint64_t here. // CUDA API uses unsigned long long int, we cannot use uint64_t here.
// This is because unsigned long long int is not necessarily uint64_t // This is because unsigned long long int is not necessarily uint64_t
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
USE_CUDA_ATOMIC(Max, unsigned long long int); // NOLINT USE_CUDA_ATOMIC(Max, unsigned long long int); // NOLINT
#else
// Fallback atomic max for unsigned long long int, built on atomicCAS.
// Selected by the surrounding #if/#else when __CUDA_ARCH__ is undefined or
// below 350, i.e. when the USE_CUDA_ATOMIC(Max, ...) form is not used.
//
// `address` and `val` are introduced by CUDA_ATOMIC_WRAPPER, which is defined
// outside this excerpt; the body dereferences `address` and compares against
// `val`.
//
// NOTE(review): this body never returns a value (bare `return;` and a
// fall-through end). Confirm that matches the return type declared by
// CUDA_ATOMIC_WRAPPER.
CUDA_ATOMIC_WRAPPER(Max, unsigned long long int) {
  // Fast path: the stored value is already at least `val`, nothing to write.
  if (val <= *address) {
    return;
  }

  // Snapshot the stored value, then retry the compare-and-swap until either
  // the swap succeeds or the observed value is already >= val.
  unsigned long long int current = *address;
  while (current < val) {
    const unsigned long long int expected = current;
    // atomicCAS hands back the value it found at `address`; the store of
    // `val` only took place if that value equals `expected`.
    current = atomicCAS(address, expected, val);
    if (current == expected) {
      break;
    }
  }
}
#endif
CUDA_ATOMIC_WRAPPER(Max, int64_t) { CUDA_ATOMIC_WRAPPER(Max, int64_t) {
// Here, we check long long int must be int64_t. // Here, we check long long int must be int64_t.
...@@ -187,7 +206,26 @@ USE_CUDA_ATOMIC(Min, int); ...@@ -187,7 +206,26 @@ USE_CUDA_ATOMIC(Min, int);
USE_CUDA_ATOMIC(Min, unsigned int); USE_CUDA_ATOMIC(Min, unsigned int);
// CUDA API uses unsigned long long int, we cannot use uint64_t here. // CUDA API uses unsigned long long int, we cannot use uint64_t here.
// This is because unsigned long long int is not necessarily uint64_t // This is because unsigned long long int is not necessarily uint64_t
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
USE_CUDA_ATOMIC(Min, unsigned long long int); // NOLINT USE_CUDA_ATOMIC(Min, unsigned long long int); // NOLINT
#else
// Fallback atomic min for unsigned long long int, built on atomicCAS.
// Selected by the surrounding #if/#else when __CUDA_ARCH__ is undefined or
// below 350, i.e. when the USE_CUDA_ATOMIC(Min, ...) form is not used.
//
// `address` and `val` are introduced by CUDA_ATOMIC_WRAPPER, which is defined
// outside this excerpt; the body dereferences `address` and compares against
// `val`.
//
// NOTE(review): this body never returns a value (bare `return;` and a
// fall-through end). Confirm that matches the return type declared by
// CUDA_ATOMIC_WRAPPER.
CUDA_ATOMIC_WRAPPER(Min, unsigned long long int) {
  // Fast path: the stored value is already at most `val`, nothing to write.
  if (val >= *address) {
    return;
  }

  // Snapshot the stored value, then retry the compare-and-swap until either
  // the swap succeeds or the observed value is already <= val.
  unsigned long long int current = *address;
  while (current > val) {
    const unsigned long long int expected = current;
    // atomicCAS hands back the value it found at `address`; the store of
    // `val` only took place if that value equals `expected`.
    current = atomicCAS(address, expected, val);
    if (current == expected) {
      break;
    }
  }
}
#endif
CUDA_ATOMIC_WRAPPER(Min, int64_t) { CUDA_ATOMIC_WRAPPER(Min, int64_t) {
// Here, we check long long int must be int64_t. // Here, we check long long int must be int64_t.
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册