From d6d85add20861082530d396cacdd849f09085e48 Mon Sep 17 00:00:00 2001 From: liaogang Date: Mon, 5 Sep 2016 20:48:51 +0800 Subject: [PATCH] fix bug in findLastSet * findLastSet function: size_t could be uint, ulong, ulonglong * add default cuda 7.0 in flags.cmake Change-Id: Ica9d09520e6a1468e5fc97e2c29c1ea7c73dafbb --- cmake/flags.cmake | 12 ++++++------ paddle/utils/Util.h | 6 +++++- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/cmake/flags.cmake b/cmake/flags.cmake index 5c386d442e..4b99e7f7fb 100644 --- a/cmake/flags.cmake +++ b/cmake/flags.cmake @@ -81,12 +81,8 @@ function(specify_cuda_arch cuda_version cuda_arch) list(APPEND __arch_flags " -gencode arch=compute_${cuda_arch},code=sm_${cuda_arch}") endif() endforeach() - elseif(${cuda_version} VERSION_GREATER "7.0") - foreach(capability 52 53) - if(${cuda_arch} STREQUAL ${capability}) - list(APPEND __arch_flags " -gencode arch=compute_${cuda_arch},code=sm_${cuda_arch}") - endif() - endforeach() + elseif(${cuda_version} VERSION_GREATER "7.0" and ${cuda_arch} STREQUAL "53") + list(APPEND __arch_flags " -gencode arch=compute_${cuda_arch},code=sm_${cuda_arch}") endif() endfunction() @@ -95,6 +91,10 @@ foreach(capability 30 35 50) list(APPEND __arch_flags " -gencode arch=compute_${capability},code=sm_${capability}") endforeach() +if (CUDA_VERSION VERSION_GREATER "7.0") + list(APPEND __arch_flags " -gencode arch=compute_52,code=sm_52") +endif() + # Modern gpu architectures: Pascal if (CUDA_VERSION VERSION_GREATER "8.0") list(APPEND __arch_flags " -gencode arch=compute_60,code=sm_60") diff --git a/paddle/utils/Util.h b/paddle/utils/Util.h index 7d43713d5f..11a03e141d 100644 --- a/paddle/utils/Util.h +++ b/paddle/utils/Util.h @@ -72,7 +72,11 @@ namespace paddle { * \f] */ inline constexpr size_t findLastSet(size_t x) { - return x ? 8 * sizeof(unsigned long) - __builtin_clzl(x) : 0; // NOLINT + return std::is_same<size_t, unsigned int>::value ? + (x ? 8 * sizeof(x) - __builtin_clz(x) : 0) + : (std::is_same<size_t, unsigned long>::value ? 
// NOLINT + (x ? 8 * sizeof(x) - __builtin_clzl(x) : 0) + : (x ? 8 * sizeof(x) - __builtin_clzll(x) : 0)); } /** -- GitLab