提交 d6d85add 编写于 作者: L liaogang 提交者: Yu Yang

fix bug in findLastSet

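* findLastSet function: size_t could be unsigned int, unsigned long, or unsigned long long, so pick the matching __builtin_clz / __builtin_clzl / __builtin_clzll variant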
* add sm_52 to the default arch flags for CUDA versions greater than 7.0 in flags.cmake

Change-Id: Ica9d09520e6a1468e5fc97e2c29c1ea7c73dafbb
上级 f15aa809
...@@ -81,12 +81,8 @@ function(specify_cuda_arch cuda_version cuda_arch) ...@@ -81,12 +81,8 @@ function(specify_cuda_arch cuda_version cuda_arch)
list(APPEND __arch_flags " -gencode arch=compute_${cuda_arch},code=sm_${cuda_arch}") list(APPEND __arch_flags " -gencode arch=compute_${cuda_arch},code=sm_${cuda_arch}")
endif() endif()
endforeach() endforeach()
elseif(${cuda_version} VERSION_GREATER "7.0") elseif(${cuda_version} VERSION_GREATER "7.0" and ${cuda_arch} STREQUAL "53")
foreach(capability 52 53) list(APPEND __arch_flags " -gencode arch=compute_${cuda_arch},code=sm_${cuda_arch}")
if(${cuda_arch} STREQUAL ${capability})
list(APPEND __arch_flags " -gencode arch=compute_${cuda_arch},code=sm_${cuda_arch}")
endif()
endforeach()
endif() endif()
endfunction() endfunction()
...@@ -95,6 +91,10 @@ foreach(capability 30 35 50) ...@@ -95,6 +91,10 @@ foreach(capability 30 35 50)
list(APPEND __arch_flags " -gencode arch=compute_${capability},code=sm_${capability}") list(APPEND __arch_flags " -gencode arch=compute_${capability},code=sm_${capability}")
endforeach() endforeach()
if (CUDA_VERSION VERSION_GREATER "7.0")
list(APPEND __arch_flags " -gencode arch=compute_52,code=sm_52")
endif()
# Modern gpu architectures: Pascal # Modern gpu architectures: Pascal
if (CUDA_VERSION VERSION_GREATER "8.0") if (CUDA_VERSION VERSION_GREATER "8.0")
list(APPEND __arch_flags " -gencode arch=compute_60,code=sm_60") list(APPEND __arch_flags " -gencode arch=compute_60,code=sm_60")
......
...@@ -72,7 +72,11 @@ namespace paddle { ...@@ -72,7 +72,11 @@ namespace paddle {
* \f] * \f]
*/ */
inline constexpr size_t findLastSet(size_t x) { inline constexpr size_t findLastSet(size_t x) {
return x ? 8 * sizeof(unsigned long) - __builtin_clzl(x) : 0; // NOLINT return std::is_same<size_t , unsigned int>::value ?
(x ? 8 * sizeof(x) - __builtin_clz(x) : 0)
: (std::is_same<size_t , unsigned long>::value ? // NOLINT
(x ? 8 * sizeof(x) - __builtin_clzl(x) : 0)
: (x ? 8 * sizeof(x) - __builtin_clzll(x) : 0));
} }
/** /**
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册