提交 d6d85add 编写于 作者: L liaogang 提交者: Yu Yang

fix bug in findLastSet

* findLastSet function: size_t could be unsigned int, unsigned long, or unsigned long long
* add default cuda 7.0 in flags.cmake

Change-Id: Ica9d09520e6a1468e5fc97e2c29c1ea7c73dafbb
上级 f15aa809
......@@ -81,13 +81,9 @@ function(specify_cuda_arch cuda_version cuda_arch)
list(APPEND __arch_flags " -gencode arch=compute_${cuda_arch},code=sm_${cuda_arch}")
endif()
endforeach()
elseif(${cuda_version} VERSION_GREATER "7.0")
foreach(capability 52 53)
if(${cuda_arch} STREQUAL ${capability})
elseif(${cuda_version} VERSION_GREATER "7.0" and ${cuda_arch} STREQUAL "53")
list(APPEND __arch_flags " -gencode arch=compute_${cuda_arch},code=sm_${cuda_arch}")
endif()
endforeach()
endif()
endfunction()
# Common gpu architectures: Kepler, Maxwell
......@@ -95,6 +91,10 @@ foreach(capability 30 35 50)
list(APPEND __arch_flags " -gencode arch=compute_${capability},code=sm_${capability}")
endforeach()
# Additionally generate code for compute capability 5.2 (sm_52), but only when
# CUDA_VERSION compares strictly greater than "7.0"; VERSION_GREATER is false
# for 7.0 itself.
# NOTE(review): the commit message says "add default cuda 7.0" -- confirm
# whether CUDA 7.0 is meant to be included by this check.
if (CUDA_VERSION VERSION_GREATER "7.0")
list(APPEND __arch_flags " -gencode arch=compute_52,code=sm_52")
endif()
# Modern gpu architectures: Pascal
if (CUDA_VERSION VERSION_GREATER "8.0")
list(APPEND __arch_flags " -gencode arch=compute_60,code=sm_60")
......
......@@ -72,7 +72,11 @@ namespace paddle {
* \f]
*/
// Returns the 1-based position of the most significant set bit of x, or 0
// when x is zero (e.g. 1 -> 1, 8 -> 4, 255 -> 8).
//
// The count-leading-zeros builtin is selected by the underlying type of
// size_t (unsigned int, unsigned long, or unsigned long long):
// __builtin_clzl operates on unsigned long, which need not have the same
// width as size_t, so using it unconditionally is not portable.
//
// The zero check comes first because the __builtin_clz* family is undefined
// for a zero argument. The body is a single return expression so the function
// stays a valid C++11 constexpr function.
inline constexpr size_t findLastSet(size_t x) {
  return x == 0
             ? 0
             : (std::is_same<size_t, unsigned int>::value
                    ? 8 * sizeof(x) - __builtin_clz(x)
                    : (std::is_same<size_t, unsigned long>::value  // NOLINT
                           ? 8 * sizeof(x) - __builtin_clzl(x)
                           : 8 * sizeof(x) - __builtin_clzll(x)));
}
/**
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册