Summary: replace the combined AVX_FLAGS / AVX_FOUND detection with one flag
variable and one *_FOUND probe per instruction set (MMX, SSE2, SSE3, AVX, AVX2).
  * CMakeLists.txt: nvcc and the C/C++ flags use ${AVX_FLAG} / ${SSE3_FLAG}
    instead of hard-coded -mavx / -msse3 and the old ${AVX_FLAGS}; the nvcc AVX
    flag is no longer guarded by if(AVX_FOUND).
  * cmake/FindAVX.cmake: flag variables are set once per compiler family, then
    each instruction set is probed with CHECK_CXX_SOURCE_RUNS.
NOTE(review): leading indentation, blank context lines and the header names in
the #include lines were reconstructed from a whitespace-mangled copy; hunk line
counts match the @@ headers, but confirm against the target tree before applying.

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 39f876bc9ee4b34ef512cfaaf5aae7752920c33f..d7e7e49e9a038acc6ca272433cd39b08c812eccc 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -109,11 +109,9 @@ else()
     set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-g -O3 --use_fast_math")
 
     if(WITH_AVX)
-        if(AVX_FOUND)
-            set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler -mavx")
-        endif(AVX_FOUND)
+        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${AVX_FLAG}")
     else(WITH_AVX)
-        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler -msse3")
+        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${SSE3_FLAG}")
     endif(WITH_AVX)
 
     if(WITH_DSO)
@@ -138,11 +136,11 @@ if(NOT WITH_TIMER)
 endif(NOT WITH_TIMER)
 
 if(WITH_AVX)
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${AVX_FLAGS}")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${AVX_FLAGS}")
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${AVX_FLAG}")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${AVX_FLAG}")
 else(WITH_AVX)
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse3")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse3")
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SSE3_FLAG}")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SSE3_FLAG}")
 endif(WITH_AVX)
 
 if(WITH_PYTHON)
diff --git a/cmake/FindAVX.cmake b/cmake/FindAVX.cmake
index f6103c6e667e8a8f6b8998d8eb467235fb49cb19..d380c996dfa95f0caa2b9cd9daa0ac9141e51fe0 100644
--- a/cmake/FindAVX.cmake
+++ b/cmake/FindAVX.cmake
@@ -3,36 +3,55 @@
 
 INCLUDE(CheckCXXSourceRuns)
 
-SET(FIND_AVX_10)
-SET(FIND_AVX_20)
-SET(AVX_FLAGS)
-SET(AVX_FOUND)
-
-# Check AVX 2
-SET(CMAKE_REQUIRED_FLAGS)
 IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
-    SET(CMAKE_REQUIRED_FLAGS "-mavx2")
-ELSEIF(MSVC AND NOT CMAKE_CL_64) # reserve for WINDOWS
-    SET(CMAKE_REQUIRED_FLAGS "/arch:AVX2")
+    set(MMX_FLAG "-mmmx")
+    set(SSE2_FLAG "-msse2")
+    set(SSE3_FLAG "-msse3")
+    SET(AVX_FLAG "-mavx")
+    SET(AVX2_FLAG "-mavx2")
+ELSEIF(MSVC)
+    set(MMX_FLAG "/arch:MMX")
+    set(SSE2_FLAG "/arch:SSE2")
+    set(SSE3_FLAG "/arch:SSE3")
+    SET(AVX_FLAG "/arch:AVX")
+    SET(AVX2_FLAG "/arch:AVX2")
 ENDIF()
 
+# Check MMX
+set(CMAKE_REQUIRED_FLAGS ${MMX_FLAG})
 CHECK_CXX_SOURCE_RUNS("
-#include <immintrin.h>
+#include <mmintrin.h>
 int main()
 {
-    __m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4);
-    __m256i result = _mm256_abs_epi32 (a);
+    _mm_setzero_si64();
     return 0;
-}" FIND_AVX_20)
+}" MMX_FOUND)
 
-# Check AVX
-SET(CMAKE_REQUIRED_FLAGS)
-IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
-    SET(CMAKE_REQUIRED_FLAGS "-mavx")
-ELSEIF(MSVC AND NOT CMAKE_CL_64)
-    SET(CMAKE_REQUIRED_FLAGS "/arch:AVX")
-endif()
+# Check SSE2
+set(CMAKE_REQUIRED_FLAGS ${SSE2_FLAG})
+CHECK_CXX_SOURCE_RUNS("
+#include <emmintrin.h>
+int main()
+{
+    _mm_setzero_si128();
+    return 0;
+}" SSE2_FOUND)
 
+# Check SSE3
+set(CMAKE_REQUIRED_FLAGS ${SSE3_FLAG})
+CHECK_CXX_SOURCE_RUNS("
+#include <pmmintrin.h>
+int main()
+{
+    __m128d a = _mm_set1_pd(6.28);
+    __m128d b = _mm_set1_pd(3.14);
+    __m128d result = _mm_addsub_pd(a, b);
+    result = _mm_movedup_pd(result);
+    return 0;
+}" SSE3_FOUND)
+
+# Check AVX
+set(CMAKE_REQUIRED_FLAGS ${AVX_FLAG})
 CHECK_CXX_SOURCE_RUNS("
 #include <immintrin.h>
 int main()
@@ -41,25 +60,17 @@ int main()
     __m256 b = _mm256_set_ps (1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
     __m256 result = _mm256_add_ps (a, b);
     return 0;
-}" FIND_AVX_10)
-
-IF(${FIND_AVX_20})
-    IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
-        SET(AVX_FLAGS "${AVX_FLAGS} -mavx2")
-    ELSEIF(MSVC)
-        SET(AVX_FLAGS "${AVX_FLAGS} /arch:AVX2")
-    ENDIF()
-ENDIF()
+}" AVX_FOUND)
 
-IF(${FIND_AVX_10})
-    IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
-        SET(AVX_FLAGS "${AVX_FLAGS} -mavx")
-    ELSEIF(MSVC)
-        SET(AVX_FLAGS "${AVX_FLAGS} /arch:AVX")
-    ENDIF()
-ENDIF()
+# Check AVX 2
+set(CMAKE_REQUIRED_FLAGS ${AVX2_FLAG})
+CHECK_CXX_SOURCE_RUNS("
+#include <immintrin.h>
+int main()
+{
+    __m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4);
+    __m256i result = _mm256_abs_epi32 (a);
+    return 0;
+}" AVX2_FOUND)
 
-IF(${FIND_AVX_10})
-    SET(AVX_FOUND TRUE)
-    MESSAGE(STATUS "Find CPU supports ${AVX_FLAGS}.")
-ENDIF()
+mark_as_advanced(MMX_FOUND SSE2_FOUND SSE3_FOUND AVX_FOUND AVX2_FOUND)