Patch summary (free text ahead of the first "diff --git" header):
  * cmake/simd.cmake: the AVX and AVX512F compile-and-run probes are moved
    inside the existing if(NOT WIN32) guard, next to the AVX2 probe, so none
    of the three probes runs on Windows.
  * paddle/fluid/operators/CMakeLists.txt: the COMMON_OP_DEPS list is split so
    that dynload_warpctc (appended only when NOT WIN32) comes right after
    executor and before sequence_padding.
NOTE(review): leading indentation of the hunk lines was reconstructed and is
an assumption; if a hunk is rejected, retry with whitespace-insensitive
matching and confirm against the target tree.

diff --git a/cmake/simd.cmake b/cmake/simd.cmake
index 4926fb991332bc3b30b01caf3bff08037769e58a..86096d4feaace040da416a01872882456c4098fc 100644
--- a/cmake/simd.cmake
+++ b/cmake/simd.cmake
@@ -57,21 +57,21 @@ int main()
     return 0;
 }" SSE3_FOUND)
 
-# Check AVX
-set(CMAKE_REQUIRED_FLAGS ${AVX_FLAG})
-set(AVX_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE)
-CHECK_CXX_SOURCE_RUNS("
-#include <immintrin.h>
-int main()
-{
-    __m256 a = _mm256_set_ps (-1.0f, 2.0f, -3.0f, 4.0f, -1.0f, 2.0f, -3.0f, 4.0f);
-    __m256 b = _mm256_set_ps (1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
-    __m256 result = _mm256_add_ps (a, b);
-    return 0;
-}" AVX_FOUND)
-
-# disable AVX2 by default on windows
+# disable AVX by default on windows
 if(NOT WIN32)
+    # Check AVX
+    set(CMAKE_REQUIRED_FLAGS ${AVX_FLAG})
+    set(AVX_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE)
+    CHECK_CXX_SOURCE_RUNS("
+    #include <immintrin.h>
+    int main()
+    {
+        __m256 a = _mm256_set_ps (-1.0f, 2.0f, -3.0f, 4.0f, -1.0f, 2.0f, -3.0f, 4.0f);
+        __m256 b = _mm256_set_ps (1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
+        __m256 result = _mm256_add_ps (a, b);
+        return 0;
+    }" AVX_FOUND)
+
     # Check AVX 2
     set(CMAKE_REQUIRED_FLAGS ${AVX2_FLAG})
     set(AVX2_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE)
@@ -83,20 +83,20 @@ if(NOT WIN32)
         __m256i result = _mm256_abs_epi32 (a);
         return 0;
     }" AVX2_FOUND)
-endif(NOT WIN32)
 
-# Check AVX512F
-set(CMAKE_REQUIRED_FLAGS ${AVX512F_FLAG})
-set(AVX512F_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE)
-CHECK_CXX_SOURCE_RUNS("
-#include <immintrin.h>
-int main()
-{
-    __m512i a = _mm512_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4,
-                                  13, -5, 6, -7, 9, 2, -6, 3);
-    __m512i result = _mm512_abs_epi32 (a);
-    return 0;
-}" AVX512F_FOUND)
+    # Check AVX512F
+    set(CMAKE_REQUIRED_FLAGS ${AVX512F_FLAG})
+    set(AVX512F_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE)
+    CHECK_CXX_SOURCE_RUNS("
+    #include <immintrin.h>
+    int main()
+    {
+        __m512i a = _mm512_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4,
+                                      13, -5, 6, -7, 9, 2, -6, 3);
+        __m512i result = _mm512_abs_epi32 (a);
+        return 0;
+    }" AVX512F_FOUND)
+endif(NOT WIN32)
 
 set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_RETAINED})
 mark_as_advanced(MMX_FOUND SSE2_FOUND SSE3_FOUND AVX_FOUND AVX2_FOUND AVX512F_FOUND)
diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt
index 9b1b272292e6ac0bcde8c02fe321f15e69c2800b..60a42cf56817f2519dee2fbcaefec23fd0dd8c4f 100644
--- a/paddle/fluid/operators/CMakeLists.txt
+++ b/paddle/fluid/operators/CMakeLists.txt
@@ -49,11 +49,12 @@ endif()
 
 set(COMMON_OP_DEPS "")
 
-set(COMMON_OP_DEPS ${COMMON_OP_DEPS} xxhash selected_rows_functor selected_rows lod_tensor maxouting unpooling pooling lod_rank_table context_project sequence_pooling executor sequence_padding sequence_scale cos_sim_functor memory jit_kernel concat_and_split cross_entropy softmax vol2col im2col sampler)
-set(COMMON_OP_DEPS ${COMMON_OP_DEPS} sequence2batch lstm_compute matrix_bit_code gru_compute activation_functions)
+set(COMMON_OP_DEPS ${COMMON_OP_DEPS} xxhash selected_rows_functor selected_rows lod_tensor maxouting unpooling pooling lod_rank_table context_project sequence_pooling executor)
 if (NOT WIN32)
     set(COMMON_OP_DEPS ${COMMON_OP_DEPS} dynload_warpctc)
 endif()
+set(COMMON_OP_DEPS ${COMMON_OP_DEPS} sequence_padding sequence_scale cos_sim_functor memory jit_kernel concat_and_split cross_entropy softmax vol2col im2col sampler)
+set(COMMON_OP_DEPS ${COMMON_OP_DEPS} sequence2batch lstm_compute matrix_bit_code gru_compute activation_functions)
 if (WITH_GPU)
     set(COMMON_OP_DEPS ${COMMON_OP_DEPS} depthwise_conv cub)
 endif()