diff --git a/CMakeLists.txt b/CMakeLists.txt index 9c3ff01e5d3e20923021904cdbe9008a11cc30ce..f2e3a2a4e974dcdb0431bced75a35305d4fbddfa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,9 +25,10 @@ endif() set(CMAKE_CXX_FLAGS "-std=c++14 -O3 -s ${CMAKE_CXX_FLAGS}") if (DEBUGING) set(CMAKE_BUILD_TYPE Debug) - set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS}") + set(CMAKE_CXX_FLAGS_DEBUG "-O3 -DNDEBUG") else() set(CMAKE_BUILD_TYPE Release) + set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG") endif () if(DEBUGING) diff --git a/src/operators/math/gemm.cpp b/src/operators/math/gemm.cpp index da3dacb58a72d779d2ccd1224bbf4eab12dfbb91..7ade30600aa47feaf054d0bef043cae3c1fdd1e4 100644 --- a/src/operators/math/gemm.cpp +++ b/src/operators/math/gemm.cpp @@ -375,12 +375,15 @@ void AddDot4x4(int k, float alpha, const float *a, int lda, const float *b, "subs %[kc2], %[kc2], #1 \n\t" "blt end_kc2_%= \n\t" + "loop_kc2_%=: \n\t" "vld1.32 {q0}, [%[a]]! \n\t" "vld1.32 {q1}, [%[b]]! \n\t" "vmla.f32 q10, q1, d0[0] \n\t" "vmla.f32 q11, q1, d0[1] \n\t" "vmla.f32 q12, q1, d1[0] \n\t" "vmla.f32 q13, q1, d1[1] \n\t" + "subs %[kc2], %[kc2], #1 \n\t" + "bge loop_kc2_%= \n\t" "end_kc2_%=: \n\t" "cmp %[mc], #4 \n\t"