From 9f47487ccadb18d62596df2e46800c7c34f2715c Mon Sep 17 00:00:00 2001 From: zhaojiaying01 Date: Wed, 27 Jun 2018 00:26:50 +0800 Subject: [PATCH] fix gemm bug --- CMakeLists.txt | 3 ++- src/operators/math/gemm.cpp | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9c3ff01e5d..f2e3a2a4e9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,9 +25,10 @@ endif() set(CMAKE_CXX_FLAGS "-std=c++14 -O3 -s ${CMAKE_CXX_FLAGS}") if (DEBUGING) set(CMAKE_BUILD_TYPE Debug) - set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS}") + set(CMAKE_CXX_FLAGS_DEBUG "-O3 -DNDEBUG") else() set(CMAKE_BUILD_TYPE Release) + set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG") endif () if(DEBUGING) diff --git a/src/operators/math/gemm.cpp b/src/operators/math/gemm.cpp index da3dacb58a..7ade30600a 100644 --- a/src/operators/math/gemm.cpp +++ b/src/operators/math/gemm.cpp @@ -375,12 +375,15 @@ void AddDot4x4(int k, float alpha, const float *a, int lda, const float *b, "subs %[kc2], %[kc2], #1 \n\t" "blt end_kc2_%= \n\t" + "loop_kc2_%=: \n\t" "vld1.32 {q0}, [%[a]]! \n\t" "vld1.32 {q1}, [%[b]]! \n\t" "vmla.f32 q10, q1, d0[0] \n\t" "vmla.f32 q11, q1, d0[1] \n\t" "vmla.f32 q12, q1, d1[0] \n\t" "vmla.f32 q13, q1, d1[1] \n\t" + "subs %[kc2], %[kc2], #1 \n\t" + "bge loop_kc2_%= \n\t" "end_kc2_%=: \n\t" "cmp %[mc], #4 \n\t" -- GitLab