提交 15f9f705 编写于 作者: Z zhaojiaying01

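Fix GEMM bug: add the missing kc2 loop (label and back-branch) in AddDot4x4, and set explicit "-O3 -DNDEBUG" flags for the Debug and Release configurations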

上级 ea621305
...@@ -25,9 +25,10 @@ endif() ...@@ -25,9 +25,10 @@ endif()
set(CMAKE_CXX_FLAGS "-std=c++14 -O3 -s ${CMAKE_CXX_FLAGS}") set(CMAKE_CXX_FLAGS "-std=c++14 -O3 -s ${CMAKE_CXX_FLAGS}")
if (DEBUGING) if (DEBUGING)
set(CMAKE_BUILD_TYPE Debug) set(CMAKE_BUILD_TYPE Debug)
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS}") set(CMAKE_CXX_FLAGS_DEBUG "-O3 -DNDEBUG")
else() else()
set(CMAKE_BUILD_TYPE Release) set(CMAKE_BUILD_TYPE Release)
set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG")
endif () endif ()
if(DEBUGING) if(DEBUGING)
......
...@@ -375,12 +375,15 @@ void AddDot4x4(int k, float alpha, const float *a, int lda, const float *b, ...@@ -375,12 +375,15 @@ void AddDot4x4(int k, float alpha, const float *a, int lda, const float *b,
"subs %[kc2], %[kc2], #1 \n\t" "subs %[kc2], %[kc2], #1 \n\t"
"blt end_kc2_%= \n\t" "blt end_kc2_%= \n\t"
"loop_kc2_%=: \n\t"
"vld1.32 {q0}, [%[a]]! \n\t" "vld1.32 {q0}, [%[a]]! \n\t"
"vld1.32 {q1}, [%[b]]! \n\t" "vld1.32 {q1}, [%[b]]! \n\t"
"vmla.f32 q10, q1, d0[0] \n\t" "vmla.f32 q10, q1, d0[0] \n\t"
"vmla.f32 q11, q1, d0[1] \n\t" "vmla.f32 q11, q1, d0[1] \n\t"
"vmla.f32 q12, q1, d1[0] \n\t" "vmla.f32 q12, q1, d1[0] \n\t"
"vmla.f32 q13, q1, d1[1] \n\t" "vmla.f32 q13, q1, d1[1] \n\t"
"subs %[kc2], %[kc2], #1 \n\t"
"bge loop_kc2_%= \n\t"
"end_kc2_%=: \n\t" "end_kc2_%=: \n\t"
"cmp %[mc], #4 \n\t" "cmp %[mc], #4 \n\t"
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册