From b4d653b08c397df5187425037036d2a8b3e65dca Mon Sep 17 00:00:00 2001
From: zhaojiaying01 <zhaojiaying01@baidu.com>
Date: Mon, 25 Jun 2018 16:20:42 +0800
Subject: [PATCH] Add vector-matrix multiplication in Gemm

---
 src/memory/t_malloc.cpp     | 6 +++---
 src/operators/math/gemm.cpp | 5 +----
 src/operators/math/gemm.h   | 2 +-
 3 files changed, 5 insertions(+), 8 deletions(-)
diff --git a/src/memory/t_malloc.cpp b/src/memory/t_malloc.cpp
index 92cd9ac036..4cb28d55d3 100644
--- a/src/memory/t_malloc.cpp
+++ b/src/memory/t_malloc.cpp
@@ -14,17 +14,17 @@ limitations under the License. */
 
 #pragma once
 
-#include "t_malloc.h"
+#include "memory/t_malloc.h"
 #include <cstdlib>
 #include <cstring>
 
 namespace paddle_mobile {
 namespace memory {
-const int MALLOC_ALIGN = 64;
+const int MALLOC_ALIGN = 16;
 
 void Copy(void *dst, const void *src, size_t num) {
   std::memcpy(dst, src, num);
-};
+}
 
 void *Alloc(size_t size) {
   size_t offset = sizeof(void *) + MALLOC_ALIGN - 1;
diff --git a/src/operators/math/gemm.cpp b/src/operators/math/gemm.cpp
index 7c42d6dce7..da3dacb58a 100644
--- a/src/operators/math/gemm.cpp
+++ b/src/operators/math/gemm.cpp
@@ -216,7 +216,7 @@ void InnerKernel_relu(int m, int n, int k, float alpha, const float *A, int lda,
   }
 }
 
-//计算一个更小的 4 * 4 的 C 矩阵分块
+// 计算一个更小的 4 * 4 的 C 矩阵分块
 #if defined(IOS)
 void AddDot4x4(int k, float alpha, const float *a, int lda, const float *b,
                int ldb, float beta, float *C, int ldc, int mc, int nc) {
@@ -822,9 +822,6 @@ void VectorKernel(int m, int n, int k, float alpha, const float *A, int lda,
   int _nc1 = n % 16;
   int volatile nc2 = _nc1 / 4;
   int volatile nc3 = _nc1 % 4;
-  //  DLOG << "GEMM VECTOR kc1 = " << kc1 << ", kc2 = " << kc2;
-  //  DLOG << "GEMM VECTOR nc1 = " << nc1 << ", nc2 = " << nc2 << ", nc3 = " <<
-  //  nc3;
   for (int i = 0; i < kc1; i++) {
     a0 = A + i * 4;
     b0 = B + i * 4 * ldb;
diff --git a/src/operators/math/gemm.h b/src/operators/math/gemm.h
index b5351dd1e8..73d773987b 100644
--- a/src/operators/math/gemm.h
+++ b/src/operators/math/gemm.h
@@ -55,7 +55,7 @@ void InnerKernel(int m, int n, int k, float alpha, const float *A, int lda,
 
 // 向量矩阵乘法 (M = 1)
 void VectorKernel(int m, int n, int k, float alpha, const float *A, int lda,
-                   const float *B, int ldb, float beta, float *C, int ldc);
+                  const float *B, int ldb, float beta, float *C, int ldc);
 
 // 计算一个更小的 4 * 4 的 C 矩阵分块
 void AddDot4x4(int k, float alpha, const float *A, int lda, const float *B,
-- 
GitLab