Commit 3c5da142 authored by Liangliang He

Merge branch 'gemm-doc' into 'master'

Add gemm comments

See merge request !689
@@ -18,6 +18,17 @@
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#include "mace/kernels/gemm.h" #include "mace/kernels/gemm.h"
/**
 * Gemm does fast batched matrix multiplication.
 * It is optimized for arm64-v8a and armeabi-v7a using NEON.
 *
 * We adopt two-level tiling to make better use of the L1 cache and registers.
 * For register tiling, functions like GemmXYZ compute GEMM for
 * matrix[X, Y] * matrix[Y, Z] with all data fitting in registers.
 * For cache tiling, we try to compute one block of the multiplication with
 * the two input matrices and the output matrix fitting in the L1 cache.
 */
#if defined(MACE_ENABLE_NEON)
#include <arm_neon.h>
#endif
...
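To make the register-tiling idea above concrete, here is a minimal sketch of one GemmXYZ-style micro-kernel: a hypothetical Gemm144 computing c[1, 4] += a[1, 4] * b[4, 4] entirely in NEON registers. The name, signature, and stride handling are illustrative, not the actual MACE kernel; index_t comes from mace/core/types.h.

#if defined(MACE_ENABLE_NEON)
// Sketch only: a register-tiled micro-kernel in the GemmXYZ scheme.
// Computes c[1, 4] += a[1, 4] * b[4, 4]; the row of A, the four rows of
// B, and the row of C all stay in NEON registers during the update.
// stride_b is the row stride (leading dimension) of B.
inline void Gemm144Sketch(const float *a_ptr,
                          const float *b_ptr,
                          const index_t stride_b,
                          float *c_ptr) {
  float32x4_t a = vld1q_f32(a_ptr);
  float32x4_t b0 = vld1q_f32(b_ptr);
  float32x4_t b1 = vld1q_f32(b_ptr + stride_b);
  float32x4_t b2 = vld1q_f32(b_ptr + 2 * stride_b);
  float32x4_t b3 = vld1q_f32(b_ptr + 3 * stride_b);
  float32x4_t c = vld1q_f32(c_ptr);
  // c += a[k] * row_k(B) for k = 0..3, with no memory traffic in between.
  c = vmlaq_lane_f32(c, b0, vget_low_f32(a), 0);
  c = vmlaq_lane_f32(c, b1, vget_low_f32(a), 1);
  c = vmlaq_lane_f32(c, b2, vget_high_f32(a), 0);
  c = vmlaq_lane_f32(c, b3, vget_high_f32(a), 1);
  vst1q_f32(c_ptr, c);
}
#endif

A cache-tiled outer loop would then walk such micro-kernels over blocks sized so that the two input blocks and the output block together fit in the L1 cache.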
@@ -21,9 +21,15 @@
#include "mace/core/types.h" #include "mace/core/types.h"
// The Gemm function does fast batched matrix-matrix multiplication.
// The Gemv function does fast batched matrix-vector multiplication.
namespace mace {
namespace kernels {
// Gemm calculates A[batch, height, K] dot B[batch, K, width] within each batch,
// and outputs to C[batch, height, width].
// height, K, and width are the matrix dimension sizes after transpose (if any).
void Gemm(const float *A,
          const float *B,
          const index_t batch,
@@ -44,6 +50,8 @@ void GemmRef(const float *A,
             const bool transpose_a = false,
             const bool transpose_b = false);
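For reference, the documented Gemm semantics correspond to the plain triple loop below. This is a sketch with hypothetical names, assuming row-major layout and no transpose; the real GemmRef declaration also takes the transpose flags shown above.

// Illustrative reference for the semantics documented above:
// C[b, i, j] = sum_k A[b, i, k] * B[b, k, j], row-major, no transpose.
// The function name and parameter order here are hypothetical.
void NaiveBatchGemm(const float *A, const float *B,
                    index_t batch, index_t height,
                    index_t K, index_t width, float *C) {
  for (index_t b = 0; b < batch; ++b) {
    const float *a = A + b * height * K;    // A[b] is height x K
    const float *bm = B + b * K * width;    // B[b] is K x width
    float *c = C + b * height * width;      // C[b] is height x width
    for (index_t i = 0; i < height; ++i) {
      for (index_t j = 0; j < width; ++j) {
        float sum = 0.f;
        for (index_t k = 0; k < K; ++k) {
          sum += a[i * K + k] * bm[k * width + j];
        }
        c[i * width + j] = sum;
      }
    }
  }
}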
// Gemv calculates M[height, width] dot V[batch, width] within each batch of V,
// and outputs to out[batch, height].
void Gemv(const float *m_ptr,
          const float *v_ptr,
          const index_t batch,
...
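Similarly, the Gemv semantics can be sketched as the loop below (hypothetical names, row-major M, one shared matrix applied to each batch of V).

// Illustrative reference for Gemv as documented above:
// out[b, h] = sum_w M[h, w] * V[b, w]. Names are hypothetical.
void NaiveBatchGemv(const float *m_ptr, const float *v_ptr,
                    index_t batch, index_t height, index_t width,
                    float *out_ptr) {
  for (index_t b = 0; b < batch; ++b) {
    const float *v = v_ptr + b * width;  // V[b] is a length-width vector
    float *out = out_ptr + b * height;   // out[b] is a length-height vector
    for (index_t h = 0; h < height; ++h) {
      float sum = 0.f;
      for (index_t w = 0; w < width; ++w) {
        sum += m_ptr[h * width + w] * v[w];
      }
      out[h] = sum;
    }
  }
}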