From 43cee33a23cb1dce5501f0642a38f97dad8cea45 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Thu, 2 Aug 2018 15:38:44 +0800 Subject: [PATCH] add mkl packed gemm --- paddle/fluid/operators/math/blas.h | 37 +++++++++++++ paddle/fluid/operators/math/blas_impl.h | 73 +++++++++++++++++++++++++ paddle/fluid/platform/dynload/mklml.h | 8 +++ 3 files changed, 118 insertions(+) diff --git a/paddle/fluid/operators/math/blas.h b/paddle/fluid/operators/math/blas.h index 70f88f24f68..2470df9d783 100644 --- a/paddle/fluid/operators/math/blas.h +++ b/paddle/fluid/operators/math/blas.h @@ -90,6 +90,23 @@ class Blas { void GEMM(bool transA, bool transB, int M, int N, int K, T alpha, const T* A, int lda, const T* B, int ldb, T beta, T* C, int ldc) const; + template + T* GEMM_ALLOC(const CBLAS_IDENTIFIER id, const int M, const int N, + const int K) const; + + template + void GEMM_PACK(const CBLAS_IDENTIFIER id, const CBLAS_TRANSPOSE trans, int M, + int N, int K, const T alpha, const T* src, const int ld, + T* dst) const; + + template + void GEMM_COMPUTE(int transA, int transB, int M, int N, int K, const T* A, + const int lda, const T* B, const int ldb, T beta, T* C, + const int ldc) const; + + template + void GEMM_FREE(T* data) const; + template void MatMul(const framework::Tensor& mat_a, bool trans_a, const framework::Tensor& mat_b, bool trans_b, T alpha, @@ -146,6 +163,26 @@ class BlasT : private Blas { Base()->template GEMM(args...); } + template + T* GEMM_ALLOC(ARGS... args) const { + Base()->template GEMM_ALLOC(args...); + } + + template + void GEMM_PACK(ARGS... args) const { + Base()->template GEMM_PACK(args...); + } + + template + void GEMM_COMPUTE(ARGS... args) const { + Base()->template GEMM_COMPUTE(args...); + } + + template + void GEMM_FREE(ARGS... args) const { + Base()->template GEMM_FREE(args...); + } + template void MatMul(ARGS... args) const { Base()->template MatMul(args...); diff --git a/paddle/fluid/operators/math/blas_impl.h b/paddle/fluid/operators/math/blas_impl.h index a0802ef90ca..4164fe6229c 100644 --- a/paddle/fluid/operators/math/blas_impl.h +++ b/paddle/fluid/operators/math/blas_impl.h @@ -31,6 +31,26 @@ struct CBlas { platform::dynload::cblas_sgemm(args...); } + template + static float *GEMM_ALLOC(ARGS... args) { + return platform::dynload::cblas_sgemm_alloc(args...); + } + + template + static void GEMM_PACK(ARGS... args) { + platform::dynload::cblas_sgemm_pack(args...); + } + + template + static void GEMM_COMPUTE(ARGS... args) { + platform::dynload::cblas_sgemm_compute(args...); + } + + template + static void GEMM_FREE(ARGS... args) { + platform::dynload::cblas_sgemm_free(args...); + } + #ifdef PADDLE_WITH_LIBXSMM template static void SMM_GEMM(ARGS... args) { @@ -71,6 +91,26 @@ struct CBlas { platform::dynload::cblas_dgemm(args...); } + template + static double *GEMM_ALLOC(ARGS... args) { + return platform::dynload::cblas_dgemm_alloc(args...); + } + + template + static void GEMM_PACK(ARGS... args) { + platform::dynload::cblas_dgemm_pack(args...); + } + + template + static void GEMM_COMPUTE(ARGS... args) { + platform::dynload::cblas_dgemm_compute(args...); + } + + template + static void GEMM_FREE(ARGS... args) { + platform::dynload::cblas_dgemm_free(args...); + } + #ifdef PADDLE_WITH_LIBXSMM template static void SMM_GEMM(ARGS... args) { @@ -224,6 +264,39 @@ inline void GEMM_WARP(CBLAS_ORDER order, CBLAS_TRANSPOSE transA, beta, C, ldc); } +template <> +template +T *Blas::GEMM_ALLOC(const CBLAS_IDENTIFIER id, + const int M, const int N, + const int K) const { + return CBlas::GEMM_ALLOC(id, M, N, K); +} + +template <> +template +void Blas::GEMM_PACK(const CBLAS_IDENTIFIER id, + const CBLAS_TRANSPOSE trans, + int M, int N, int K, + const T alpha, const T *src, + const int ld, T *dst) const { + CBlas::GEMM_PACK(CblasRowMajor, id, trans, M, N, K, alpha, src, ld, dst); +} + +template <> +template +void Blas::GEMM_COMPUTE( + int transA, int transB, int M, int N, int K, const T *A, const int lda, + const T *B, const int ldb, T beta, T *C, const int ldc) const { + CBlas::GEMM_COMPUTE(CblasRowMajor, transA, transB, M, N, K, A, lda, B, ldb, + beta, C, ldc); +} + +template <> +template +void Blas::GEMM_FREE(T *data) const { + CBlas::GEMM_FREE(data); +} + template <> template void Blas::GEMM(CBLAS_TRANSPOSE transA, diff --git a/paddle/fluid/platform/dynload/mklml.h b/paddle/fluid/platform/dynload/mklml.h index 17acefe8cde..9e7a616094e 100644 --- a/paddle/fluid/platform/dynload/mklml.h +++ b/paddle/fluid/platform/dynload/mklml.h @@ -60,6 +60,14 @@ extern void* mklml_dso_handle; __macro(cblas_dgemm_batch); \ __macro(vsAdd); \ __macro(vdAdd); \ + __macro(cblas_sgemm_alloc); \ + __macro(cblas_sgemm_pack); \ + __macro(cblas_sgemm_compute); \ + __macro(cblas_sgemm_free); \ + __macro(cblas_dgemm_alloc); \ + __macro(cblas_dgemm_pack); \ + __macro(cblas_dgemm_compute); \ + __macro(cblas_dgemm_free); \ __macro(MKL_Set_Num_Threads) MKLML_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_MKLML_WRAP); -- GitLab