diff --git a/src/operators/math/gemm.cpp b/src/operators/math/gemm.cpp index d7bc60e3c85f5ce5b5165d4d37caf397651eeccd..bb91adcc4db412db137fdc12831bad75e069e38c 100644 --- a/src/operators/math/gemm.cpp +++ b/src/operators/math/gemm.cpp @@ -18,6 +18,9 @@ limitations under the License. */ #ifndef X86 #include <arm_neon.h> #endif +#ifdef _OPENMP +#include <omp.h> +#endif namespace paddle_mobile { namespace operators { @@ -158,6 +161,7 @@ void PackMatrixB_(int k, int n, int n_tail, const float *B, int ldb, // 分块矩阵乘法 void InnerKernel(int mc, int nc, float alpha, const float *a, const float *b, float beta, float *c, float *C, int ldc, bool relu) { +#pragma omp parallel for for (int j = 0; j < nc; j += NR) { for (int i = 0; i < mc; i += MR) { // AddDot4x4(KC, a + i * KC, b + j * KC, c + i * NC + j, NC); @@ -187,6 +191,7 @@ void InnerKernel(int mc, int nc, float alpha, const float *a, const float *b, void InnerKernelWithBn(int mc, int nc, float alpha, const float *a, const float *b, float beta, float *c, float *C, int ldc, bool relu, float *new_scale, float *new_bias) { +#pragma omp parallel for for (int j = 0; j < nc; j += NR) { for (int i = 0; i < mc; i += MR) { // AddDot4x4(KC, a + i * KC, b + j * KC, c + i * NC + j, NC); diff --git a/test/net/test_mobilenet.cpp b/test/net/test_mobilenet.cpp index 1f38dc5d19d0e7bb54faf75a41419941e8b1f412..2e285695fb79f3ed5471a653c71a10b36ef4e7f2 100644 --- a/test/net/test_mobilenet.cpp +++ b/test/net/test_mobilenet.cpp @@ -18,6 +18,7 @@ limitations under the License. */ int main() { paddle_mobile::PaddleMobile paddle_mobile; + paddle_mobile.SetThreadNum(4); auto time1 = time(); if (paddle_mobile.Load(g_mobilenet, true)) { auto time2 = time();