未验证 提交 9862e591 编写于 作者: W WangLiu 提交者: GitHub

Merge pull request #574 from smilejames/develop

Add OpenMP parallelization to gemm and test_mobilenet
......@@ -18,6 +18,9 @@ limitations under the License. */
#ifndef X86
#include <arm_neon.h>
#endif
#ifdef _OPENMP
#include <omp.h>
#endif
namespace paddle_mobile {
namespace operators {
......@@ -158,6 +161,7 @@ void PackMatrixB_(int k, int n, int n_tail, const float *B, int ldb,
// Blocked (tiled) matrix multiplication
void InnerKernel(int mc, int nc, float alpha, const float *a, const float *b,
float beta, float *c, float *C, int ldc, bool relu) {
#pragma omp parallel for
for (int j = 0; j < nc; j += NR) {
for (int i = 0; i < mc; i += MR) {
// AddDot4x4(KC, a + i * KC, b + j * KC, c + i * NC + j, NC);
......@@ -187,6 +191,7 @@ void InnerKernel(int mc, int nc, float alpha, const float *a, const float *b,
void InnerKernelWithBn(int mc, int nc, float alpha, const float *a,
const float *b, float beta, float *c, float *C, int ldc,
bool relu, float *new_scale, float *new_bias) {
#pragma omp parallel for
for (int j = 0; j < nc; j += NR) {
for (int i = 0; i < mc; i += MR) {
// AddDot4x4(KC, a + i * KC, b + j * KC, c + i * NC + j, NC);
......
......@@ -18,6 +18,7 @@ limitations under the License. */
int main() {
paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
paddle_mobile.SetThreadNum(4);
auto time1 = time();
if (paddle_mobile.Load(g_mobilenet, true)) {
auto time2 = time();
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册