diff --git a/lite/backends/arm/math/packed_sgemm.cc b/lite/backends/arm/math/packed_sgemm.cc old mode 100644 new mode 100755 index 121b85b6a8a26ab15190b4cacb4ae4ace0818960..2e869f2df3a292b264dae948f13c64e05854d052 --- a/lite/backends/arm/math/packed_sgemm.cc +++ b/lite/backends/arm/math/packed_sgemm.cc @@ -323,7 +323,7 @@ void sgemm_prepack(bool is_transB, (has_act == true && act_type == lite_api::ActivationType::kRelu); bool has_beta = fabsf(beta) > 1e-8f ? true : false; bool a53_sgemm = act_flag && !has_beta; - if (a53_sgemm) {//无act 无 beta + if (a53_sgemm) { sgemm_prepacked_6x8_a53(is_transB, M, N, @@ -2368,19 +2368,16 @@ void sgemm_prepacked_8x12(bool is_transB, //! MBLOCK * x (result) + MBLOCK * k (A) + x * k (B) = l2 int x_block = (l2_cache - (MBLOCK * K)) / (sizeof(float) * (K + MBLOCK)); x_block /= NBLOCK; - x_block *= NBLOCK;//一次可以放多少个B的列 为NBLOCK的整数倍 - int x_num = (N + (x_block - 1)) / x_block; //可以分x_num 进行计算, 一次放x_block列,可以分x_num计算完成。 - LOG(INFO) << "x_block:"< N) { xmax = N; } - int bblocks = (xmax - x0 + NBLOCK - 1) / NBLOCK;//B 有多少个NBLOCK - remain = xmax - x0 - (bblocks - 1) * NBLOCK;//不够NBLOCK,的余数 + int bblocks = (xmax - x0 + NBLOCK - 1) / NBLOCK; + remain = xmax - x0 - (bblocks - 1) * NBLOCK; if (remain > 0) { flag_p_remain = true; } @@ -2405,7 +2402,7 @@ void sgemm_prepacked_8x12(bool is_transB, } else { loadb(b_pannel, B, ldb, 0, K, x0, xmax); } -#pragma omp parallel for num_threads(threads)//在A的M方向,按照MBLOCK进行MP +#pragma omp parallel for num_threads(threads) for (unsigned int y = 0; y < M; y += MBLOCK) { unsigned int ymax = y + MBLOCK; if (ymax > M) { @@ -2424,7 +2421,7 @@ void sgemm_prepacked_8x12(bool is_transB, bias_local[7] = bias[y + 7]; } - float cout0[NBLOCK];//C 输出 8*12 + float cout0[NBLOCK]; float cout1[NBLOCK]; float cout2[NBLOCK]; float cout3[NBLOCK];