diff --git a/lite/backends/arm/math/packed_sgemm.cc b/lite/backends/arm/math/packed_sgemm.cc index 2e869f2df3a292b264dae948f13c64e05854d052..121b85b6a8a26ab15190b4cacb4ae4ace0818960 100644 --- a/lite/backends/arm/math/packed_sgemm.cc +++ b/lite/backends/arm/math/packed_sgemm.cc @@ -323,7 +323,7 @@ void sgemm_prepack(bool is_transB, (has_act == true && act_type == lite_api::ActivationType::kRelu); bool has_beta = fabsf(beta) > 1e-8f ? true : false; bool a53_sgemm = act_flag && !has_beta; - if (a53_sgemm) { + if (a53_sgemm) {//无act 无 beta sgemm_prepacked_6x8_a53(is_transB, M, N, @@ -2368,16 +2368,19 @@ void sgemm_prepacked_8x12(bool is_transB, //! MBLOCK * x (result) + MBLOCK * k (A) + x * k (B) = l2 int x_block = (l2_cache - (MBLOCK * K)) / (sizeof(float) * (K + MBLOCK)); x_block /= NBLOCK; - x_block *= NBLOCK; - int x_num = (N + (x_block - 1)) / x_block; - x_block = (N + x_num - 1) / x_num; - x_block = (x_block + NBLOCK - 1) / NBLOCK; - x_block *= NBLOCK; - x_block = x_block < NBLOCK ? NBLOCK : x_block; - + x_block *= NBLOCK;//一次可以放多少个B的列 为NBLOCK的整数倍 + int x_num = (N + (x_block - 1)) / x_block; //可以分x_num 进行计算, 一次放x_block列,可以分x_num计算完成。 + LOG(INFO) << "x_block:"< N) { xmax = N; } - int bblocks = (xmax - x0 + NBLOCK - 1) / NBLOCK; - remain = xmax - x0 - (bblocks - 1) * NBLOCK; + int bblocks = (xmax - x0 + NBLOCK - 1) / NBLOCK;//B 有多少个NBLOCK + remain = xmax - x0 - (bblocks - 1) * NBLOCK;//不够NBLOCK,的余数 if (remain > 0) { flag_p_remain = true; } @@ -2402,7 +2405,7 @@ void sgemm_prepacked_8x12(bool is_transB, } else { loadb(b_pannel, B, ldb, 0, K, x0, xmax); } -#pragma omp parallel for num_threads(threads) +#pragma omp parallel for num_threads(threads)//在A的M方向,按照MBLOCK进行MP for (unsigned int y = 0; y < M; y += MBLOCK) { unsigned int ymax = y + MBLOCK; if (ymax > M) { @@ -2421,7 +2424,7 @@ void sgemm_prepacked_8x12(bool is_transB, bias_local[7] = bias[y + 7]; } - float cout0[NBLOCK]; + float cout0[NBLOCK];//C 输出 8*12 float cout1[NBLOCK]; float cout2[NBLOCK]; float cout3[NBLOCK];