diff --git a/dnn/src/arm_common/conv_bias/fp32/f32_direct_nchw44_algo.cpp b/dnn/src/arm_common/conv_bias/fp32/f32_direct_nchw44_algo.cpp
index a024525e75cdb59123ad62cbea2218b84971a5f6..b1ee38aa97e08bbea899aa1b1605b1672dbd13e5 100644
--- a/dnn/src/arm_common/conv_bias/fp32/f32_direct_nchw44_algo.cpp
+++ b/dnn/src/arm_common/conv_bias/fp32/f32_direct_nchw44_algo.cpp
@@ -37,19 +37,20 @@ static inline size_t get_perthread_cache_bytes(const int ic, const int ih2,
 static void get_rectified_size(
         const megdnn::fallback::ConvBiasImpl::NCBKernSizeParam& param, int& ih2,
         int& iw2, int& oh2, int& ow2) {
+    constexpr int cacheline = 64 / sizeof(float);
     int ic = param.filter_meta.icpg;
     int iw = param.isz[1];
     int oh = param.osz[0];
     int ow = param.osz[1];
-
-    oh2 = oh;
-    ow2 = ow;
-    constexpr int cacheline = 64 / sizeof(float);
-    int block_oh =
-            l2_block_helper(param.nr_threads, oh, ic * iw * sizeof(float) * 2);
     auto&& fm = param.filter_meta;
     const int stride_h = static_cast<int>(fm.stride[0]);
     const int filter_h = static_cast<int>(fm.spatial[0]);
+
+    oh2 = oh;
+    ow2 = ow;
+
+    int block_oh = l2_block_helper(param.nr_threads, oh,
+                                   ic * iw * sizeof(float) * stride_h);
     ih2 = block_oh * stride_h + filter_h - stride_h;
     iw2 = round_up(iw + 2 * static_cast<int>(fm.padding[1]), cacheline);
 }