From d21a05dc0d2ca3d8f87802d9ed546322ca7e07c0 Mon Sep 17 00:00:00 2001 From: TianXiaogang Date: Wed, 27 Nov 2019 21:23:39 +0800 Subject: [PATCH] fix winograd reinitwhenneed (#2511) * add winograd c4 implement (#2494) test=develop fix: fix conv_block prepack_input_nxwc4 bug * fix: optimize sgemm_c4 in armv7 change condition of choose winograd kernel * fix: change conv choose kernel condition test=develop --- lite/kernels/arm/conv_compute.cc | 4 ---- lite/kernels/arm/conv_winograd.cc | 8 ++++++++ lite/kernels/arm/conv_winograd.h | 1 + 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/lite/kernels/arm/conv_compute.cc b/lite/kernels/arm/conv_compute.cc index de1a9190df..8fed33bf69 100644 --- a/lite/kernels/arm/conv_compute.cc +++ b/lite/kernels/arm/conv_compute.cc @@ -68,10 +68,6 @@ void ConvCompute::PrepareForRun() { VLOG(3) << "invoking dw conv"; } else if (param.groups == 1 && kw == 3 && stride == 1 && kps_equal && no_dilation) { - int tile_block = 8; -#ifdef __aarch64__ - tile_block = 16; -#endif bool use_winograd = (threads == 1 && oc >= 4 && ic >= 4 && hout >= 6 && wout >= 6 && pads_equal) || diff --git a/lite/kernels/arm/conv_winograd.cc b/lite/kernels/arm/conv_winograd.cc index af1b3aeacf..d02cabf277 100644 --- a/lite/kernels/arm/conv_winograd.cc +++ b/lite/kernels/arm/conv_winograd.cc @@ -49,6 +49,10 @@ void WinogradConv::ReInitWhenNeeded() { int parallel_threads = (((ow + 5) / 6) * ((oh + 5) / 6) + tile_block - 1) / tile_block; if (threads <= 2 && parallel_threads >= threads) { + if (last_kernel_is_c4_ == 1) { + return; + } + last_kernel_is_c4_ = 1; auto pad = *(param.paddings); int pad_h = pad[0]; int pad_w = pad[2]; @@ -68,6 +72,10 @@ void WinogradConv::ReInitWhenNeeded() { weights_data_, param.filter->data(), ic, oc, trans_tmp_ptr); free(trans_tmp_ptr); } else { + if (last_kernel_is_c4_ == 0) { + return; + } + last_kernel_is_c4_ = 0; int tile_w = (ow + 5) / 6; int tile_h = (oh + 5) / 6; diff --git a/lite/kernels/arm/conv_winograd.h b/lite/kernels/arm/conv_winograd.h index 33f0edc017..40ea54b291 100644 --- a/lite/kernels/arm/conv_winograd.h +++ b/lite/kernels/arm/conv_winograd.h @@ -40,6 +40,7 @@ class WinogradConv : public KernelLite { Tensor weights_; DDim last_shape_; int workspace_size_{0}; + int last_kernel_is_c4_{-1}; }; } // namespace arm -- GitLab