From 9ed3882a94ff382e50a49e4dda574755e6c1996e Mon Sep 17 00:00:00 2001
From: Megvii Engine Team <megengine@megvii.com>
Date: Fri, 5 Jun 2020 15:39:09 +0800
Subject: [PATCH] fix(opr/dnn): fix winograd fast run mismatch

GitOrigin-RevId: d308085b9fe16f8aae874346a08f55428a85bb76
---
 dnn/include/megdnn/oprs/nn.h                |  6 ++
 dnn/src/arm_common/conv_bias/int8/algos.cpp |  5 +-
 dnn/src/common/conv_bias.cpp                | 99 +++++++++++++++++++++
 src/gopt/impl/weights_preprocess.cpp        | 12 ++-
 src/opr/impl/dnn/convolution.cpp            | 45 +++++++++-
 5 files changed, 154 insertions(+), 13 deletions(-)

diff --git a/dnn/include/megdnn/oprs/nn.h b/dnn/include/megdnn/oprs/nn.h
index e3c6e0461..d740929fd 100644
--- a/dnn/include/megdnn/oprs/nn.h
+++ b/dnn/include/megdnn/oprs/nn.h
@@ -351,6 +351,12 @@ public:
             const TensorLayout& bias, const TensorLayout& z,
             const TensorLayout& dst) = 0;
 
+    static void deduce_winograd_origin_layout_and_param(
+            const Param::Format format, const size_t output_block_size,
+            const TensorLayout& src_layout,
+            const TensorLayout& winograd_filter_layout,
+            TensorLayout& origin_layout, Param& origin_param);
+
     enum class BiasMode : uint32_t {
         NO_BIAS = 0,             //!< no bias
         BROADCAST_CHANNEL_BIAS,  //!< broadcast channel bias, [1, c, 1, 1]
diff --git a/dnn/src/arm_common/conv_bias/int8/algos.cpp b/dnn/src/arm_common/conv_bias/int8/algos.cpp
index 348d7faee..8ef822292 100644
--- a/dnn/src/arm_common/conv_bias/int8/algos.cpp
+++ b/dnn/src/arm_common/conv_bias/int8/algos.cpp
@@ -285,12 +285,14 @@ bool ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::usable(
     bool is_matmul_usable = false;
     using Strategy = winograd::winograd_2x3_4x4_s8_f32_nchw44;
+    using PackMode = fallback::MatrixMulImpl::AlgoBase::PackMode;
     Strategy strategy(param.src_type, param.filter_type, param.dst_type);
     is_matmul_usable = m_matmul_algo->usable(
             megdnn::winograd::ConvBias<Strategy,
                                        param::MatrixMul::Format::MK4>(
                     strategy, m_tile_size, param)
                     .get_matmul_kern_param(param));
     return is_matmul_usable &&
+           m_matmul_algo->packmode() == PackMode::NO_PACK &&
            ((opr->param().format == param::ConvBias::Format::NCHW44 &&
              param.filter_type.enumv() == DTypeEnum::QuantizedS8) ||
             ((opr->param().format ==
@@ -308,8 +310,7 @@
            (param.filter_meta.dilation[0] ==
                     param.filter_meta.dilation[1] &&
             param.filter_meta.dilation[0] == 1) &&
-           (param.compute_mode == param::ConvBias::ComputeMode::FLOAT32 ||
-            param.compute_mode == param::ConvBias::ComputeMode::DEFAULT) &&
+           param.compute_mode == param::ConvBias::ComputeMode::FLOAT32 &&
            param.src_type.enumv() == DTypeEnum::QuantizedS8 &&
            param.bias_type.enumv() == DTypeEnum::QuantizedS32 &&
            param.dst_type.enumv() == DTypeEnum::QuantizedS8;
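Note on the F23 naming and the layout math used throughout this patch: for a
winograd transform F(m x m, r x r) the transformed tile is alpha x alpha with
alpha = m + r - 1, so the original filter size can be recovered as
r = alpha - m + 1. That identity is what the conv_bias.cpp hunk below relies
on (FH = winograd_filter_layout[1] - output_block_size + 1). A minimal
standalone sketch of the identity; the helper names are ours, not MegDNN's:

#include <cassert>
#include <cstddef>

// alpha = m + r - 1: an m x m output tile of an r x r filter is computed
// from an (m + r - 1) x (m + r - 1) transformed tile.
constexpr std::size_t winograd_alpha(std::size_t m, std::size_t r) {
    return m + r - 1;
}

// Inverse direction, as used when deducing the origin filter layout:
// r = alpha - m + 1.
constexpr std::size_t origin_filter_size(std::size_t alpha, std::size_t m) {
    return alpha - m + 1;
}

int main() {
    // F(2x2, 3x3), the case behind AlgoS8CF32WinogradF23_4x4_NCHW44.
    static_assert(winograd_alpha(2, 3) == 4, "alpha of F(2x2, 3x3) is 4");
    static_assert(origin_filter_size(4, 2) == 3, "recovers r = 3");
    assert(origin_filter_size(winograd_alpha(2, 3), 2) == 3);
    return 0;
}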
diff --git a/dnn/src/common/conv_bias.cpp b/dnn/src/common/conv_bias.cpp
index c486a1193..e4b7f013d 100644
--- a/dnn/src/common/conv_bias.cpp
+++ b/dnn/src/common/conv_bias.cpp
@@ -164,6 +164,105 @@ ConvBiasForward::CanonizedFilterMeta ConvBiasForward::check_exec(
     }
     return ret;
 }
+/*!
+ * \brief deduce the origin filter layout and param from the winograd-transformed one
+ */
+void ConvBiasForward::deduce_winograd_origin_layout_and_param(
+        const Param::Format format, const size_t output_block_size,
+        const TensorLayout& src_layout,
+        const TensorLayout& winograd_filter_layout, TensorLayout& origin_layout,
+        Param& origin_param) {
+    if (format == megdnn::param::ConvBias::Format::NCHW88_WINOGRAD ||
+        format == megdnn::param::ConvBias::Format::NCHW44_WINOGRAD ||
+        format == megdnn::param::ConvBias::Format::NCHW_WINOGRAD) {
+        //! change NCHWxx_WINOGRAD to NCHWxx
+        size_t OC = 0;
+        size_t IC = 0;
+        size_t GROUP = 1;
+        size_t FH = winograd_filter_layout[1] - output_block_size + 1;
+
+        //! {alpha, alpha, IC, OC}
+        if (winograd_filter_layout.ndim == 4) {
+            OC = winograd_filter_layout[3];
+            IC = winograd_filter_layout[2];
+        }
+        //! {group, alpha, alpha, IC, OC}
+        else if (winograd_filter_layout.ndim == 5) {
+            OC = winograd_filter_layout[4];
+            IC = winograd_filter_layout[3];
+            GROUP = winograd_filter_layout[0];
+        }
+        //! {alpha, alpha, OC/f, IC/f, f, f}
+        else if (winograd_filter_layout.ndim == 6) {
+            OC = winograd_filter_layout[2] * winograd_filter_layout[5];
+            IC = winograd_filter_layout[3] * winograd_filter_layout[4];
+        }
+        //! {group, alpha, alpha, OC/f, IC/f, f, f}
+        else if (winograd_filter_layout.ndim == 7) {
+            OC = winograd_filter_layout[3] * winograd_filter_layout[6];
+            IC = winograd_filter_layout[4] * winograd_filter_layout[5];
+            GROUP = winograd_filter_layout[0];
+        }
+        auto origin_data_type = winograd_filter_layout.dtype;
+        if (src_layout.dtype.enumv() == DTypeEnum::QuantizedS8) {
+            if (origin_data_type.enumv() == DTypeEnum::QuantizedS16) {
+                float scale =
+                        origin_data_type.param<dtype::QuantizedS16>().scale;
+                origin_data_type = megdnn::dtype::QuantizedS8(scale);
+            } else {
+                //! In order to keep the scale of the filter, the transformed
+                //! qint8 winograd filter computed in float is QuantizedS32
+                megdnn_assert(origin_data_type.enumv() ==
+                              DTypeEnum::QuantizedS32);
+                float scale =
+                        origin_data_type.param<dtype::QuantizedS32>().scale;
+                origin_data_type = megdnn::dtype::QuantizedS8(scale);
+            }
+        }
+
+        if (GROUP == 1) {
+            if (format == megdnn::param::ConvBias::Format::NCHW_WINOGRAD) {
+                origin_layout =
+                        TensorLayout({OC, IC, FH, FH}, origin_data_type);
+            } else if (format ==
+                       megdnn::param::ConvBias::Format::NCHW44_WINOGRAD) {
+                origin_layout = TensorLayout({OC / 4, IC / 4, FH, FH, 4, 4},
+                                             origin_data_type);
+            } else {
+                megdnn_assert(format ==
+                              megdnn::param::ConvBias::Format::NCHW88_WINOGRAD);
+                origin_layout = TensorLayout({OC / 8, IC / 8, FH, FH, 8, 8},
+                                             origin_data_type);
+            }
+        } else {
+            if (format == megdnn::param::ConvBias::Format::NCHW_WINOGRAD) {
+                origin_layout =
+                        TensorLayout({GROUP, OC, IC, FH, FH}, origin_data_type);
+            } else if (format ==
+                       megdnn::param::ConvBias::Format::NCHW44_WINOGRAD) {
+                origin_layout =
+                        TensorLayout({GROUP, OC / 4, IC / 4, FH, FH, 4, 4},
+                                     origin_data_type);
+            } else {
+                megdnn_assert(format ==
+                              megdnn::param::ConvBias::Format::NCHW88_WINOGRAD);
+                origin_layout =
+                        TensorLayout({GROUP, OC / 8, IC / 8, FH, FH, 8, 8},
+                                     origin_data_type);
+            }
+        }
+        origin_param.output_block_size = 0;
+        if (format == megdnn::param::ConvBias::Format::NCHW_WINOGRAD) {
+            origin_param.format = megdnn::param::ConvBias::Format::NCHW;
+        } else if (format == megdnn::param::ConvBias::Format::NCHW44_WINOGRAD) {
+            origin_param.format = megdnn::param::ConvBias::Format::NCHW44;
+        } else {
+            megdnn_assert(format ==
+                          megdnn::param::ConvBias::Format::NCHW88_WINOGRAD);
+            origin_param.format = megdnn::param::ConvBias::Format::NCHW88;
+        }
+    }
+}
 
 template <typename T>
 struct NCHWParamTrait;
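To make the deduction above concrete: in the NCHW_WINOGRAD, GROUP == 1 case, a
{alpha, alpha, IC, OC} winograd filter maps back to an {OC, IC, FH, FH} origin
filter. A small self-contained sketch of that branch (illustrative only; the
real code above also handles the grouped and blocked NCHW44/NCHW88 layouts and
the quantized dtype remapping):

#include <array>
#include <cstddef>
#include <cstdio>

// Map a 4-dim winograd filter shape {alpha, alpha, IC, OC} plus the output
// block size m back to the origin NCHW filter shape {OC, IC, r, r}.
std::array<std::size_t, 4> origin_nchw_filter_shape(
        const std::array<std::size_t, 4>& winograd_filter, std::size_t m) {
    const std::size_t alpha = winograd_filter[0];
    const std::size_t ic = winograd_filter[2];
    const std::size_t oc = winograd_filter[3];
    const std::size_t r = alpha - m + 1;  // FH in the code above
    return {oc, ic, r, r};
}

int main() {
    // A 3x3 filter transformed for F(2x2, 3x3): alpha = 4, IC = 8, OC = 16.
    const auto shape = origin_nchw_filter_shape({4, 4, 8, 16}, 2);
    std::printf("{%zu, %zu, %zu, %zu}\n",  // prints {16, 8, 3, 3}
                shape[0], shape[1], shape[2], shape[3]);
    return 0;
}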
diff --git a/src/gopt/impl/weights_preprocess.cpp b/src/gopt/impl/weights_preprocess.cpp
index 31606613d..5c53caba0 100644
--- a/src/gopt/impl/weights_preprocess.cpp
+++ b/src/gopt/impl/weights_preprocess.cpp
@@ -103,18 +103,17 @@ void WinogradTransformReplacePass::apply(OptState& opt) const {
             winograd_preprocess_param.output_block_size =
                     winograd_param.output_block_size;
 
-            size_t pack_c_size = 1;
-            if (new_inp[0]->shape().ndim == 5) {
-                pack_c_size = new_inp[0]->layout().shape[4];
-            }
-
+            auto conv_bias_param = conv_bias_opr.param();
+            //! If the input dtype is QuantizedS8 and the matmul format is
+            //! MK4, the winograd compute type is float32.
             if (conv_bias_opr.input(0)->dtype().enumv() ==
                         DTypeEnum::QuantizedS8 &&
-                pack_c_size == 4 &&
                 winograd_preprocess_param.format ==
                         megdnn::param::MatrixMul::Format::MK4) {
                 winograd_preprocess_param.compute_mode =
                         megdnn::param::ConvBias::ComputeMode::FLOAT32;
+                conv_bias_param.compute_mode =
+                        megdnn::param::ConvBias::ComputeMode::FLOAT32;
             }
 
             auto winograd_preprocess_opr = opr::WinogradFilterPreprocess::make(
@@ -124,7 +123,6 @@ void WinogradTransformReplacePass::apply(OptState& opt) const {
                     inputs.size());
 
             SymbolVar new_conv_bias_opr;
-            auto conv_bias_param = conv_bias_opr.param();
             if (new_inp[0]->shape().ndim == 4) {
                 conv_bias_param.format =
                         megdnn::ConvBias::Param::Format::NCHW_WINOGRAD;
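The reason this pass now touches conv_bias_param as well: the compute mode
chosen for the winograd filter preprocess must match the one recorded on the
ConvBias opr itself, otherwise the param seen during fast-run profiling
differs from the param used at execution time and the cached algorithm choice
cannot be looked up again. A toy model of that invariant (the types are
illustrative, not MegDNN's):

#include <cassert>

enum class ComputeMode { DEFAULT, FLOAT32 };

// Stand-ins for the two params the pass above keeps in sync.
struct PreprocessParam { ComputeMode compute_mode = ComputeMode::DEFAULT; };
struct ConvBiasParam   { ComputeMode compute_mode = ComputeMode::DEFAULT; };

int main() {
    PreprocessParam winograd_preprocess_param;
    ConvBiasParam conv_bias_param;
    const bool qint8_mk4 = true;  // input is QuantizedS8 and format is MK4
    if (qint8_mk4) {
        // The fix: set FLOAT32 on both sides, not only on the preprocess opr.
        winograd_preprocess_param.compute_mode = ComputeMode::FLOAT32;
        conv_bias_param.compute_mode = ComputeMode::FLOAT32;
    }
    assert(winograd_preprocess_param.compute_mode ==
           conv_bias_param.compute_mode);
    return 0;
}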
diff --git a/src/opr/impl/dnn/convolution.cpp b/src/opr/impl/dnn/convolution.cpp
index 6db500041..43e666d95 100644
--- a/src/opr/impl/dnn/convolution.cpp
+++ b/src/opr/impl/dnn/convolution.cpp
@@ -562,6 +562,10 @@ class AlgoChooser {
         }
     }
 
+    static void get_origin_param_and_layouts(const ExeContext&,
+                                             ConvTensorLayouts&,
+                                             typename Opr::Param&) {}
+
     //! get all profile result, either by retrieving cache or profiling
     static AlgoChooserProfileCache::Result get_profile_result(
             ExeContext& ctx, bool enable_update);
@@ -600,10 +604,14 @@ template <typename Opr>
 AlgoChooserProfileCache::Result AlgoChooser<Opr>::get_profile_result(
         ExeContext& ctx, bool enable_update) {
     AlgoChooserProfileCache& cache = ctx.mgb_opr()->profile_cache();
-    auto param_blob = ctx.mgb_opr()->param_blob();
-    AlgoChooserProfileCache::Key cache_key{ctx.layouts().data(),
-                                           ctx.layouts().size(),
-                                           param_blob.first, param_blob.second};
+
+    ConvTensorLayouts origin_layouts = ctx.layouts();
+    typename Opr::Param origin_param = ctx.mgb_opr()->param();
+    get_origin_param_and_layouts(ctx, origin_layouts, origin_param);
+    AlgoChooserProfileCache::Key cache_key{origin_layouts.data(),
+                                           origin_layouts.size(), &origin_param,
+                                           sizeof(origin_param)};
+
     {
         auto&& rst = cache.get(cache_key);
         if (rst.valid())
@@ -658,6 +666,23 @@
     return prof_rst;
 }
 
+template <>
+void AlgoChooser<megdnn::ConvBias>::get_origin_param_and_layouts(
+        const ExeContext& ctx, ConvTensorLayouts& layouts,
+        megdnn::ConvBias::Param& param) {
+    auto format = static_cast<megdnn::param::ConvBias::Format>(
+            ctx.megdnn_opr()->param().format);
+    size_t output_block_size = ctx.megdnn_opr()->param().output_block_size;
+    TensorLayout origin_layout;
+    megdnn::ConvBias::deduce_winograd_origin_layout_and_param(
+            format, output_block_size, ctx.layouts()[0], ctx.layouts()[1],
+            origin_layout, param);
+    for (size_t i = 0; i < ctx.layouts().size(); i++) {
+        layouts[i] = ctx.layouts()[i];
+    }
+    layouts[1] = origin_layout;
+}
+
 template <typename Opr>
 typename AlgoChooser<Opr>::ImplAlgo AlgoChooser<Opr>::choose_by_profile(
         ExeContext& ctx, bool require_reproducible, bool enable_update) {
@@ -724,6 +749,18 @@ void AlgoChooser<megdnn::ConvBias>::ExeContext::
                 ConvBiasForward::get_matmul_format(winograd_param);
         winograd_preprocess_opr->param().output_block_size =
                 winograd_param.output_block_size;
+        //! When the filter dtype is QuantizedS8 and the matmul format is MK4,
+        //! the winograd compute type is float32.
+        if (m_layouts[1].dtype.enumv() == DTypeEnum::QuantizedS8 &&
+            param.opr_param.format == megdnn::ConvBias::Param::Format::NCHW44) {
+            if (winograd_preprocess_opr->param().format ==
+                megdnn::param::MatrixMul::Format::MK4) {
+                winograd_preprocess_opr->param().compute_mode =
+                        ConvBias::Param::ComputeMode::FLOAT32;
+                param.opr_param.compute_mode =
+                        ConvBias::Param::ComputeMode::FLOAT32;
+            }
+        }
         TensorLayout filter_transform_layout;
         winograd_preprocess_opr->deduce_layout(m_layouts[1],
                                                filter_transform_layout);
-- 
GitLab
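The underlying cause of the fast-run mismatch fixed here: the profile cache
key used to be built from whatever layouts and param reached AlgoChooser, so a
key computed against winograd-transformed filter layouts never matched a key
computed from the origin network. With get_origin_param_and_layouts, both
paths canonicalize to the origin representation before the key is formed. A
toy model of the effect (the structures are illustrative, not MegDNN's):

#include <cassert>
#include <string>

// A profile-cache key built from a serialized layout and param description.
struct CacheKey {
    std::string layouts;
    std::string param;
    bool operator==(const CacheKey& rhs) const {
        return layouts == rhs.layouts && param == rhs.param;
    }
};

// Canonicalize: always describe the filter in its origin (pre-winograd) form.
CacheKey make_key_from_origin(const std::string& origin_filter,
                              const std::string& param) {
    return {origin_filter, param};
}

int main() {
    // Profiling ran on a winograd-transformed filter; execution starts from
    // the plain NCHW filter. After canonicalization both yield the same key.
    CacheKey profiled = make_key_from_origin("{16,8,3,3}:NCHW", "NCHW:FLOAT32");
    CacheKey executed = make_key_from_origin("{16,8,3,3}:NCHW", "NCHW:FLOAT32");
    assert(profiled == executed);  // cache hit instead of a re-profile
    return 0;
}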