diff --git a/dnn/src/x86/conv_bias/int8/algo_usable_preferred.cpp b/dnn/src/x86/conv_bias/int8/algo_usable_preferred.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..65dc72e1cc2cfb93c02b7e67dc5abc160b0d037b
--- /dev/null
+++ b/dnn/src/x86/conv_bias/int8/algo_usable_preferred.cpp
@@ -0,0 +1,247 @@
+/**
+ * \file dnn/src/x86/conv_bias/int8/algo_usable_preferred.cpp
+ * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+ *
+ * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ */
+
+#include "src/x86/conv_bias/int8/algo_usable_preferred.h"
+#include "src/x86/utils.h"
+
+#if MEGDNN_X86_WITH_MKL_DNN
+#include <mkldnn.hpp>
+#endif
+
+#include <cstring>
+
+#if MEGDNN_X86_WITH_MKL_DNN
+using namespace dnnl;
+#endif
+using namespace megdnn;
+using namespace x86;
+
+namespace megdnn {
+namespace x86 {
+
+bool chanwise_avx2_stride1_qint8_usable(
+        const ConvBiasImpl::NCBKernSizeParam& param) {
+    auto&& fm = param.filter_meta;
+    auto FH = fm.spatial[0];
+    bool available =
+            (param.bias_mode != BiasMode::BIAS) &&
+            ((param.src_type.enumv() == DTypeEnum::QuantizedS8 &&
+              param.filter_type.enumv() == DTypeEnum::QuantizedS8 &&
+              param.dst_type.enumv() == DTypeEnum::QuantizedS8) ||
+             (((param.src_type.enumv() == DTypeEnum::Int8 &&
+                param.filter_type.enumv() == DTypeEnum::Int8 &&
+                param.dst_type.enumv() == DTypeEnum::Int32) ||
+               (param.src_type.enumv() == DTypeEnum::QuantizedS8 &&
+                param.filter_type.enumv() == DTypeEnum::QuantizedS8 &&
+                param.dst_type.enumv() == DTypeEnum::QuantizedS32)))) &&
+            fm.format == ConvBiasImpl::Param::Format::NCHW &&
+            fm.spatial_ndim == 2 && fm.dilation[0] == 1 &&
+            fm.dilation[1] == 1 &&
+            (FH == 2 || FH == 3 || FH == 5 || FH == 7) &&
+            fm.stride[0] == 1 && fm.stride[1] == 1 && (fm.icpg == 1) &&
+            (fm.ocpg == 1) && is_supported(SIMDType::AVX2);
+    return available;
+}
+
+bool chanwise_avx2_stride1_qint8_preferred(
+        const ConvBiasImpl::NCBKernSizeParam& param) {
+    MEGDNN_MARK_USED_VAR(param);
+    return true;
+}
+
+bool chanwise_avx2_stride1_qint8_usable_preferred(
+        const ConvBiasImpl::NCBKernSizeParam& param) {
+    return chanwise_avx2_stride1_qint8_usable(param) &&
+           chanwise_avx2_stride1_qint8_preferred(param);
+}
+
+bool chanwise_avx2_stride2_qint8_usable(
+        const ConvBiasImpl::NCBKernSizeParam& param) {
+    auto&& fm = param.filter_meta;
+    auto FH = fm.spatial[0];
+    bool available =
+            (param.bias_mode != BiasMode::BIAS) &&
+            ((param.src_type.enumv() == DTypeEnum::QuantizedS8 &&
+              param.filter_type.enumv() == DTypeEnum::QuantizedS8 &&
+              param.dst_type.enumv() == DTypeEnum::QuantizedS8) ||
+             (((param.src_type.enumv() == DTypeEnum::Int8 &&
+                param.filter_type.enumv() == DTypeEnum::Int8 &&
+                param.dst_type.enumv() == DTypeEnum::Int32) ||
+               (param.src_type.enumv() == DTypeEnum::QuantizedS8 &&
+                param.filter_type.enumv() == DTypeEnum::QuantizedS8 &&
+                param.dst_type.enumv() == DTypeEnum::QuantizedS32)))) &&
+            fm.format == ConvBiasImpl::Param::Format::NCHW &&
+            fm.spatial_ndim == 2 && fm.dilation[0] == 1 &&
+            fm.dilation[1] == 1 &&
+            (FH == 2 || FH == 3 || FH == 5 || FH == 7) &&
+            fm.stride[0] == 2 && fm.stride[1] == 2 && (fm.icpg == 1) &&
+            (fm.ocpg == 1) && is_supported(SIMDType::AVX2);
+    return available;
+}
+
+bool chanwise_avx2_stride2_qint8_preferred(
+        const ConvBiasImpl::NCBKernSizeParam& param) {
+    MEGDNN_MARK_USED_VAR(param);
+    return true;
+}
+
+bool chanwise_avx2_stride2_qint8_usable_preferred(
+        const ConvBiasImpl::NCBKernSizeParam& param) {
+    return chanwise_avx2_stride2_qint8_usable(param) &&
+           chanwise_avx2_stride2_qint8_preferred(param);
+}
+
+bool direct_avx2_stride1_int8_usable(
+        const ConvBiasImpl::NCBKernSizeParam& param) {
+    auto&& fm = param.filter_meta;
+    auto FH = fm.spatial[0];
+    bool available = ((param.src_type.enumv() == DTypeEnum::QuantizedS8 &&
+                       param.filter_type.enumv() == DTypeEnum::QuantizedS8 &&
+                       param.dst_type.enumv() == DTypeEnum::QuantizedS8) ||
+                      (((param.src_type.enumv() == DTypeEnum::Int8 &&
+                         param.filter_type.enumv() == DTypeEnum::Int8 &&
+                         param.dst_type.enumv() == DTypeEnum::Int32) ||
+                        (param.src_type.enumv() == DTypeEnum::QuantizedS8 &&
+                         param.filter_type.enumv() == DTypeEnum::QuantizedS8 &&
+                         param.dst_type.enumv() == DTypeEnum::QuantizedS32)) &&
+                       param.bias_mode == BiasMode::NO_BIAS &&
+                       param.nonlineMode == NonlineMode::IDENTITY)) &&
+                     fm.format == ConvBiasImpl::Param::Format::NCHW &&
+                     fm.spatial_ndim == 2 && fm.dilation[0] == 1 &&
+                     fm.dilation[1] == 1 &&
+                     (FH == 2 || FH == 3 || FH == 5 || FH == 7) &&
+                     fm.stride[0] == 1 && fm.stride[1] == 1 &&
+                     is_supported(SIMDType::AVX2);
+    return available;
+}
+
+bool direct_avx2_stride1_int8_preferred(
+        const ConvBiasImpl::NCBKernSizeParam& param) {
+    auto&& fm = param.filter_meta;
+    auto IC = fm.icpg;
+    auto OC = fm.ocpg;
+    auto is_preferred = true;
+    if (IC > 128 && OC > 128)
+        is_preferred = false;
+
+    return is_preferred;
+}
+
+bool direct_avx2_stride1_int8_usable_preferred(
+        const ConvBiasImpl::NCBKernSizeParam& param) {
+    return direct_avx2_stride1_int8_usable(param) &&
+           direct_avx2_stride1_int8_preferred(param);
+}
+
+bool direct_avx2_stride2_int8_usable(
+        const ConvBiasImpl::NCBKernSizeParam& param) {
+    auto&& fm = param.filter_meta;
+    auto FH = fm.spatial[0];
+    bool available = ((param.src_type.enumv() == DTypeEnum::QuantizedS8 &&
+                       param.filter_type.enumv() == DTypeEnum::QuantizedS8 &&
+                       param.dst_type.enumv() == DTypeEnum::QuantizedS8) ||
+                      (((param.src_type.enumv() == DTypeEnum::Int8 &&
+                         param.filter_type.enumv() == DTypeEnum::Int8 &&
+                         param.dst_type.enumv() == DTypeEnum::Int32) ||
+                        (param.src_type.enumv() == DTypeEnum::QuantizedS8 &&
+                         param.filter_type.enumv() == DTypeEnum::QuantizedS8 &&
+                         param.dst_type.enumv() == DTypeEnum::QuantizedS32)) &&
+                       param.bias_mode == BiasMode::NO_BIAS &&
+                       param.nonlineMode == NonlineMode::IDENTITY)) &&
+                     fm.format == ConvBiasImpl::Param::Format::NCHW &&
+                     fm.spatial_ndim == 2 && fm.dilation[0] == 1 &&
+                     fm.dilation[1] == 1 &&
+                     (FH == 2 || FH == 3 || FH == 5 || FH == 7) &&
+                     fm.stride[0] == 2 && fm.stride[1] == 2 &&
+                     is_supported(SIMDType::AVX2);
+    return available;
+}
+
+bool direct_avx2_stride2_int8_preferred(
+        const ConvBiasImpl::NCBKernSizeParam& param) {
+    auto&& fm = param.filter_meta;
+    auto IC = fm.icpg;
+    auto OC = fm.ocpg;
+    auto is_preferred = false;
+    if (IC <= 31 && OC <= 31)
+        is_preferred = true;
+
+    return is_preferred;
+}
+
+bool direct_avx2_stride2_int8_usable_preferred(
+        const ConvBiasImpl::NCBKernSizeParam& param) {
+    return direct_avx2_stride2_int8_usable(param) &&
+           direct_avx2_stride2_int8_preferred(param);
+}
+
+#if MEGDNN_X86_WITH_MKL_DNN
+bool mkldnn_qint8_usable(const ConvBiasImpl::NCBKernSizeParam& param) {
+    auto&& fm = param.filter_meta;
+    return (param.src_type.enumv() == DTypeEnum::QuantizedS8 ||
+            param.src_type.enumv() == DTypeEnum::Int8) &&
+           (param.dst_type.enumv() == DTypeEnum::QuantizedS32 ||
+            param.dst_type.enumv() == DTypeEnum::Int32) &&
+           fm.format == param::ConvBias::Format::NCHW &&
+           fm.spatial_ndim == 2 && fm.dilation[0] == 1 &&
+           fm.dilation[1] == 1 && !fm.should_flip &&
+           param.bias_mode == BiasMode::NO_BIAS &&
+           param.nonlineMode == NonlineMode::IDENTITY;
+}
+
+bool mkldnn_qint8_preferred(const ConvBiasImpl::NCBKernSizeParam& param) {
+    MEGDNN_MARK_USED_VAR(param);
+    return is_supported(SIMDType::VNNI);
+}
+
+bool mkldnn_qint8_usable_preferred(
+        const ConvBiasImpl::NCBKernSizeParam& param) {
+    return mkldnn_qint8_usable(param) && mkldnn_qint8_preferred(param);
+}
+
+bool mkldnn_matmul_qint8_usable(const ConvBiasImpl::NCBKernSizeParam& param) {
+    auto&& fm = param.filter_meta;
+    return (param.src_type.enumv() == DTypeEnum::QuantizedS8 ||
+            param.src_type.enumv() == DTypeEnum::Int8) &&
+           (param.dst_type.enumv() == DTypeEnum::QuantizedS32 ||
+            param.dst_type.enumv() == DTypeEnum::Int32) &&
+           fm.format == param::ConvBias::Format::NCHW &&
+           fm.spatial_ndim == 2 && fm.group == 1 && fm.dilation[0] == 1 &&
+           fm.dilation[1] == 1 &&
+           param.bias_mode == BiasMode::NO_BIAS &&
+           param.nonlineMode == NonlineMode::IDENTITY &&
+           //! The matmul opr is only used in single thread
+           //! TODO: support the no-pack matmul algo in fallback im2col + matmul
+           param.nr_threads == 1_z;
+}
+
+bool mkldnn_matmul_qint8_preferred(
+        const ConvBiasImpl::NCBKernSizeParam& param) {
+    auto is_preferred = true;
+    auto&& fm = param.filter_meta;
+    megdnn_assert_internal(fm.group == 1 && fm.dilation[0] == 1 &&
+                           fm.dilation[1] == 1);
+
+    // single channel conv should never use matrix mul
+    if (fm.ocpg == 1 || fm.icpg == 1)
+        is_preferred = false;
+
+    return is_preferred && is_supported(SIMDType::VNNI);
+}
+
+bool mkldnn_matmul_qint8_usable_preferred(
+        const ConvBiasImpl::NCBKernSizeParam& param) {
+    return mkldnn_matmul_qint8_usable(param) &&
+           mkldnn_matmul_qint8_preferred(param);
+}
+#endif
+
+}  // namespace x86
+}  // namespace megdnn
+
+// vim: syntax=cpp.doxygen
diff --git a/dnn/src/x86/conv_bias/int8/algo_usable_preferred.h b/dnn/src/x86/conv_bias/int8/algo_usable_preferred.h
new file mode 100644
index 0000000000000000000000000000000000000000..240a88d232bba4d96713e435dc367df38d880125
--- /dev/null
+++ b/dnn/src/x86/conv_bias/int8/algo_usable_preferred.h
@@ -0,0 +1,56 @@
+/**
+ * \file dnn/src/x86/conv_bias/int8/algo_usable_preferred.h
+ * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+ *
+ * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ */
+#pragma once
+
+#include "src/common/utils.h"
+#include "src/x86/conv_bias/opr_impl.h"
+
+namespace megdnn {
+namespace x86 {
+
+bool chanwise_avx2_stride1_qint8_usable(const ConvBiasImpl::NCBKernSizeParam&);
+bool chanwise_avx2_stride1_qint8_preferred(
+        const ConvBiasImpl::NCBKernSizeParam&);
+bool chanwise_avx2_stride1_qint8_usable_preferred(
+        const ConvBiasImpl::NCBKernSizeParam&);
+
+bool chanwise_avx2_stride2_qint8_usable(const ConvBiasImpl::NCBKernSizeParam&);
+bool chanwise_avx2_stride2_qint8_preferred(
+        const ConvBiasImpl::NCBKernSizeParam&);
+bool chanwise_avx2_stride2_qint8_usable_preferred(
+        const ConvBiasImpl::NCBKernSizeParam&);
+
+bool direct_avx2_stride1_int8_usable(const ConvBiasImpl::NCBKernSizeParam&);
+bool direct_avx2_stride1_int8_preferred(const ConvBiasImpl::NCBKernSizeParam&);
+bool direct_avx2_stride1_int8_usable_preferred(
+        const ConvBiasImpl::NCBKernSizeParam&);
+
+bool direct_avx2_stride2_int8_usable(const ConvBiasImpl::NCBKernSizeParam&);
+bool direct_avx2_stride2_int8_preferred(const ConvBiasImpl::NCBKernSizeParam&);
+bool direct_avx2_stride2_int8_usable_preferred(
+        const ConvBiasImpl::NCBKernSizeParam&);
+
+#if MEGDNN_X86_WITH_MKL_DNN
+bool mkldnn_qint8_usable(const ConvBiasImpl::NCBKernSizeParam&);
+bool mkldnn_qint8_preferred(const ConvBiasImpl::NCBKernSizeParam&);
+bool mkldnn_qint8_usable_preferred(const ConvBiasImpl::NCBKernSizeParam&);
+
+bool mkldnn_matmul_qint8_usable(const ConvBiasImpl::NCBKernSizeParam&);
+bool mkldnn_matmul_qint8_preferred(const ConvBiasImpl::NCBKernSizeParam&);
+bool mkldnn_matmul_qint8_usable_preferred(
+        const ConvBiasImpl::NCBKernSizeParam&);
+#endif
+
+}  // namespace x86
+}  // namespace megdnn
+
+// vim: syntax=cpp.doxygen
diff --git a/dnn/src/x86/conv_bias/int8/algos.cpp b/dnn/src/x86/conv_bias/int8/algos.cpp
index 21caf2f1d9a094d17f4590a1cf83a5dd4c8c5de6..1e2a059bb504fff5643c66eaac46a4d05c889715 100644
--- a/dnn/src/x86/conv_bias/int8/algos.cpp
+++ b/dnn/src/x86/conv_bias/int8/algos.cpp
@@ -14,6 +14,7 @@
 #include "src/common/opr_delegate.h"
 #include "src/common/utils.h"
 #include "src/fallback/convolution/img2col_helper.h"
+#include "src/x86/conv_bias/int8/algo_usable_preferred.h"
 #include "src/x86/conv_bias/int8/avx2_chanwise_stride1.h"
 #include "src/x86/conv_bias/int8/avx2_chanwise_stride2.h"
 #include "src/x86/conv_bias/int8/avx2_direct_conv_stride1.h"
@@ -37,25 +38,7 @@ using namespace x86;
 
 bool ConvBiasImpl::AlgoChanWiseAvx2Stride1Qint8::usable(
         FallbackConvBiasImpl* /*opr*/, const NCBKernSizeParam& param,
         AlgoSelectionStrategy /*algo_selection_strategy*/) const {
-    auto&& fm = param.filter_meta;
-    auto FH = fm.spatial[0];
-    bool aviliable =
-            (param.bias_mode != BiasMode::BIAS) &&
-            ((param.src_type.enumv() == DTypeEnum::QuantizedS8 &&
-              param.filter_type.enumv() == DTypeEnum::QuantizedS8 &&
-              param.dst_type.enumv() == DTypeEnum::QuantizedS8) ||
-             (((param.src_type.enumv() == DTypeEnum::Int8 &&
-                param.filter_type.enumv() == DTypeEnum::Int8 &&
-                param.dst_type.enumv() == DTypeEnum::Int32) ||
-               (param.src_type.enumv() == DTypeEnum::QuantizedS8 &&
-                param.filter_type.enumv() == DTypeEnum::QuantizedS8 &&
-                param.dst_type.enumv() == DTypeEnum::QuantizedS32)))) &&
-            fm.format == Param::Format::NCHW && fm.spatial_ndim == 2 &&
-            fm.dilation[0] == 1 && fm.dilation[1] == 1 &&
-            (FH == 2 || FH == 3 || FH == 5 || FH == 7) && fm.stride[0] == 1 &&
-            fm.stride[1] == 1 && (fm.icpg == 1) && (fm.ocpg == 1) &&
-            is_supported(SIMDType::AVX2);
-    return aviliable;
+    return chanwise_avx2_stride1_qint8_usable(param);
 }
 
 WorkspaceBundle ConvBiasImpl::AlgoChanWiseAvx2Stride1Qint8::get_bundle(
@@ -94,28 +77,15 @@ ConvBiasImpl::AlgoChanWiseAvx2Stride1Qint8::get_kimpls(
     return avx2_chanwise_stride1::get_kimpls(param, bundle);
 }
 
+bool ConvBiasImpl::AlgoChanWiseAvx2Stride1Qint8::is_preferred(
+        FallbackConvBiasImpl*, const NCBKernSizeParam& param) const {
+    return chanwise_avx2_stride1_qint8_preferred(param);
+}
+
 bool ConvBiasImpl::AlgoChanWiseAvx2Stride2Qint8::usable(
         FallbackConvBiasImpl* /*opr*/, const NCBKernSizeParam& param,
         AlgoSelectionStrategy /*algo_selection_strategy*/) const {
-    auto&& fm = param.filter_meta;
-    auto FH = fm.spatial[0];
-    bool aviliable =
-            (param.bias_mode != BiasMode::BIAS) &&
-            ((param.src_type.enumv() == DTypeEnum::QuantizedS8 &&
-              param.filter_type.enumv() == DTypeEnum::QuantizedS8 &&
-              param.dst_type.enumv() == DTypeEnum::QuantizedS8) ||
-             (((param.src_type.enumv() == DTypeEnum::Int8 &&
-                param.filter_type.enumv() == DTypeEnum::Int8 &&
-                param.dst_type.enumv() == DTypeEnum::Int32) ||
-               (param.src_type.enumv() == DTypeEnum::QuantizedS8 &&
-                param.filter_type.enumv() == DTypeEnum::QuantizedS8 &&
-                param.dst_type.enumv() == DTypeEnum::QuantizedS32)))) &&
-            fm.format == Param::Format::NCHW && fm.spatial_ndim == 2 &&
-            fm.dilation[0] == 1 && fm.dilation[1] == 1 &&
-            (FH == 2 || FH == 3 || FH == 5 || FH == 7) && fm.stride[0] == 2 &&
-            fm.stride[1] == 2 && (fm.icpg == 1) && (fm.ocpg == 1) &&
-            is_supported(SIMDType::AVX2);
-    return aviliable;
+    return chanwise_avx2_stride2_qint8_usable(param);
 }
 
 WorkspaceBundle ConvBiasImpl::AlgoChanWiseAvx2Stride2Qint8::get_bundle(
@@ -154,28 +124,15 @@ ConvBiasImpl::AlgoChanWiseAvx2Stride2Qint8::get_kimpls(
     return avx2_chanwise_stride2::get_kimpls(param, bundle);
 }
 
+bool ConvBiasImpl::AlgoChanWiseAvx2Stride2Qint8::is_preferred(
+        FallbackConvBiasImpl*, const NCBKernSizeParam& param) const {
+    return chanwise_avx2_stride2_qint8_preferred(param);
+}
+
 bool ConvBiasImpl::AlgoDirectAvx2Stride1Int8::usable(
         FallbackConvBiasImpl* /*opr*/, const NCBKernSizeParam& param,
         AlgoSelectionStrategy /*algo_selection_strategy*/) const {
-    auto&& fm = param.filter_meta;
-    auto FH = fm.spatial[0];
-    bool aviliable = ((param.src_type.enumv() == DTypeEnum::QuantizedS8 &&
-                       param.filter_type.enumv() == DTypeEnum::QuantizedS8 &&
-                       param.dst_type.enumv() == DTypeEnum::QuantizedS8) ||
-                      (((param.src_type.enumv() == DTypeEnum::Int8 &&
-                         param.filter_type.enumv() == DTypeEnum::Int8 &&
-                         param.dst_type.enumv() == DTypeEnum::Int32) ||
-                        (param.src_type.enumv() == DTypeEnum::QuantizedS8 &&
-                         param.filter_type.enumv() == DTypeEnum::QuantizedS8 &&
-                         param.dst_type.enumv() == DTypeEnum::QuantizedS32)) &&
-                       param.bias_mode == BiasMode::NO_BIAS &&
-                       param.nonlineMode == NonlineMode::IDENTITY)) &&
-                     fm.format == Param::Format::NCHW && fm.spatial_ndim == 2 &&
-                     fm.dilation[0] == 1 && fm.dilation[1] == 1 &&
-                     (FH == 2 || FH == 3 || FH == 5 || FH == 7) &&
-                     fm.stride[0] == 1 && fm.stride[1] == 1 &&
-                     is_supported(SIMDType::AVX2);
-    return aviliable;
+    return direct_avx2_stride1_int8_usable(param);
 }
 
 WorkspaceBundle ConvBiasImpl::AlgoDirectAvx2Stride1Int8::get_bundle(
@@ -224,19 +181,75 @@ ConvBiasImpl::AlgoDirectAvx2Stride1Int8::get_kimpls(
     return direct_conv_avx2_stride1::get_kimpls(param, bundle);
 }
 
+bool ConvBiasImpl::AlgoDirectAvx2Stride1Int8::is_preferred(
+        FallbackConvBiasImpl*, const NCBKernSizeParam& param) const {
+    return direct_avx2_stride1_int8_preferred(param);
+}
+
+/* ===================== avx2 int8 stride 2 ===================== */
+bool ConvBiasImpl::AlgoAVX2DirectConvStride2::usable(
+        FallbackConvBiasImpl* /*opr*/, const NCBKernSizeParam& param,
+        AlgoSelectionStrategy) const {
+    return direct_avx2_stride2_int8_usable(param);
+}
+
+WorkspaceBundle ConvBiasImpl::AlgoAVX2DirectConvStride2::get_bundle(
+        const NCBKernSizeParam& param) {
+    auto&& fm = param.filter_meta;
+    size_t N = param.n;
+    size_t IC = fm.icpg;
+    size_t OC = fm.ocpg;
+    size_t IH = param.isz[0];
+    size_t IW = param.isz[1];
+    size_t OH = param.osz[0];
+    size_t OW = param.osz[1];
+    size_t FH = fm.spatial[0];
+    size_t FW = fm.spatial[1];
+    size_t GROUP = fm.group;
+    size_t IC_STEP = 2, OC_STEP = 4;
+
+    size_t pad_h = fm.padding[0];
+    size_t pad_w = fm.padding[1];
+    size_t src_size = 0, filter_size = 0;
+
+    //! pack filter, pack src
+    filter_size = GROUP * round_up(OC, OC_STEP) * round_up(IC, IC_STEP) * FH *
+                  FW * sizeof(int16_t);
+    //! avx256 iw max offset 32, caused by w_remain < 16
+    src_size = N * GROUP * div_ceil(IC, IC_STEP) * (IH + 2 * pad_h) *
+                       (IW + 2 * pad_w) * 2 * sizeof(int8_t) +
+               32;
+    bool need_post_process = param.dst_type.enumv() == DTypeEnum::QuantizedS8;
+    if (need_post_process) {
+        size_t dst_tmp = N * GROUP * OC * OW * OH * sizeof(int32_t);
+        return WorkspaceBundle(nullptr, {src_size, filter_size, dst_tmp});
+    } else {
+        return WorkspaceBundle(nullptr, {src_size, filter_size});
+    }
+}
+
+size_t ConvBiasImpl::AlgoAVX2DirectConvStride2::get_workspace(
+        FallbackConvBiasImpl*, const NCBKernSizeParam& param) const {
+    return get_bundle(param).total_size_in_bytes();
+}
+
+SmallVector<ConvBiasImpl::NCBKern>
+ConvBiasImpl::AlgoAVX2DirectConvStride2::get_kimpls(
+        const NCBKernSizeParam& param) const {
+    auto bundle = get_bundle(param);
+    return direct_conv_avx2_stride2::get_kimpls(param, bundle);
+}
+
+bool ConvBiasImpl::AlgoAVX2DirectConvStride2::is_preferred(
+        FallbackConvBiasImpl*, const NCBKernSizeParam& param) const {
+    return direct_avx2_stride2_int8_preferred(param);
+}
+
 #if MEGDNN_X86_WITH_MKL_DNN
 bool ConvBiasImpl::AlgoMkldnnQint8::usable(FallbackConvBiasImpl*,
                                            const NCBKernSizeParam& param,
                                            AlgoSelectionStrategy) const {
-    auto&& fm = param.filter_meta;
-    return (param.src_type.enumv() == DTypeEnum::QuantizedS8 ||
-            param.src_type.enumv() == DTypeEnum::Int8) &&
-           (param.dst_type.enumv() == DTypeEnum::QuantizedS32 ||
-            param.dst_type.enumv() == DTypeEnum::Int32) &&
-           fm.format == param::ConvBias::Format::NCHW && fm.spatial_ndim == 2 &&
-           fm.dilation[0] == 1 && fm.dilation[1] == 1 && !fm.should_flip &&
-           param.bias_mode == BiasMode::NO_BIAS &&
-           param.nonlineMode == NonlineMode::IDENTITY;
+    return mkldnn_qint8_usable(param);
 }
 
 WorkspaceBundle ConvBiasImpl::AlgoMkldnnQint8::get_bundle(
@@ -412,39 +425,25 @@ void ConvBiasImpl::AlgoMkldnnQint8::kern_mkldnn_s8x8x32(
         stream_mkldnn.wait();
     }
 }
-
 #undef REORDER_MEMORY
-#endif
 
-#if MEGDNN_X86_WITH_MKL_DNN
+bool ConvBiasImpl::AlgoMkldnnQint8::is_preferred(
+        FallbackConvBiasImpl*, const NCBKernSizeParam& param) const {
+    return mkldnn_qint8_preferred(param);
+}
+
 /* ===================== mkldnn qint8 matmul algo ===================== */
 bool ConvBiasImpl::AlgoMkldnnMatmulQint8::usable(FallbackConvBiasImpl*,
                                                  const NCBKernSizeParam& param,
                                                  AlgoSelectionStrategy) const {
-    auto&& fm = param.filter_meta;
-    return (param.src_type.enumv() == DTypeEnum::QuantizedS8 ||
-            param.src_type.enumv() == DTypeEnum::Int8) &&
-           (param.dst_type.enumv() == DTypeEnum::QuantizedS32 ||
-            param.dst_type.enumv() == DTypeEnum::Int32) &&
-           fm.format == param::ConvBias::Format::NCHW && fm.spatial_ndim == 2 &&
-           fm.group == 1 && fm.dilation[0] == 1 && fm.dilation[1] == 1 &&
-           param.bias_mode == BiasMode::NO_BIAS &&
-           param.nonlineMode == NonlineMode::IDENTITY &&
-           //! The matmul opr is only used in single thread
-           //! TODO:support the no pack matmul algo in fallback im2col + matmul
-           param.nr_threads == 1_z;
+    return mkldnn_matmul_qint8_usable(param);
 }
+
 bool ConvBiasImpl::AlgoMkldnnMatmulQint8::is_preferred(
         FallbackConvBiasImpl*, const NCBKernSizeParam& param) const {
-    auto&& fm = param.filter_meta;
-    megdnn_assert_internal(fm.group == 1 && fm.dilation[0] == 1 &&
-                           fm.dilation[1] == 1);
-
-    // single channel conv should never use matrix mul
-    if (fm.ocpg == 1 || fm.icpg == 1)
-        return false;
-    return true;
+    return mkldnn_matmul_qint8_preferred(param);
 }
+
 WorkspaceBundle ConvBiasImpl::AlgoMkldnnMatmulQint8::get_bundle(
         const NCBKernSizeParam& param) {
     UNPACK_CONV_F32_NCB_KERN_SIZES(param);
@@ -473,6 +472,7 @@ WorkspaceBundle ConvBiasImpl::AlgoMkldnnMatmulQint8::get_bundle(
     }
     return {nullptr, {part0, part1, part2}};
 }
+
 MatrixMul* ConvBiasImpl::AlgoMkldnnMatmulQint8::get_matmul_opr() {
     static CpuOprDelegationStorage<> storage;
     return storage.get<MatrixMul>();
@@ -553,76 +553,5 @@ void ConvBiasImpl::AlgoMkldnnMatmulQint8::kern_mkldnn_matmul_s8x8x32(
 }
 #endif
 
-/* ===================== avx2 int8 stride 2 ===================== */
-bool ConvBiasImpl::AlgoAVX2DirectConvStride2::usable(
-        FallbackConvBiasImpl* /*opr*/, const NCBKernSizeParam& param,
-        AlgoSelectionStrategy) const {
-    auto&& fm = param.filter_meta;
-    auto FH = fm.spatial[0];
-    bool aviliable = ((param.src_type.enumv() == DTypeEnum::QuantizedS8 &&
-                       param.filter_type.enumv() == DTypeEnum::QuantizedS8 &&
-                       param.dst_type.enumv() == DTypeEnum::QuantizedS8) ||
-                      (((param.src_type.enumv() == DTypeEnum::Int8 &&
-                         param.filter_type.enumv() == DTypeEnum::Int8 &&
-                         param.dst_type.enumv() == DTypeEnum::Int32) ||
-                        (param.src_type.enumv() == DTypeEnum::QuantizedS8 &&
-                         param.filter_type.enumv() == DTypeEnum::QuantizedS8 &&
-                         param.dst_type.enumv() == DTypeEnum::QuantizedS32)) &&
-                       param.bias_mode == BiasMode::NO_BIAS &&
-                       param.nonlineMode == NonlineMode::IDENTITY)) &&
-                     fm.format == Param::Format::NCHW && fm.spatial_ndim == 2 &&
-                     fm.dilation[0] == 1 && fm.dilation[1] == 1 &&
-                     (FH == 2 || FH == 3 || FH == 5 || FH == 7) &&
-                     fm.stride[0] == 2 && fm.stride[1] == 2 &&
-                     is_supported(SIMDType::AVX2);
-    return aviliable;
-}
-
-WorkspaceBundle ConvBiasImpl::AlgoAVX2DirectConvStride2::get_bundle(
-        const NCBKernSizeParam& param) {
-    auto&& fm = param.filter_meta;
-    size_t N = param.n;
-    size_t IC = fm.icpg;
-    size_t OC = fm.ocpg;
-    size_t IH = param.isz[0];
-    size_t IW = param.isz[1];
-    size_t OH = param.osz[0];
-    size_t OW = param.osz[1];
-    size_t FH = fm.spatial[0];
-    size_t FW = fm.spatial[1];
-    size_t GROUP = fm.group;
-    size_t IC_STEP = 2, OC_STEP = 4;
-
-    size_t pad_h = fm.padding[0];
-    size_t pad_w = fm.padding[1];
-    size_t src_size = 0, filter_size = 0;
-
-    //! pack filter, pack src
-    filter_size = GROUP * round_up(OC, OC_STEP) * round_up(IC, IC_STEP) * FH *
-                  FW * sizeof(int16_t);
-    //! avx256 iw max offset 32, caused by w_remain < 16
-    src_size = N * GROUP * div_ceil(IC, IC_STEP) * (IH + 2 * pad_h) *
-                       (IW + 2 * pad_w) * 2 * sizeof(int8_t) +
-               32;
-    bool need_post_process = param.dst_type.enumv() == DTypeEnum::QuantizedS8;
-    if (need_post_process) {
-        size_t dst_tmp = N * GROUP * OC * OW * OH * sizeof(int32_t);
-        return WorkspaceBundle(nullptr, {src_size, filter_size, dst_tmp});
-    } else {
-        return WorkspaceBundle(nullptr, {src_size, filter_size});
-    }
-}
-
-size_t ConvBiasImpl::AlgoAVX2DirectConvStride2::get_workspace(
-        FallbackConvBiasImpl*, const NCBKernSizeParam& param) const {
-    return get_bundle(param).total_size_in_bytes();
-}
-
-SmallVector<ConvBiasImpl::NCBKern>
-ConvBiasImpl::AlgoAVX2DirectConvStride2::get_kimpls(
-        const NCBKernSizeParam& param) const {
-    auto bundle = get_bundle(param);
-    return direct_conv_avx2_stride2::get_kimpls(param, bundle);
-}
 
 // vim: syntax=cpp.doxygen
diff --git a/dnn/src/x86/conv_bias/int8/algos.h b/dnn/src/x86/conv_bias/int8/algos.h
index a7fe96c95a1ac5405401f7d15d8cdce1085388ef..a85a5b4835869b87f861f870f255cf6f35e03e4d 100644
--- a/dnn/src/x86/conv_bias/int8/algos.h
+++ b/dnn/src/x86/conv_bias/int8/algos.h
@@ -35,6 +35,8 @@ public:
         return get_kimpls(param);
     }
     void* type() const override;
+    bool is_preferred(FallbackConvBiasImpl*,
+                      const NCBKernSizeParam& param) const override;
 };
 
 /* ===================== avx2 stride2 chanwise algo ===================== */
@@ -57,6 +59,8 @@ public:
         return get_kimpls(param);
     }
     void* type() const override;
+    bool is_preferred(FallbackConvBiasImpl*,
+                      const NCBKernSizeParam& param) const override;
 };
 
 /* ===================== avx2 stride1 direct algo ===================== */
@@ -79,6 +83,32 @@ public:
         return get_kimpls(param);
     }
     void* type() const override;
+    bool is_preferred(FallbackConvBiasImpl*,
+                      const NCBKernSizeParam& param) const override;
+};
+
+/* ================== avx2 int8 direct conv stride2 algo ================== */
+class ConvBiasImpl::AlgoAVX2DirectConvStride2 final : public AlgoBase {
+    SmallVector<NCBKern> get_kimpls(const NCBKernSizeParam& param) const;
+    static WorkspaceBundle get_bundle(const NCBKernSizeParam& param);
+
+public:
+    bool is_reproducible() const override { return true; }
+    const char* name() const override {
+        return "X86_CONV_BIAS_DIRECT_AVX2_INT8_STRIDE2";
+    }
+    bool usable(FallbackConvBiasImpl* opr, const NCBKernSizeParam& param,
+                AlgoSelectionStrategy algo_selection_strategy) const override;
+    size_t get_workspace(FallbackConvBiasImpl* opr,
+                         const NCBKernSizeParam& param) const override;
+    SmallVector<NCBKern> dispatch_kerns(
+            fallback::ConvBiasImpl*,
+            const NCBKernSizeParam& param) const override {
+        return get_kimpls(param);
+    }
+    void* type() const override;
+    bool is_preferred(FallbackConvBiasImpl*,
+                      const NCBKernSizeParam& param) const override;
 };
 
 #if MEGDNN_X86_WITH_MKL_DNN
@@ -117,6 +147,8 @@ public:
         return {{kern, {group, n, 1_z}}};
     }
     void* type() const override;
+    bool is_preferred(FallbackConvBiasImpl*,
+                      const NCBKernSizeParam& param) const override;
 };
 
 /* ===================== mkldnn qint8 matmul algo ===================== */
 class ConvBiasImpl::AlgoMkldnnMatmulQint8 final : public AlgoBase {
@@ -148,27 +180,7 @@ public:
     void* type() const override;
 };
 #endif
-/* ================== avx2 int8 direct conv stride2 algo ================== */
-class ConvBiasImpl::AlgoAVX2DirectConvStride2 final : public AlgoBase {
-    SmallVector<NCBKern> get_kimpls(const NCBKernSizeParam& param) const;
-    static WorkspaceBundle get_bundle(const NCBKernSizeParam& param);
-
-public:
-    bool is_reproducible() const override { return true; }
-    const char* name() const override {
-        return "X86_CONV_BIAS_DIRECT_AVX2_INT8_STRIDE2";
-    }
-    bool usable(FallbackConvBiasImpl* opr, const NCBKernSizeParam& param,
-                AlgoSelectionStrategy algo_selection_strategy) const override;
-    size_t get_workspace(FallbackConvBiasImpl* opr,
-                         const NCBKernSizeParam& param) const override;
-    SmallVector<NCBKern> dispatch_kerns(
-            fallback::ConvBiasImpl*,
-            const NCBKernSizeParam& param) const override {
-        return get_kimpls(param);
-    }
-    void* type() const override;
-};
 
 }  // namespace x86
 }  // namespace megdnn
diff --git a/dnn/src/x86/conv_bias/opr_impl.cpp b/dnn/src/x86/conv_bias/opr_impl.cpp
index 2ca8f4174975190d0e5aab67884262eb387663b4..a38d9ad9e466a078729a1924f046c521111a87de 100644
--- a/dnn/src/x86/conv_bias/opr_impl.cpp
+++ b/dnn/src/x86/conv_bias/opr_impl.cpp
@@ -16,6 +16,7 @@
 #include "src/common/metahelper.h"
 #include "src/common/opr_delegate.h"
 #include "src/x86/conv_bias/f32/algos.h"
+#include "src/x86/conv_bias/int8/algo_usable_preferred.h"
 #include "src/x86/conv_bias/int8/algos.h"
 #include "src/x86/matrix_mul/opr_impl.h"
 
@@ -94,12 +95,6 @@ class ConvBiasImpl::AlgoPack : NonCopyableObj {
 
 public:
     AlgoPack() {
-#if MEGDNN_X86_WITH_MKL_DNN
-        //! Create the mkldnn algo
-        all_algos.emplace_back(&mkldnn_conv_fp32);
-        all_algos.emplace_back(&mkldnn_matmul_qint8);
-        all_algos.emplace_back(&mkldnn_qint8);
-#endif
         all_algos.emplace_back(&stride1_direct_large_group);
         all_algos.emplace_back(&stride1_direct_small_group);
         all_algos.emplace_back(&stride2_direct_large_group);
@@ -110,6 +105,14 @@ public:
         all_algos.emplace_back(&avx2_stride2_chanwsie_qint8);
         all_algos.emplace_back(&matmul);
 
+        //! preference to use mkldnn algo on VNNI devices
+#if MEGDNN_X86_WITH_MKL_DNN
+        //! Create the mkldnn algo
+        all_algos.emplace_back(&mkldnn_conv_fp32);
+        all_algos.emplace_back(&mkldnn_matmul_qint8);
+        all_algos.emplace_back(&mkldnn_qint8);
+#endif
+
         static CpuOprDelegationStorage<> storage;
         auto matmul_opr = storage.get<MatrixMul>();
         auto&& matmul_algos =
@@ -159,4 +162,25 @@
 const char* ConvBiasImpl::get_algorithm_set_name() const {
     return "X0";
 }
 
+bool ConvBiasImpl::is_matmul_quantized_prefer(
+        const ConvBiasImpl::NCBKernSizeParam& param) {
+    bool conv_direct_chanwise_mkldnn_usable = true;
+    if (param.dst_type.enumv() == DTypeEnum::QuantizedS8 ||
+        param.dst_type.enumv() == DTypeEnum::QuantizedS32) {
+        conv_direct_chanwise_mkldnn_usable =
+                chanwise_avx2_stride1_qint8_usable_preferred(param) ||
+                chanwise_avx2_stride2_qint8_usable_preferred(param) ||
+                direct_avx2_stride1_int8_usable_preferred(param) ||
+                direct_avx2_stride2_int8_usable_preferred(param);
+    }
+#if MEGDNN_X86_WITH_MKL_DNN
+    conv_direct_chanwise_mkldnn_usable =
+            conv_direct_chanwise_mkldnn_usable ||
+            mkldnn_qint8_usable_preferred(param) ||
+            mkldnn_matmul_qint8_usable_preferred(param);
+#endif
+
+    return !conv_direct_chanwise_mkldnn_usable;
+}
+
 // vim: syntax=cpp.doxygen
diff --git a/dnn/src/x86/conv_bias/opr_impl.h b/dnn/src/x86/conv_bias/opr_impl.h
index 3a948d5b8fc74351ccac28fce909f1e477412d00..ea1626be364ab71ed1c42f596044a8330f85faa8 100644
--- a/dnn/src/x86/conv_bias/opr_impl.h
+++ b/dnn/src/x86/conv_bias/opr_impl.h
@@ -53,6 +53,9 @@ public:
                             size_t& IW2, size_t& OH2, size_t& OW2);
 
     const char* get_algorithm_set_name() const override;
+
+    bool is_matmul_quantized_prefer(
+            const ConvBiasImpl::NCBKernSizeParam& ncb_param) override;
 };
 
 }  // namespace x86
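
Note on the pattern this patch introduces: each int8 algorithm now exposes a pair of free-function predicates, `*_usable` (hard correctness constraints: dtype, layout, stride, SIMD support) and `*_preferred` (a performance heuristic), and `ConvBiasImpl::is_matmul_quantized_prefer` reports the quantized matmul path as preferred only when no specialized direct/chanwise/mkldnn algo is both usable and preferred. A minimal standalone sketch of that selection pattern follows; the `Algo` and `Param` types here are hypothetical stand-ins for illustration, not megdnn's real API:

    #include <vector>

    struct Param {};  // stand-in for ConvBiasImpl::NCBKernSizeParam

    struct Algo {
        bool (*usable)(const Param&);     // hard constraints: dtype, layout, stride
        bool (*preferred)(const Param&);  // heuristic: likely the fastest choice?
    };

    // Mirrors the logic of is_matmul_quantized_prefer(): fall back to matmul
    // only when none of the specialized algos is usable AND preferred.
    bool matmul_quantized_prefer(const Param& p,
                                 const std::vector<Algo>& specialized) {
        for (auto&& algo : specialized)
            if (algo.usable(p) && algo.preferred(p))
                return false;
        return true;
    }

Splitting the predicates out of the `AlgoBase` subclasses lets `is_matmul_quantized_prefer` consult them without instantiating or enumerating the algo objects, which is why the checks live in algo_usable_preferred.{h,cpp} rather than in algos.cpp.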