diff --git a/dnn/src/aarch64/conv_bias/fp16/algos.cpp b/dnn/src/aarch64/conv_bias/fp16/algos.cpp index cab6df546e708df0923273fc8c222b6cc55e841a..cd05b6068e5d790460d5cc3501e13ad2271deee2 100644 --- a/dnn/src/aarch64/conv_bias/fp16/algos.cpp +++ b/dnn/src/aarch64/conv_bias/fp16/algos.cpp @@ -6,7 +6,8 @@ * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. */ #include "src/aarch64/conv_bias/fp16/algos.h" @@ -22,7 +23,7 @@ using namespace aarch64; MIDOUT_DECL(megdnn_aarch64_conv_bias_stride2_conv2357_fp16) bool ConvBiasImpl::AlgoF16DirectStride2::usable( - FallbackConvBiasImpl*, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const { MIDOUT_BEGIN(megdnn_aarch64_conv_bias_stride2_conv2357_fp16, 0, 0) { auto&& fm = param.filter_meta; @@ -47,7 +48,7 @@ bool ConvBiasImpl::AlgoF16DirectStride2::usable( } size_t ConvBiasImpl::AlgoF16DirectStride2::get_workspace( - FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { MIDOUT_BEGIN(megdnn_aarch64_conv_bias_stride2_conv2357_fp16, 0, 1) { auto wbundle = arm_common::MultithreadDirectConvCommon< dt_float16, __fp16>::get_bundle_stride(param, m_large_group); @@ -59,7 +60,7 @@ size_t ConvBiasImpl::AlgoF16DirectStride2::get_workspace( SmallVector ConvBiasImpl::AlgoF16DirectStride2::dispatch_kerns( - FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { MIDOUT_BEGIN(megdnn_aarch64_conv_bias_stride2_conv2357_fp32, 0, 2) { return get_kimpls(param); } diff --git a/dnn/src/aarch64/conv_bias/fp16/algos.h b/dnn/src/aarch64/conv_bias/fp16/algos.h index 
367006ee31cf61b4fe20aa1e8739d0de22bf3287..2e0321bdc8034e92dede8284039015652cbad51f 100644 --- a/dnn/src/aarch64/conv_bias/fp16/algos.h +++ b/dnn/src/aarch64/conv_bias/fp16/algos.h @@ -19,6 +19,7 @@ namespace aarch64 { class ConvBiasImpl::AlgoF16DirectStride2 final : public AlgoBase { SmallVector get_kimpls(const NCBKernSizeParam& param) const; bool m_large_group; + public: AlgoF16DirectStride2(bool large_group) : m_large_group(large_group) {} bool is_reproducible() const override { return true; } @@ -26,15 +27,12 @@ public: return m_large_group ? "ARMV8F16STRD2_LARGE_GROUP" : "ARMV8F16STRD2_SMALL_GROUP"; } - - bool usable(FallbackConvBiasImpl*, const NCBKernSizeParam& param, + bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t get_workspace(FallbackConvBiasImpl*, - const NCBKernSizeParam& param) const override; + size_t get_workspace(const NCBKernSizeParam& param) const override; - SmallVector dispatch_kerns(FallbackConvBiasImpl*, - const NCBKernSizeParam&) const override; + SmallVector dispatch_kerns(const NCBKernSizeParam&) const override; }; } // namespace aarch64 } // namespace megdnn diff --git a/dnn/src/aarch64/conv_bias/fp32/algos.cpp b/dnn/src/aarch64/conv_bias/fp32/algos.cpp index 38848b46e4ce563123e3fa153b346ff0cad11c61..4436837d259b7b28abb538202f478babcf1916e8 100644 --- a/dnn/src/aarch64/conv_bias/fp32/algos.cpp +++ b/dnn/src/aarch64/conv_bias/fp32/algos.cpp @@ -22,7 +22,7 @@ using namespace aarch64; MIDOUT_DECL(megdnn_aarch64_conv_bias_stride2_conv2357_fp32) bool ConvBiasImpl::AlgoF32DirectStride2::usable( - FallbackConvBiasImpl*, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const { MIDOUT_BEGIN(megdnn_aarch64_conv_bias_stride2_conv2357_fp32, 0, 0) { auto&& fm = param.filter_meta; @@ -47,7 +47,7 @@ bool ConvBiasImpl::AlgoF32DirectStride2::usable( } size_t ConvBiasImpl::AlgoF32DirectStride2::get_workspace( - 
FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { MIDOUT_BEGIN(megdnn_aarch64_conv_bias_stride2_conv2357_fp32, 0, 1) { auto wbundle = arm_common::MultithreadDirectConvCommon< float, float>::get_bundle_stride(param, m_large_group); @@ -58,7 +58,7 @@ size_t ConvBiasImpl::AlgoF32DirectStride2::get_workspace( } SmallVector ConvBiasImpl::AlgoF32DirectStride2::dispatch_kerns( - FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { MIDOUT_BEGIN(megdnn_aarch64_conv_bias_stride2_conv2357_fp32, 0, 2) { return get_kimpls(param); } diff --git a/dnn/src/aarch64/conv_bias/fp32/algos.h b/dnn/src/aarch64/conv_bias/fp32/algos.h index 24c798580554b12fb5885e27dba402dc491fd498..1947fd199ea5d9066122b1bd325d2e1e4eb5b663 100644 --- a/dnn/src/aarch64/conv_bias/fp32/algos.h +++ b/dnn/src/aarch64/conv_bias/fp32/algos.h @@ -23,6 +23,7 @@ using FallbackConvBiasImpl = fallback::ConvBiasImpl; class ConvBiasImpl::AlgoF32DirectStride2 final : public AlgoBase { SmallVector get_kimpls(const NCBKernSizeParam& param) const; bool m_large_group; + public: AlgoF32DirectStride2(bool large_group) : m_large_group(large_group) {} bool is_reproducible() const override { return true; } @@ -31,14 +32,12 @@ public: : "ARMV8F32STRD2_SMALL_GROUP"; } - bool usable(FallbackConvBiasImpl*, const NCBKernSizeParam& param, + bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t get_workspace(FallbackConvBiasImpl*, - const NCBKernSizeParam& param) const override; + size_t get_workspace(const NCBKernSizeParam& param) const override; - SmallVector dispatch_kerns(FallbackConvBiasImpl*, - const NCBKernSizeParam&) const override; + SmallVector dispatch_kerns(const NCBKernSizeParam&) const override; }; } // namespace aarch64 diff --git a/dnn/src/aarch64/conv_bias/int8/algos.cpp b/dnn/src/aarch64/conv_bias/int8/algos.cpp index 
8b17cfa6dccf3c4c4984d0d769a334f6d54419b0..802793d7d91968cd06faa39af0350c8f00acf9d4 100644 --- a/dnn/src/aarch64/conv_bias/int8/algos.cpp +++ b/dnn/src/aarch64/conv_bias/int8/algos.cpp @@ -30,9 +30,8 @@ using megdnn::arm_common::TypeCvtOp; /* ===================== matrix mul algo ===================== */ bool ConvBiasImpl::AlgoS8MatrixMul::usable( - FallbackConvBiasImpl* opr, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy /*algo_selection_strategy*/) const { - MEGDNN_MARK_USED_VAR(opr); auto&& fm = param.filter_meta; return param.src_type.enumv() == DTypeEnum::QuantizedS8 && param.dst_type.enumv() == DTypeEnum::QuantizedS8 && diff --git a/dnn/src/aarch64/conv_bias/int8/algos.h b/dnn/src/aarch64/conv_bias/int8/algos.h index 403d755d63bdcafdb9ce3c022ead522afad39ac2..7d79f0cd4b9ee63ee5a55f296d4fea9c5d0c58e5 100644 --- a/dnn/src/aarch64/conv_bias/int8/algos.h +++ b/dnn/src/aarch64/conv_bias/int8/algos.h @@ -13,6 +13,7 @@ #include "src/aarch64/conv_bias/opr_impl.h" #include "src/fallback/conv_bias/opr_impl.h" +#include "src/common/opr_delegate.h" namespace megdnn { namespace aarch64 { @@ -27,21 +28,21 @@ public: bool is_reproducible() const override { return true; } const char* name() const override { return "S8MATMUL"; } - bool usable(FallbackConvBiasImpl* opr, const NCBKernSizeParam& param, + bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t get_workspace(FallbackConvBiasImpl*, - const NCBKernSizeParam& param) const override { + size_t get_workspace(const NCBKernSizeParam& param) const override { return get_bundle(param).total_size_in_bytes(); } SmallVector dispatch_kerns( - FallbackConvBiasImpl*, const NCBKernSizeParam& param) const override { + const NCBKernSizeParam& param) const override { size_t group = param.filter_meta.group; return {{kimpl, {group, 1_z, 1_z}}}; } //! 
select matmul to the highest preference - bool is_preferred(FallbackConvBiasImpl* opr, - const NCBKernSizeParam& param) const override { - return static_cast(opr) + bool is_preferred(const NCBKernSizeParam& param) const override { + static CpuOprDelegationStorage<1> storage; + auto conv_bias_opr = storage.get(); + return static_cast(conv_bias_opr) ->is_matmul_quantized_prefer(param); } }; diff --git a/dnn/src/aarch64/conv_bias/quint8/algos.cpp b/dnn/src/aarch64/conv_bias/quint8/algos.cpp index dc8a1233a1ac14c9920fafb20df88fa1c49a18b3..a34d840d381e5c85da401432eb5839e975358819 100644 --- a/dnn/src/aarch64/conv_bias/quint8/algos.cpp +++ b/dnn/src/aarch64/conv_bias/quint8/algos.cpp @@ -32,9 +32,8 @@ using megdnn::arm_common::TypeCvtOp; /* ===================== matrix mul algo ===================== */ bool ConvBiasImpl::AlgoQU8MatrixMul::usable( - FallbackConvBiasImpl* opr, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy /*algo_selection_strategy*/) const { - MEGDNN_MARK_USED_VAR(opr); auto&& fm = param.filter_meta; return param.src_type.enumv() == DTypeEnum::Quantized8Asymm && param.dst_type.enumv() == DTypeEnum::Quantized8Asymm && diff --git a/dnn/src/aarch64/conv_bias/quint8/algos.h b/dnn/src/aarch64/conv_bias/quint8/algos.h index afa58b9d687d1d8aa074018c2155971eb3b50e84..9f99b0d0f730119ddcd549ca7f86b4d1d7fdb340 100644 --- a/dnn/src/aarch64/conv_bias/quint8/algos.h +++ b/dnn/src/aarch64/conv_bias/quint8/algos.h @@ -13,6 +13,7 @@ #include "src/aarch64/conv_bias/opr_impl.h" #include "src/fallback/conv_bias/opr_impl.h" +#include "src/common/opr_delegate.h" namespace megdnn { namespace aarch64 { @@ -27,22 +28,21 @@ public: bool is_reproducible() const override { return true; } const char* name() const override { return "QU8MATMUL"; } - bool usable(FallbackConvBiasImpl* opr, const NCBKernSizeParam& param, + bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t 
get_workspace(FallbackConvBiasImpl*, - const NCBKernSizeParam& param) const override { + size_t get_workspace(const NCBKernSizeParam& param) const override { return get_bundle(param).total_size_in_bytes(); } SmallVector dispatch_kerns( - FallbackConvBiasImpl*, const NCBKernSizeParam& param) const override { size_t group = param.filter_meta.group; return {{kimpl, {group, 1_z, 1_z}}}; } //! select matmul to the highest preference - bool is_preferred(FallbackConvBiasImpl* opr, - const NCBKernSizeParam& param) const override { - return static_cast(opr) + bool is_preferred(const NCBKernSizeParam& param) const override { + static CpuOprDelegationStorage<1> storage; + auto conv_bias_opr = storage.get(); + return static_cast(conv_bias_opr) ->is_matmul_quantized_prefer(param); } }; diff --git a/dnn/src/arm_common/conv_bias/f16/algos.cpp b/dnn/src/arm_common/conv_bias/f16/algos.cpp index a5986d70ba59e8fca0aea23993375c25d380001a..d5d095e37446d6b15192eb37d48fb58039feaf73 100644 --- a/dnn/src/arm_common/conv_bias/f16/algos.cpp +++ b/dnn/src/arm_common/conv_bias/f16/algos.cpp @@ -27,10 +27,9 @@ using namespace arm_common; /* ======================= AlgoFP16WinogradF23 ======================== */ bool ConvBiasImpl::AlgoFP16WinogradF23::usable( - fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy /*algo_selection_strategy*/) const { MEGDNN_MARK_USED_VAR(param); - MEGDNN_MARK_USED_VAR(opr); MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 0, 0) { using Strategy = winograd::winograd_2x3_4x4_f16; Strategy strategy(param.src_type, param.filter_type, param.dst_type); @@ -38,13 +37,13 @@ bool ConvBiasImpl::AlgoFP16WinogradF23::usable( strategy, m_tile_size, param) .get_matmul_kern_param(param); return m_matmul_algo->usable(matmul_param) && - (opr->param().format == param::ConvBias::Format::NCHW || - (opr->param().format == + (param.filter_meta.format == param::ConvBias::Format::NCHW || + (param.filter_meta.format == 
param::ConvBias::Format::NCHW_WINOGRAD && - opr->param().output_block_size == 2 && + param.output_block_size == 2 && param.winograd_matmul_format == param::MatrixMul::Format::DEFAULT)) && - opr->param().mode == param::ConvBias::Mode::CROSS_CORRELATION && + !param.filter_meta.should_flip && (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && param.filter_meta.spatial[0] == 3) && (param.filter_meta.stride[0] == param.filter_meta.stride[1] && @@ -69,10 +68,9 @@ MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP16WinogradF23, /* ======================= AlgoFP16WinogradF45 ======================== */ bool ConvBiasImpl::AlgoFP16WinogradF45::usable( - fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy /*algo_selection_strategy*/) const { MEGDNN_MARK_USED_VAR(param); - MEGDNN_MARK_USED_VAR(opr); MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 1, 0) { using Strategy = winograd::winograd_4x5_1x1_f16; Strategy strategy(param.src_type, param.filter_type, param.dst_type); @@ -80,13 +78,13 @@ bool ConvBiasImpl::AlgoFP16WinogradF45::usable( strategy, m_tile_size, param) .get_matmul_kern_param(param); return m_matmul_algo->usable(matmul_param) && - (opr->param().format == param::ConvBias::Format::NCHW || - (opr->param().format == + (param.filter_meta.format == param::ConvBias::Format::NCHW || + (param.filter_meta.format == param::ConvBias::Format::NCHW_WINOGRAD && - opr->param().output_block_size == 4 && + param.output_block_size == 4 && param.winograd_matmul_format == param::MatrixMul::Format::DEFAULT)) && - opr->param().mode == param::ConvBias::Mode::CROSS_CORRELATION && + !param.filter_meta.should_flip && (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && param.filter_meta.spatial[0] == 5) && (param.filter_meta.stride[0] == param.filter_meta.stride[1] && @@ -109,10 +107,9 @@ MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP16WinogradF45, /* ======================= AlgoFP16WinogradF63 
======================== */ bool ConvBiasImpl::AlgoFP16WinogradF63::usable( - fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy /*algo_selection_strategy*/) const { MEGDNN_MARK_USED_VAR(param); - MEGDNN_MARK_USED_VAR(opr); MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 2, 0) { using Strategy = winograd::winograd_6x3_1x1_f16; Strategy strategy(param.src_type, param.filter_type, param.dst_type); @@ -120,13 +117,13 @@ bool ConvBiasImpl::AlgoFP16WinogradF63::usable( strategy, m_tile_size, param) .get_matmul_kern_param(param); return m_matmul_algo->usable(matmul_param) && - (opr->param().format == param::ConvBias::Format::NCHW || - (opr->param().format == + (param.filter_meta.format == param::ConvBias::Format::NCHW || + (param.filter_meta.format == param::ConvBias::Format::NCHW_WINOGRAD && - opr->param().output_block_size == 6 && + param.output_block_size == 6 && param.winograd_matmul_format == param::MatrixMul::Format::DEFAULT)) && - opr->param().mode == param::ConvBias::Mode::CROSS_CORRELATION && + !param.filter_meta.should_flip && (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && param.filter_meta.spatial[0] == 3) && (param.filter_meta.stride[0] == param.filter_meta.stride[1] && @@ -149,10 +146,9 @@ MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP16WinogradF63, /* ======================= AlgoFP16WinogradF23_8x8 ======================== */ bool ConvBiasImpl::AlgoFP16WinogradF23_8x8::usable( - fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy /*algo_selection_strategy*/) const { MEGDNN_MARK_USED_VAR(param); - MEGDNN_MARK_USED_VAR(opr); MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 3, 0) { if (param.filter_meta.icpg % 8 != 0 || param.filter_meta.ocpg % 8 != 0) return false; @@ -166,13 +162,13 @@ bool ConvBiasImpl::AlgoFP16WinogradF23_8x8::usable( .get_matmul_kern_param(param); return m_matmul_algo->usable(matmul_param) && 
m_matmul_algo->packmode() == PackMode::NO_PACK && - (opr->param().format == param::ConvBias::Format::NCHW || - (opr->param().format == + (param.filter_meta.format == param::ConvBias::Format::NCHW || + (param.filter_meta.format == param::ConvBias::Format::NCHW_WINOGRAD && - opr->param().output_block_size == 2 && + param.output_block_size == 2 && param.winograd_matmul_format == param::MatrixMul::Format::MK8)) && - opr->param().mode == param::ConvBias::Mode::CROSS_CORRELATION && + !param.filter_meta.should_flip && (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && param.filter_meta.spatial[0] == 3) && (param.filter_meta.stride[0] == param.filter_meta.stride[1] && @@ -197,7 +193,7 @@ MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP16WinogradF23_8x8, MIDOUT_DECL(megdnn_arm_common_conv_bias_fp16_kimpl) bool ConvBiasImpl::AlgoF16Direct::usable( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const { MIDOUT_BEGIN(megdnn_arm_common_conv_bias_fp16_kimpl, 0, 0) { auto&& fm = param.filter_meta; @@ -227,7 +223,7 @@ bool ConvBiasImpl::AlgoF16Direct::usable( } size_t ConvBiasImpl::AlgoF16Direct::get_workspace( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { MIDOUT_BEGIN(megdnn_arm_common_conv_bias_fp16_kimpl, 0, 1) { auto wbundle = MultithreadDirectConvCommon::get_bundle( @@ -310,7 +306,7 @@ SmallVector ConvBiasImpl::AlgoF16Direct::get_kimpls( } SmallVector ConvBiasImpl::AlgoF16Direct::dispatch_kerns( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { MIDOUT_BEGIN(megdnn_arm_common_conv_bias_fp16_kimpl, 0, 1) { return get_kimpls(param); } @@ -321,7 +317,7 @@ SmallVector ConvBiasImpl::AlgoF16Direct::dispatch_kerns( /* ===================== stride-1 algo ===================== */ bool ConvBiasImpl::AlgoF16DirectStride1::usable( - fallback::ConvBiasImpl*, const 
NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const { MIDOUT_BEGIN(megdnn_arm_common_conv_bias_fp16_kimpl, 1, 0) { auto&& fm = param.filter_meta; @@ -425,7 +421,7 @@ ConvBiasImpl::AlgoF16DirectStride1::get_kimpls( } size_t ConvBiasImpl::AlgoF16DirectStride1::get_workspace( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { MIDOUT_BEGIN(megdnn_arm_common_conv_bias_fp16_kimpl, 1, 1) { auto bundle = MultithreadDirectConvCommon< dt_float16, __fp16>::get_bundle_stride(param, m_large_group); @@ -437,7 +433,7 @@ size_t ConvBiasImpl::AlgoF16DirectStride1::get_workspace( SmallVector ConvBiasImpl::AlgoF16DirectStride1::dispatch_kerns( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { MIDOUT_BEGIN(megdnn_arm_common_conv_bias_fp16_kimpl, 1, 2) { return get_kimpls(param); } diff --git a/dnn/src/arm_common/conv_bias/f16/algos.h b/dnn/src/arm_common/conv_bias/f16/algos.h index a429a6dba40a4cac22df7f51396d5426b5b842ea..a38c8607b490233e04cebd9c0ba41dbd7522fe8c 100644 --- a/dnn/src/arm_common/conv_bias/f16/algos.h +++ b/dnn/src/arm_common/conv_bias/f16/algos.h @@ -88,14 +88,12 @@ public: return m_large_group ? "F16DIRECT_LARGE_GROUP" : "F16DIRECT_SMALL_GROUP"; } - bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, + bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t get_workspace(fallback::ConvBiasImpl* opr, - const NCBKernSizeParam& param) const override; + size_t get_workspace(const NCBKernSizeParam& param) const override; virtual SmallVector dispatch_kerns( - fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param) const override; }; @@ -109,12 +107,10 @@ public: const char* name() const override { return m_large_group ? 
"F16STRD1_LARGE_GROUP" : "F16STRD1_SMALL_GROUP"; } - bool usable(fallback::ConvBiasImpl*, const NCBKernSizeParam& param, + bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t get_workspace(fallback::ConvBiasImpl*, - const NCBKernSizeParam& param) const override; + size_t get_workspace(const NCBKernSizeParam& param) const override; virtual SmallVector dispatch_kerns( - fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param) const override; }; diff --git a/dnn/src/arm_common/conv_bias/fp32/algos.cpp b/dnn/src/arm_common/conv_bias/fp32/algos.cpp index 96efd692e05d9536579aac0a5aef8610b55420f0..e25d40ae1e97a30ae1c235378a3d81b70e50ae44 100644 --- a/dnn/src/arm_common/conv_bias/fp32/algos.cpp +++ b/dnn/src/arm_common/conv_bias/fp32/algos.cpp @@ -6,7 +6,8 @@ * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. 
*/ #include "src/arm_common/conv_bias/fp32/algos.h" @@ -30,9 +31,8 @@ using namespace arm_common; /* ======================= AlgoFP32WinogradF23_4x4 ======================== */ bool ConvBiasImpl::AlgoFP32WinogradF23_4x4::usable( - fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy /*algo_selection_strategy*/) const { - MEGDNN_MARK_USED_VAR(opr); MEGDNN_MARK_USED_VAR(param); MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 0, 0) { if (param.filter_meta.icpg % 4 != 0 || param.filter_meta.ocpg % 4 != 0) @@ -47,13 +47,13 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_4x4::usable( .get_matmul_kern_param(param); return m_matmul_algo->usable(matmul_param) && m_matmul_algo->packmode() == PackMode::NO_PACK && - (opr->param().format == param::ConvBias::Format::NCHW || - (opr->param().format == + (param.filter_meta.format == param::ConvBias::Format::NCHW || + (param.filter_meta.format == param::ConvBias::Format::NCHW_WINOGRAD && - opr->param().output_block_size == 2 && + param.output_block_size == 2 && param.winograd_matmul_format == param::MatrixMul::Format::MK4)) && - opr->param().mode == param::ConvBias::Mode::CROSS_CORRELATION && + !param.filter_meta.should_flip && (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && param.filter_meta.spatial[0] == 3) && (param.filter_meta.stride[0] == param.filter_meta.stride[1] && @@ -76,10 +76,9 @@ MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF23_4x4, /* ======================= AlgoFP32WinogradF63 ======================== */ bool ConvBiasImpl::AlgoFP32WinogradF63::usable( - fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy /*algo_selection_strategy*/) const { MEGDNN_MARK_USED_VAR(param); - MEGDNN_MARK_USED_VAR(opr); MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 1, 0) { using Strategy = winograd::winograd_6x3_1x1_f; Strategy strategy(param.src_type, param.filter_type, param.dst_type); @@ -87,13 
+86,13 @@ bool ConvBiasImpl::AlgoFP32WinogradF63::usable( strategy, m_tile_size, param) .get_matmul_kern_param(param); return m_matmul_algo->usable(matmul_param) && - (opr->param().format == param::ConvBias::Format::NCHW || - (opr->param().format == + (param.filter_meta.format == param::ConvBias::Format::NCHW || + (param.filter_meta.format == param::ConvBias::Format::NCHW_WINOGRAD && - opr->param().output_block_size == 6 && + param.output_block_size == 6 && param.winograd_matmul_format == param::MatrixMul::Format::DEFAULT)) && - opr->param().mode == param::ConvBias::Mode::CROSS_CORRELATION && + !param.filter_meta.should_flip && (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && param.filter_meta.spatial[0] == 3) && (param.filter_meta.stride[0] == param.filter_meta.stride[1] && @@ -116,10 +115,9 @@ MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF63, /* ======================= AlgoFP32WinogradF54 ======================== */ bool ConvBiasImpl::AlgoFP32WinogradF54::usable( - fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy /*algo_selection_strategy*/) const { MEGDNN_MARK_USED_VAR(param); - MEGDNN_MARK_USED_VAR(opr); MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 2, 0) { using Strategy = winograd::winograd_5x4_1x1_f; Strategy strategy(param.src_type, param.filter_type, param.dst_type); @@ -127,13 +125,13 @@ bool ConvBiasImpl::AlgoFP32WinogradF54::usable( strategy, m_tile_size, param) .get_matmul_kern_param(param); return m_matmul_algo->usable(matmul_param) && - (opr->param().format == param::ConvBias::Format::NCHW || - (opr->param().format == + (param.filter_meta.format == param::ConvBias::Format::NCHW || + (param.filter_meta.format == param::ConvBias::Format::NCHW_WINOGRAD && - opr->param().output_block_size == 5 && + param.output_block_size == 5 && param.winograd_matmul_format == param::MatrixMul::Format::DEFAULT)) && - opr->param().mode == param::ConvBias::Mode::CROSS_CORRELATION && + 
!param.filter_meta.should_flip && (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && param.filter_meta.spatial[0] == 4) && (param.filter_meta.stride[0] == param.filter_meta.stride[1] && @@ -156,10 +154,9 @@ MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF54, /* ======================= AlgoFP32WinogradF45 ======================== */ bool ConvBiasImpl::AlgoFP32WinogradF45::usable( - fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy /*algo_selection_strategy*/) const { MEGDNN_MARK_USED_VAR(param); - MEGDNN_MARK_USED_VAR(opr); MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 3, 0) { using Strategy = winograd::winograd_4x5_1x1_f; Strategy strategy(param.src_type, param.filter_type, param.dst_type); @@ -167,13 +164,13 @@ bool ConvBiasImpl::AlgoFP32WinogradF45::usable( strategy, m_tile_size, param) .get_matmul_kern_param(param); return m_matmul_algo->usable(matmul_param) && - (opr->param().format == param::ConvBias::Format::NCHW || - (opr->param().format == + (param.filter_meta.format == param::ConvBias::Format::NCHW || + (param.filter_meta.format == param::ConvBias::Format::NCHW_WINOGRAD && - opr->param().output_block_size == 4 && + param.output_block_size == 4 && param.winograd_matmul_format == param::MatrixMul::Format::DEFAULT)) && - opr->param().mode == param::ConvBias::Mode::CROSS_CORRELATION && + !param.filter_meta.should_flip && (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && param.filter_meta.spatial[0] == 5) && (param.filter_meta.stride[0] == param.filter_meta.stride[1] && @@ -196,10 +193,9 @@ MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF45, /* ======================= AlgoFP32WinogradF63_4x4 ======================== */ bool ConvBiasImpl::AlgoFP32WinogradF63_4x4::usable( - fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy /*algo_selection_strategy*/) const { MEGDNN_MARK_USED_VAR(param); - 
MEGDNN_MARK_USED_VAR(opr); MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 4, 0) { if (param.filter_meta.icpg % 4 != 0 || param.filter_meta.ocpg % 4 != 0) return false; @@ -213,13 +209,13 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_4x4::usable( .get_matmul_kern_param(param); return m_matmul_algo->usable(matmul_param) && m_matmul_algo->packmode() == PackMode::NO_PACK && - (opr->param().format == param::ConvBias::Format::NCHW || - (opr->param().format == + (param.filter_meta.format == param::ConvBias::Format::NCHW || + (param.filter_meta.format == param::ConvBias::Format::NCHW_WINOGRAD && - opr->param().output_block_size == 6 && + param.output_block_size == 6 && param.winograd_matmul_format == param::MatrixMul::Format::MK4)) && - opr->param().mode == param::ConvBias::Mode::CROSS_CORRELATION && + !param.filter_meta.should_flip && (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && param.filter_meta.spatial[0] == 3) && (param.filter_meta.stride[0] == param.filter_meta.stride[1] && @@ -244,9 +240,8 @@ MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF63_4x4, /* =================== AlgoFP32WinogradF23_4x4_NCHW44 =================== */ bool ConvBiasImpl::AlgoFP32WinogradF23_4x4_NCHW44::usable( - fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy /*algo_selection_strategy*/) const { - MEGDNN_MARK_USED_VAR(opr); MEGDNN_MARK_USED_VAR(param); MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, midout_iv("AlgoFP32WinogradF23_4x4_NCHW44"_hash)) { @@ -262,13 +257,13 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_4x4_NCHW44::usable( return m_matmul_algo->usable(matmul_param) && m_matmul_algo->packmode() == fallback::MatrixMulImpl::AlgoBase::PackMode::NO_PACK && - (opr->param().format == param::ConvBias::Format::NCHW44 || - (opr->param().format == + (param.filter_meta.format == param::ConvBias::Format::NCHW44 || + (param.filter_meta.format == param::ConvBias::Format::NCHW44_WINOGRAD && - 
opr->param().output_block_size == 2 && + param.output_block_size == 2 && param.winograd_matmul_format == param::MatrixMul::Format::MK4)) && - opr->param().mode == param::ConvBias::Mode::CROSS_CORRELATION && + !param.filter_meta.should_flip && (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && param.filter_meta.spatial[0] == 3) && (param.filter_meta.stride[0] == param.filter_meta.stride[1] && @@ -291,10 +286,9 @@ MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF23_4x4_NCHW44, /* =================== AlgoFP32WinogradF63_4x4_NCHW44 ===================== */ bool ConvBiasImpl::AlgoFP32WinogradF63_4x4_NCHW44::usable( - fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy /*algo_selection_strategy*/) const { MEGDNN_MARK_USED_VAR(param); - MEGDNN_MARK_USED_VAR(opr); MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, midout_iv("AlgoFP32WinogradF63_4x4_NCHW44"_hash)) { if (param.filter_meta.icpg % 4 != 0 || param.filter_meta.ocpg % 4 != 0) @@ -309,13 +303,13 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_4x4_NCHW44::usable( return m_matmul_algo->usable(matmul_param) && m_matmul_algo->packmode() == fallback::MatrixMulImpl::AlgoBase::PackMode::NO_PACK && - (opr->param().format == param::ConvBias::Format::NCHW44 || - (opr->param().format == + (param.filter_meta.format == param::ConvBias::Format::NCHW44 || + (param.filter_meta.format == param::ConvBias::Format::NCHW44_WINOGRAD && - opr->param().output_block_size == 6 && + param.output_block_size == 6 && param.winograd_matmul_format == param::MatrixMul::Format::MK4)) && - opr->param().mode == param::ConvBias::Mode::CROSS_CORRELATION && + !param.filter_meta.should_flip && (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && param.filter_meta.spatial[0] == 3) && (param.filter_meta.stride[0] == param.filter_meta.stride[1] && @@ -341,7 +335,7 @@ MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF63_4x4_NCHW44, 
MIDOUT_DECL(megdnn_arm_common_conv_bias_f32_kimpl); bool ConvBiasImpl::AlgoF32Direct::usable( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const { MIDOUT_BEGIN(megdnn_arm_common_conv_bias_f32_kimpl, 0, 0) { auto&& fm = param.filter_meta; @@ -370,7 +364,7 @@ bool ConvBiasImpl::AlgoF32Direct::usable( return false; } size_t ConvBiasImpl::AlgoF32Direct::get_workspace( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { MIDOUT_BEGIN(megdnn_arm_common_conv_bias_f32_kimpl, 0, 1) { auto wbundle = MultithreadDirectConvCommon::get_bundle( param, m_large_group); @@ -409,7 +403,8 @@ SmallVector ConvBiasImpl::AlgoF32Direct::get_kimpls( } for (size_t ic = 0; ic < IC; ic++) { MultithreadDirectConvCommon::copy_padding_kern( - bundle, kern_param, ncb_index, {ncb_index.thread_id, 0, ic}); + bundle, kern_param, ncb_index, + {ncb_index.thread_id, 0, ic}); } for (size_t oc = 0; oc < OC; oc++) { MultithreadDirectConvCommon::do_conv_kern( @@ -449,7 +444,7 @@ SmallVector ConvBiasImpl::AlgoF32Direct::get_kimpls( } SmallVector ConvBiasImpl::AlgoF32Direct::dispatch_kerns( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { MIDOUT_BEGIN(megdnn_arm_common_conv_bias_f32_kimpl, 0, 1) { return get_kimpls(param); } @@ -458,7 +453,7 @@ SmallVector ConvBiasImpl::AlgoF32Direct::dispatch_kerns( } /* ===================== stride-1 algo ===================== */ bool ConvBiasImpl::AlgoF32DirectStride1::usable( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const { MIDOUT_BEGIN(megdnn_arm_common_conv_bias_f32_kimpl, 1, 1) { auto&& fm = param.filter_meta; @@ -484,7 +479,7 @@ bool ConvBiasImpl::AlgoF32DirectStride1::usable( } size_t ConvBiasImpl::AlgoF32DirectStride1::get_workspace( - fallback::ConvBiasImpl*, const 
NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { MIDOUT_BEGIN(megdnn_arm_common_conv_bias_f32_kimpl, 1, 1) { auto bundle = MultithreadDirectConvCommon::get_bundle_stride( @@ -575,7 +570,7 @@ ConvBiasImpl::AlgoF32DirectStride1::get_kimpls( SmallVector ConvBiasImpl::AlgoF32DirectStride1::dispatch_kerns( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { MIDOUT_BEGIN(megdnn_arm_common_conv_bias_f32_kimpl, 1, 2) { return get_kimpls(param); } @@ -586,7 +581,7 @@ ConvBiasImpl::AlgoF32DirectStride1::dispatch_kerns( /* ===================== stride-2 algo ===================== */ bool ConvBiasImpl::AlgoF32DirectStride2::usable( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const { MIDOUT_BEGIN(megdnn_arm_common_conv_bias_f32_kimpl, 2, 0) { auto&& fm = param.filter_meta; @@ -611,7 +606,7 @@ bool ConvBiasImpl::AlgoF32DirectStride2::usable( return false; } size_t ConvBiasImpl::AlgoF32DirectStride2::get_workspace( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { MIDOUT_BEGIN(megdnn_arm_common_conv_bias_f32_kimpl, 2, 1) { auto bundle = MultithreadDirectConvCommon::get_bundle_stride( @@ -701,7 +696,7 @@ ConvBiasImpl::AlgoF32DirectStride2::get_kimpls( SmallVector ConvBiasImpl::AlgoF32DirectStride2::dispatch_kerns( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { MIDOUT_BEGIN(megdnn_arm_common_conv_bias_f32_kimpl, 2, 2) { return get_kimpls(param); } diff --git a/dnn/src/arm_common/conv_bias/fp32/algos.h b/dnn/src/arm_common/conv_bias/fp32/algos.h index cda5bf3f7bc9d8774e686d7ff425ea2466720694..af290b4c53b2cdcb467709105504b8ea1065a568 100644 --- a/dnn/src/arm_common/conv_bias/fp32/algos.h +++ b/dnn/src/arm_common/conv_bias/fp32/algos.h @@ -137,13 +137,11 @@ public: return m_large_group ? 
"F32DIRECT_LARGE_GROUP" : "F32DIRECT_SMALL_GROUP"; } - bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, + bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t get_workspace(fallback::ConvBiasImpl* opr, - const NCBKernSizeParam& param) const override; + size_t get_workspace(const NCBKernSizeParam& param) const override; virtual SmallVector dispatch_kerns( - fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param) const override; }; @@ -157,13 +155,11 @@ public: const char* name() const override { return m_large_group ? "F32STRD1_LARGE_GROUP" : "F32STRD1_SMALL_GROUP"; } - bool usable(fallback::ConvBiasImpl*, const NCBKernSizeParam& param, + bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t get_workspace(fallback::ConvBiasImpl*, - const NCBKernSizeParam& param) const override; + size_t get_workspace(const NCBKernSizeParam& param) const override; virtual SmallVector dispatch_kerns( - fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param) const override; }; @@ -177,13 +173,11 @@ public: const char* name() const override { return m_large_group ? 
"F32STRD2_LARGE_GROUP" : "F32STRD2_SMALL_GROUP"; } - bool usable(fallback::ConvBiasImpl*, const NCBKernSizeParam& param, + bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t get_workspace(fallback::ConvBiasImpl*, - const NCBKernSizeParam& param) const override; + size_t get_workspace(const NCBKernSizeParam& param) const override; virtual SmallVector dispatch_kerns( - fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param) const override; }; @@ -194,13 +188,11 @@ public: AlgoF32DirectNCHW44() {} bool is_reproducible() const override { return true; } const char* name() const override { return "F32_CONV_NCHW44_DIRECT"; } - bool usable(fallback::ConvBiasImpl*, const NCBKernSizeParam& param, + bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t get_workspace(fallback::ConvBiasImpl*, - const NCBKernSizeParam& param) const override; + size_t get_workspace(const NCBKernSizeParam& param) const override; virtual SmallVector dispatch_kerns( - fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param) const override; }; @@ -211,13 +203,11 @@ public: AlgoF32DirectNCHWNCHW44() {} bool is_reproducible() const override { return true; } const char* name() const override { return "F32_CONV_NCHW_NCHW44"; } - bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, + bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t get_workspace(fallback::ConvBiasImpl* opr, - const NCBKernSizeParam& param) const override; + size_t get_workspace(const NCBKernSizeParam& param) const override; virtual SmallVector dispatch_kerns( - fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param) const override; }; @@ -227,13 +217,11 @@ class ConvBiasImpl::AlgoF32ChannelWiseNCHW44 final : public AlgoBase { public: bool is_reproducible() const override { return true; } const char* name() const override 
{ return "F32_CHANNEL_WISE_NCHW44"; } - bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, + bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t get_workspace(fallback::ConvBiasImpl* opr, - const NCBKernSizeParam& param) const override; + size_t get_workspace(const NCBKernSizeParam& param) const override; virtual SmallVector dispatch_kerns( - fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param) const override; }; diff --git a/dnn/src/arm_common/conv_bias/fp32/channel_wise_nchw44_algo.cpp b/dnn/src/arm_common/conv_bias/fp32/channel_wise_nchw44_algo.cpp index 97c0484f5f86543208c81c7f2c78d3c56ffd2c8e..5e0784cc77f7fda807ff0d163a9779d8217f3c97 100644 --- a/dnn/src/arm_common/conv_bias/fp32/channel_wise_nchw44_algo.cpp +++ b/dnn/src/arm_common/conv_bias/fp32/channel_wise_nchw44_algo.cpp @@ -10,8 +10,8 @@ * implied. */ -#include "src/arm_common/conv_bias/fp32/channel_wise_nchw44_kern.h" #include "src/arm_common/conv_bias/fp32/algos.h" +#include "src/arm_common/conv_bias/fp32/channel_wise_nchw44_kern.h" #include "src/arm_common/elemwise_op.h" #include "midout.h" @@ -26,8 +26,7 @@ using conv_fun = std::function ConvBiasImpl::AlgoF32ChannelWiseNCHW44::dispatch_kerns( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { const constexpr size_t pack_group_size = 4_z; auto fm = param.filter_meta; const int batch = param.n; diff --git a/dnn/src/arm_common/conv_bias/fp32/f32_direct_nchw44_algo.cpp b/dnn/src/arm_common/conv_bias/fp32/f32_direct_nchw44_algo.cpp index 920aa183a5d140234218b11dbbc759c1e49f56f7..8f564fee320c3ef7796754835521a2a05319294e 100644 --- a/dnn/src/arm_common/conv_bias/fp32/f32_direct_nchw44_algo.cpp +++ b/dnn/src/arm_common/conv_bias/fp32/f32_direct_nchw44_algo.cpp @@ -159,8 +159,7 @@ static void do_conv_kern(const WorkspaceBundle& bundle, } // namespace /* ===================== stride1 algo ===================== */ 
-bool ConvBiasImpl::AlgoF32DirectNCHW44::usable(fallback::ConvBiasImpl*, - const NCBKernSizeParam& param, +bool ConvBiasImpl::AlgoF32DirectNCHW44::usable(const NCBKernSizeParam& param, AlgoSelectionStrategy) const { auto&& fm = param.filter_meta; auto fh = fm.spatial[0]; @@ -182,13 +181,13 @@ bool ConvBiasImpl::AlgoF32DirectNCHW44::usable(fallback::ConvBiasImpl*, } size_t ConvBiasImpl::AlgoF32DirectNCHW44::get_workspace( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { return get_bundle(param).total_size_in_bytes(); } SmallVector ConvBiasImpl::AlgoF32DirectNCHW44::dispatch_kerns( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { auto fm = param.filter_meta; const int batch = param.n; const int group = fm.group; diff --git a/dnn/src/arm_common/conv_bias/fp32/f32_direct_nchw_nchw44_algo.cpp b/dnn/src/arm_common/conv_bias/fp32/f32_direct_nchw_nchw44_algo.cpp index 82dd32313f4d6a522ae82d5f47a62e3d6cdfca28..9ed34735723ac2d7e66640adce2a55ac06b5d1db 100644 --- a/dnn/src/arm_common/conv_bias/fp32/f32_direct_nchw_nchw44_algo.cpp +++ b/dnn/src/arm_common/conv_bias/fp32/f32_direct_nchw_nchw44_algo.cpp @@ -188,8 +188,7 @@ static void do_conv_kern(const WorkspaceBundle& bundle, } // namespace bool ConvBiasImpl::AlgoF32DirectNCHWNCHW44::usable( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param, - AlgoSelectionStrategy) const { + const NCBKernSizeParam& param, AlgoSelectionStrategy) const { auto&& fm = param.filter_meta; auto fh = fm.spatial[0]; int oc = fm.ocpg; @@ -209,13 +208,13 @@ bool ConvBiasImpl::AlgoF32DirectNCHWNCHW44::usable( } size_t ConvBiasImpl::AlgoF32DirectNCHWNCHW44::get_workspace( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { return get_bundle(param).total_size_in_bytes(); } SmallVector ConvBiasImpl::AlgoF32DirectNCHWNCHW44::dispatch_kerns( - fallback::ConvBiasImpl*, const 
NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { auto fm = param.filter_meta; const int batch = param.n; const int group = fm.group; diff --git a/dnn/src/arm_common/conv_bias/int8/algos.cpp b/dnn/src/arm_common/conv_bias/int8/algos.cpp index 832482626f9640ea24cae14e9f96dfce5acb4e7a..2361361597164a257ea6aee73575ff89668ce059 100644 --- a/dnn/src/arm_common/conv_bias/int8/algos.cpp +++ b/dnn/src/arm_common/conv_bias/int8/algos.cpp @@ -28,7 +28,7 @@ using namespace arm_common; MIDOUT_DECL(megdnn_arm_common_conv_bias_int8) /* ===================== stride1 algo ===================== */ bool ConvBiasImpl::AlgoS8DirectStride1::usable( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const { bool avaible = direct_int8_stride1::can_conv_direct_stride1_int8(param); auto fm = param.filter_meta; @@ -40,7 +40,7 @@ bool ConvBiasImpl::AlgoS8DirectStride1::usable( return avaible; } bool ConvBiasImpl::AlgoS8DirectStride1::is_preferred( - megdnn::fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { auto&& fm = param.filter_meta; auto FH = fm.spatial[0]; auto OC = fm.ocpg; @@ -53,14 +53,14 @@ bool ConvBiasImpl::AlgoS8DirectStride1::is_preferred( } size_t ConvBiasImpl::AlgoS8DirectStride1::get_workspace( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { auto bundle = direct_int8_stride1::get_bundle(param, m_large_group); return bundle.total_size_in_bytes(); } SmallVector ConvBiasImpl::AlgoS8DirectStride1::dispatch_kerns( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8, 1, 0) { return direct_int8_stride1::get_kimpls(param, m_large_group); } @@ -70,20 +70,20 @@ ConvBiasImpl::AlgoS8DirectStride1::dispatch_kerns( /* ===================== stride1 algo 
===================== */ bool ConvBiasImpl::AlgoS8ChanWiseStride1NCHW44::usable( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy) const { return channel_wise_nchw44::stride1::is_available(param); } size_t ConvBiasImpl::AlgoS8ChanWiseStride1NCHW44::get_workspace( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { auto bundle = channel_wise_nchw44::stride1::get_bundle(param); return bundle.total_size_in_bytes(); } SmallVector ConvBiasImpl::AlgoS8ChanWiseStride1NCHW44::dispatch_kerns( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8, midout_iv("AlgoS8ChanWiseStride1NCHW44"_hash)) { return channel_wise_nchw44::stride1::get_kimpls(param); @@ -94,20 +94,20 @@ ConvBiasImpl::AlgoS8ChanWiseStride1NCHW44::dispatch_kerns( /* ===================== stride2 algo ===================== */ bool ConvBiasImpl::AlgoS8ChanWiseStride2NCHW44::usable( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy) const { return channel_wise_nchw44::stride2::is_available(param); } size_t ConvBiasImpl::AlgoS8ChanWiseStride2NCHW44::get_workspace( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { auto bundle = channel_wise_nchw44::stride2::get_bundle(param); return bundle.total_size_in_bytes(); } SmallVector ConvBiasImpl::AlgoS8ChanWiseStride2NCHW44::dispatch_kerns( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8, midout_iv("AlgoS8ChanWiseStride2NCHW44"_hash)) { return channel_wise_nchw44::stride2::get_kimpls(param); @@ -118,7 +118,7 @@ ConvBiasImpl::AlgoS8ChanWiseStride2NCHW44::dispatch_kerns( /* ===================== stride2 algo ===================== */ bool 
ConvBiasImpl::AlgoS8DirectStride2::usable( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const { bool avaible = direct_int8_stride2::can_conv_direct_stride2_int8(param); if (algo_selection_strategy == @@ -130,14 +130,14 @@ bool ConvBiasImpl::AlgoS8DirectStride2::usable( } size_t ConvBiasImpl::AlgoS8DirectStride2::get_workspace( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { auto bundle = direct_int8_stride2::get_bundle(param, m_large_group); return bundle.total_size_in_bytes(); } SmallVector ConvBiasImpl::AlgoS8DirectStride2::dispatch_kerns( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8, 1, 1) { return direct_int8_stride2::get_kimpls(param, m_large_group); } @@ -148,7 +148,7 @@ ConvBiasImpl::AlgoS8DirectStride2::dispatch_kerns( #if __ARM_FEATURE_DOTPROD /* ===================== dot stride1 algo ======================== */ bool ConvBiasImpl::AlgoDotS8DirectStride1::usable( - FallbackConvBiasImpl*, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const { bool avaible = direct_dotprod_int8_stride1::can_conv_direct_stride1_int8(param); @@ -163,14 +163,14 @@ bool ConvBiasImpl::AlgoDotS8DirectStride1::usable( } size_t ConvBiasImpl::AlgoDotS8DirectStride1::get_workspace( - FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { auto bundle = direct_dotprod_int8_stride1::get_bundle(param, m_large_group); return bundle.total_size_in_bytes(); } SmallVector ConvBiasImpl::AlgoDotS8DirectStride1::dispatch_kerns( - FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8, 2, 1) { return 
direct_dotprod_int8_stride1::get_kimpls(param, m_large_group); } @@ -180,7 +180,7 @@ ConvBiasImpl::AlgoDotS8DirectStride1::dispatch_kerns( /* ===================== dot stride2 algo ======================== */ bool ConvBiasImpl::AlgoDotS8DirectStride2::usable( - FallbackConvBiasImpl*, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const { bool avaible = direct_dotprod_int8_stride2::can_conv_direct_stride2_int8(param); @@ -193,14 +193,14 @@ bool ConvBiasImpl::AlgoDotS8DirectStride2::usable( } size_t ConvBiasImpl::AlgoDotS8DirectStride2::get_workspace( - FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { auto bundle = direct_dotprod_int8_stride2::get_bundle(param, m_large_group); return bundle.total_size_in_bytes(); } SmallVector ConvBiasImpl::AlgoDotS8DirectStride2::dispatch_kerns( - FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8, 2, 2) { return direct_dotprod_int8_stride2::get_kimpls(param, m_large_group); } @@ -212,7 +212,7 @@ ConvBiasImpl::AlgoDotS8DirectStride2::dispatch_kerns( /* ======================= AlgoS8WinogradF23_8x8 ======================== */ bool ConvBiasImpl::AlgoS8WinogradF23_8x8::usable( - fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy /*algo_selection_strategy*/) const { if (param.filter_meta.icpg % 8 != 0 || param.filter_meta.ocpg % 8 != 0) return false; @@ -225,13 +225,14 @@ bool ConvBiasImpl::AlgoS8WinogradF23_8x8::usable( .get_matmul_kern_param(param); return m_matmul_algo->usable(matmul_param) && m_matmul_algo->packmode() == PackMode::NO_PACK && - ((opr->param().format == param::ConvBias::Format::NCHW && + ((param.filter_meta.format == param::ConvBias::Format::NCHW && param.filter_type.enumv() == DTypeEnum::QuantizedS8) || - (opr->param().format == 
param::ConvBias::Format::NCHW_WINOGRAD && - opr->param().output_block_size == 2 && + (param.filter_meta.format == + param::ConvBias::Format::NCHW_WINOGRAD && + param.output_block_size == 2 && param.winograd_matmul_format == param::MatrixMul::Format::MK8 && param.filter_type.enumv() == DTypeEnum::QuantizedS16)) && - opr->param().mode == param::ConvBias::Mode::CROSS_CORRELATION && + !param.filter_meta.should_flip && (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && param.filter_meta.spatial[0] == 3) && (param.filter_meta.stride[0] == param.filter_meta.stride[1] && @@ -251,7 +252,7 @@ MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoS8WinogradF23_8x8, //=========================== input int8 compute float32 ========= bool ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::usable( - fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy /*algo_selection_strategy*/) const { MEGDNN_MARK_USED_VAR(param); MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8, @@ -270,14 +271,14 @@ bool ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::usable( .get_matmul_kern_param(param)); return is_matmul_usable && m_matmul_algo->packmode() == PackMode::NO_PACK && - ((opr->param().format == param::ConvBias::Format::NCHW44 && + ((param.filter_meta.format == param::ConvBias::Format::NCHW44 && param.filter_type.enumv() == DTypeEnum::QuantizedS8) || - ((opr->param().format == + ((param.filter_meta.format == param::ConvBias::Format::NCHW44_WINOGRAD) && - opr->param().output_block_size == 2 && + param.output_block_size == 2 && param.winograd_matmul_format == param::MatrixMul::Format::MK4)) && - opr->param().mode == param::ConvBias::Mode::CROSS_CORRELATION && + !param.filter_meta.should_flip && (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && param.filter_meta.spatial[0] == 3) && (param.filter_meta.stride[0] == param.filter_meta.stride[1] && @@ -302,40 +303,42 @@ 
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoS8CF32WinogradF23_4x4_NCHW44, /* ======================= AlgoS8WinogradF23_8x8_NCHW44 ======================== */ bool ConvBiasImpl::AlgoS8WinogradF23_8x8_NCHW44::usable( - fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy /*algo_selection_strategy*/) const { - MIDOUT_BEGIN( megdnn_arm_common_conv_bias_int8, - midout_iv( - "arm_common_AlgoS8WinogradF23_8x8_NCHW44::usable"_hash)) { - if (param.filter_meta.icpg % 8 != 0 || param.filter_meta.ocpg % 8 != 0) - return false; - using Strategy = winograd::winograd_2x3_8x8_s8_nchw44; - Strategy strategy(param.src_type, param.filter_type, param.dst_type); - auto&& matmul_param = - megdnn::winograd::ConvBias( - strategy, m_tile_size, param) - .get_matmul_kern_param(param); - bool is_matmul_usable = m_matmul_algo->usable(matmul_param); - return is_matmul_usable && - ((opr->param().format == param::ConvBias::Format::NCHW44 && - param.filter_type.enumv() == DTypeEnum::QuantizedS8) || - (opr->param().format == param::ConvBias::Format::NCHW44_WINOGRAD && - opr->param().output_block_size == 2 && - param.winograd_matmul_format == param::MatrixMul::Format::MK8 && - param.filter_type.enumv() == DTypeEnum::QuantizedS16)) && - opr->param().mode == param::ConvBias::Mode::CROSS_CORRELATION && - (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && - param.filter_meta.spatial[0] == 3) && - (param.filter_meta.stride[0] == param.filter_meta.stride[1] && - param.filter_meta.stride[0] == 1) && - (param.filter_meta.dilation[0] == param.filter_meta.dilation[1] && - param.filter_meta.dilation[0] == 1) && - param.compute_mode == param::ConvBias::ComputeMode::DEFAULT && - param.src_type.enumv() == DTypeEnum::QuantizedS8 && - param.bias_type.enumv() == DTypeEnum::QuantizedS32 && - param.dst_type.enumv() == DTypeEnum::QuantizedS8; + midout_iv("arm_common_AlgoS8WinogradF23_8x8_NCHW44::usable"_hash)) { + if (param.filter_meta.icpg % 8 != 
0 || param.filter_meta.ocpg % 8 != 0) + return false; + using Strategy = winograd::winograd_2x3_8x8_s8_nchw44; + Strategy strategy(param.src_type, param.filter_type, param.dst_type); + auto&& matmul_param = + megdnn::winograd::ConvBias( + strategy, m_tile_size, param) + .get_matmul_kern_param(param); + bool is_matmul_usable = m_matmul_algo->usable(matmul_param); + return is_matmul_usable && + ((param.filter_meta.format == param::ConvBias::Format::NCHW44 && + param.filter_type.enumv() == DTypeEnum::QuantizedS8) || + (param.filter_meta.format == + param::ConvBias::Format::NCHW44_WINOGRAD && + param.output_block_size == 2 && + param.winograd_matmul_format == + param::MatrixMul::Format::MK8 && + param.filter_type.enumv() == DTypeEnum::QuantizedS16)) && + !param.filter_meta.should_flip && + (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && + param.filter_meta.spatial[0] == 3) && + (param.filter_meta.stride[0] == param.filter_meta.stride[1] && + param.filter_meta.stride[0] == 1) && + (param.filter_meta.dilation[0] == + param.filter_meta.dilation[1] && + param.filter_meta.dilation[0] == 1) && + param.compute_mode == param::ConvBias::ComputeMode::DEFAULT && + param.src_type.enumv() == DTypeEnum::QuantizedS8 && + param.bias_type.enumv() == DTypeEnum::QuantizedS32 && + param.dst_type.enumv() == DTypeEnum::QuantizedS8; } MIDOUT_END(); return false; diff --git a/dnn/src/arm_common/conv_bias/int8/algos.h b/dnn/src/arm_common/conv_bias/int8/algos.h index a2dc2b449490e1b7733b29be8f0a56c820cba9b5..9bbdb194d10ac6a0cb40d2314eb95f28e8b791bd 100644 --- a/dnn/src/arm_common/conv_bias/int8/algos.h +++ b/dnn/src/arm_common/conv_bias/int8/algos.h @@ -26,16 +26,13 @@ public: const char* name() const override { return m_large_group ? 
"S8STRD1_LARGE_GROUP" : "S8STRD1_SMALL_GROUP"; } - bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, + bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t get_workspace(fallback::ConvBiasImpl*, - const NCBKernSizeParam& param) const override; + size_t get_workspace(const NCBKernSizeParam& param) const override; virtual SmallVector dispatch_kerns( - fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param) const override; - bool is_preferred(megdnn::fallback::ConvBiasImpl*, - const NCBKernSizeParam& param) const override; + bool is_preferred(const NCBKernSizeParam& param) const override; }; class ConvBiasImpl::AlgoS8DirectStride2 final : public AlgoBase { @@ -47,13 +44,11 @@ public: const char* name() const override { return m_large_group ? "S8STRD2_LARGE_GROUP" : "S8STRD2_SMALL_GROUP"; } - bool usable(fallback::ConvBiasImpl*, const NCBKernSizeParam& param, + bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t get_workspace(fallback::ConvBiasImpl*, - const NCBKernSizeParam& param) const override; + size_t get_workspace(const NCBKernSizeParam& param) const override; virtual SmallVector dispatch_kerns( - fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param) const override; }; @@ -62,15 +57,12 @@ public: AlgoS8DirectNCHW44() {} bool is_reproducible() const override { return true; } const char* name() const override { return "S8_NCHW44_DIRECT"; } - bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, + bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t get_workspace(fallback::ConvBiasImpl*, - const NCBKernSizeParam& param) const override; + size_t get_workspace(const NCBKernSizeParam& param) const override; virtual SmallVector dispatch_kerns( - fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param) const override; - bool 
is_preferred(megdnn::fallback::ConvBiasImpl*, - const NCBKernSizeParam& param) const override; + bool is_preferred(const NCBKernSizeParam& param) const override; }; class ConvBiasImpl::AlgoS8DirectNCHWNCHW44 final : public AlgoBase { @@ -78,27 +70,22 @@ public: AlgoS8DirectNCHWNCHW44() {} bool is_reproducible() const override { return true; } const char* name() const override { return "S8_CONV_NCHW_NCHW44"; } - bool usable(fallback::ConvBiasImpl*, const NCBKernSizeParam& param, + bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t get_workspace(fallback::ConvBiasImpl*, - const NCBKernSizeParam& param) const override; + size_t get_workspace(const NCBKernSizeParam& param) const override; virtual SmallVector dispatch_kerns( - fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param) const override; - bool is_preferred(megdnn::fallback::ConvBiasImpl*, - const NCBKernSizeParam& param) const override; + bool is_preferred(const NCBKernSizeParam& param) const override; }; class ConvBiasImpl::AlgoS8ChanWiseStride1NCHW44 final : public AlgoBase { public: bool is_reproducible() const override { return true; } const char* name() const override { return "S8_CHAN_WISE_STRD1_NCHW44"; } - bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, + bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t get_workspace(fallback::ConvBiasImpl*, - const NCBKernSizeParam& param) const override; + size_t get_workspace(const NCBKernSizeParam& param) const override; virtual SmallVector dispatch_kerns( - fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param) const override; }; @@ -106,12 +93,10 @@ class ConvBiasImpl::AlgoS8ChanWiseStride2NCHW44 final : public AlgoBase { public: bool is_reproducible() const override { return true; } const char* name() const override { return "S8_CHAN_WISE_STRD2_NCHW44"; } - bool usable(fallback::ConvBiasImpl* opr, 
const NCBKernSizeParam& param, + bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t get_workspace(fallback::ConvBiasImpl*, - const NCBKernSizeParam& param) const override; + size_t get_workspace(const NCBKernSizeParam& param) const override; virtual SmallVector dispatch_kerns( - fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param) const override; }; @@ -121,13 +106,11 @@ class ConvBiasImpl::AlgoDotS8DirectNCHWNCHW44 final : public AlgoBase { public: bool is_reproducible() const override { return true; } const char* name() const override { return "ARMDOTS8_NCHW_NCHW44"; } - bool usable(FallbackConvBiasImpl*, const NCBKernSizeParam&, + bool usable(const NCBKernSizeParam&, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t get_workspace(FallbackConvBiasImpl*, - const NCBKernSizeParam&) const override; + size_t get_workspace(const NCBKernSizeParam&) const override; virtual SmallVector dispatch_kerns( - fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param) const override; }; @@ -142,13 +125,11 @@ public: return m_large_group ? 
"ARMDOTS8STRD1_LARGE_GROUP" : "ARMDOTS8STRD1_SMALL_GROUP"; } - bool usable(FallbackConvBiasImpl*, const NCBKernSizeParam&, + bool usable(const NCBKernSizeParam&, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t get_workspace(FallbackConvBiasImpl*, - const NCBKernSizeParam&) const override; + size_t get_workspace(const NCBKernSizeParam&) const override; virtual SmallVector dispatch_kerns( - fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param) const override; }; @@ -163,13 +144,11 @@ public: : "ARMDOTS8STRD2_SMALL_GROUP"; } - bool usable(FallbackConvBiasImpl*, const NCBKernSizeParam&, + bool usable(const NCBKernSizeParam&, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t get_workspace(FallbackConvBiasImpl*, - const NCBKernSizeParam&) const override; + size_t get_workspace(const NCBKernSizeParam&) const override; virtual SmallVector dispatch_kerns( - fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param) const override; }; @@ -178,21 +157,16 @@ public: AlgoDotS8Direct_NCHW44() {} bool is_reproducible() const override { return true; } - const char* name() const override { - return "ARMDOTS8DIRECT_NCHW44"; - } - bool usable(FallbackConvBiasImpl*, const NCBKernSizeParam&, + const char* name() const override { return "ARMDOTS8DIRECT_NCHW44"; } + bool usable(const NCBKernSizeParam&, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t get_workspace(FallbackConvBiasImpl*, - const NCBKernSizeParam&) const override; + size_t get_workspace(const NCBKernSizeParam&) const override; SmallVector dispatch_kerns( - fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param) const override; - bool is_preferred(megdnn::fallback::ConvBiasImpl*, - const NCBKernSizeParam& param) const override; + bool is_preferred(const NCBKernSizeParam& param) const override; }; #endif diff --git a/dnn/src/arm_common/conv_bias/int8/direct_dotprod_nchw44_algo.cpp b/dnn/src/arm_common/conv_bias/int8/direct_dotprod_nchw44_algo.cpp 
index f7b08374d059d64486e9cb75bd198fafa5aa49e4..00829c9e554680ae37ab41e081a12b98d1835c39 100644 --- a/dnn/src/arm_common/conv_bias/int8/direct_dotprod_nchw44_algo.cpp +++ b/dnn/src/arm_common/conv_bias/int8/direct_dotprod_nchw44_algo.cpp @@ -161,7 +161,7 @@ static void conv_kern(const WorkspaceBundle& bundle, } // namespace bool ConvBiasImpl::AlgoDotS8Direct_NCHW44::usable( - FallbackConvBiasImpl*, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const { MEGDNN_MARK_USED_VAR(algo_selection_strategy); auto&& fm = param.filter_meta; @@ -199,19 +199,19 @@ bool ConvBiasImpl::AlgoDotS8Direct_NCHW44::usable( } bool ConvBiasImpl::AlgoDotS8Direct_NCHW44::is_preferred( - megdnn::fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { MEGDNN_MARK_USED_VAR(param); return true; } size_t ConvBiasImpl::AlgoDotS8Direct_NCHW44::get_workspace( - FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { return get_bundle(param).total_size_in_bytes(); } SmallVector ConvBiasImpl::AlgoDotS8Direct_NCHW44::dispatch_kerns( - FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8, midout_iv("ALGODOTS8DIRECT_NCHW44"_hash)) { auto fm = param.filter_meta; diff --git a/dnn/src/arm_common/conv_bias/int8/direct_nchw44_algo.cpp b/dnn/src/arm_common/conv_bias/int8/direct_nchw44_algo.cpp index 0626b23ee8ab71de16809cd47bcfdd7bf3c35246..54a2f431bb57bb6b767772465bd5a3ad920b6322 100644 --- a/dnn/src/arm_common/conv_bias/int8/direct_nchw44_algo.cpp +++ b/dnn/src/arm_common/conv_bias/int8/direct_nchw44_algo.cpp @@ -189,7 +189,7 @@ static void do_conv_kern(const WorkspaceBundle& bundle, } bool ConvBiasImpl::AlgoS8DirectNCHW44::usable( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy 
algo_selection_strategy) const { MEGDNN_MARK_USED_VAR(algo_selection_strategy); auto&& fm = param.filter_meta; @@ -213,22 +213,20 @@ bool ConvBiasImpl::AlgoS8DirectNCHW44::usable( } bool ConvBiasImpl::AlgoS8DirectNCHW44::is_preferred( - megdnn::fallback::ConvBiasImpl* conv_bias_impl_ptr, - const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { // TODO: benchmark and fix - MEGDNN_MARK_USED_VAR(conv_bias_impl_ptr); MEGDNN_MARK_USED_VAR(param); return false; } size_t ConvBiasImpl::AlgoS8DirectNCHW44::get_workspace( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { return get_bundle(param).total_size_in_bytes(); } SmallVector ConvBiasImpl::AlgoS8DirectNCHW44::dispatch_kerns( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { auto fm = param.filter_meta; size_t N = param.n; size_t IC = fm.icpg; diff --git a/dnn/src/arm_common/conv_bias/int8/direct_nchw_nchw44_algo.cpp b/dnn/src/arm_common/conv_bias/int8/direct_nchw_nchw44_algo.cpp index 93999a6124d901def40052e15c8639e4ec196955..ea4265c50f030ef8485e691047cd1657453705be 100644 --- a/dnn/src/arm_common/conv_bias/int8/direct_nchw_nchw44_algo.cpp +++ b/dnn/src/arm_common/conv_bias/int8/direct_nchw_nchw44_algo.cpp @@ -214,7 +214,7 @@ static void do_conv_kern(const WorkspaceBundle& bundle, } bool ConvBiasImpl::AlgoS8DirectNCHWNCHW44::usable( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const { MEGDNN_MARK_USED_VAR(algo_selection_strategy); auto&& fm = param.filter_meta; @@ -236,22 +236,20 @@ bool ConvBiasImpl::AlgoS8DirectNCHWNCHW44::usable( } bool ConvBiasImpl::AlgoS8DirectNCHWNCHW44::is_preferred( - megdnn::fallback::ConvBiasImpl* conv_bias_impl_ptr, const NCBKernSizeParam& param) const { // TODO: benchmark and fix - MEGDNN_MARK_USED_VAR(conv_bias_impl_ptr); MEGDNN_MARK_USED_VAR(param); return 
false; } size_t ConvBiasImpl::AlgoS8DirectNCHWNCHW44::get_workspace( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { return get_bundle(param).total_size_in_bytes(); } SmallVector ConvBiasImpl::AlgoS8DirectNCHWNCHW44::dispatch_kerns( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { auto fm = param.filter_meta; size_t N = param.n; size_t OC = fm.ocpg; diff --git a/dnn/src/arm_common/conv_bias/int8/dot_direct_nchw_nchw44_algo.cpp b/dnn/src/arm_common/conv_bias/int8/dot_direct_nchw_nchw44_algo.cpp index 798dc967e45c2b042909cc95bffac6ff2c42f032..ec69a87d678c77fb16a23c49d90a3f5102d6ba1b 100644 --- a/dnn/src/arm_common/conv_bias/int8/dot_direct_nchw_nchw44_algo.cpp +++ b/dnn/src/arm_common/conv_bias/int8/dot_direct_nchw_nchw44_algo.cpp @@ -172,8 +172,7 @@ static void do_conv_kern(const WorkspaceBundle& bundle, } // namespace bool ConvBiasImpl::AlgoDotS8DirectNCHWNCHW44::usable( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param, - AlgoSelectionStrategy) const { + const NCBKernSizeParam& param, AlgoSelectionStrategy) const { auto&& fm = param.filter_meta; auto fh = fm.spatial[0]; int oc = fm.ocpg; @@ -194,13 +193,13 @@ bool ConvBiasImpl::AlgoDotS8DirectNCHWNCHW44::usable( } size_t ConvBiasImpl::AlgoDotS8DirectNCHWNCHW44::get_workspace( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { return get_bundle(param).total_size_in_bytes(); } SmallVector ConvBiasImpl::AlgoDotS8DirectNCHWNCHW44::dispatch_kerns( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { auto fm = param.filter_meta; const int batch = param.n; const int group = fm.group; diff --git a/dnn/src/arm_common/conv_bias/int8x8x16/algos.cpp b/dnn/src/arm_common/conv_bias/int8x8x16/algos.cpp index c8ba37e210ef77790ba30058b84bce8cac6aed61..f9bf53b3130f99254643b699b715c27763517785 100644 
--- a/dnn/src/arm_common/conv_bias/int8x8x16/algos.cpp +++ b/dnn/src/arm_common/conv_bias/int8x8x16/algos.cpp @@ -83,7 +83,7 @@ void get_rectified_size_str2(size_t IH, size_t IW, size_t OH, size_t OW, /* ===================== direct algo ===================== */ bool ConvBiasImpl::AlgoI8x8x16Direct::usable( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const { MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8816_kimpl, 1, 0) { auto&& fm = param.filter_meta; @@ -129,7 +129,7 @@ WorkspaceBundle ConvBiasImpl::AlgoI8x8x16Direct::get_bundle( return {nullptr, {part0, part1}}; } size_t ConvBiasImpl::AlgoI8x8x16Direct::get_workspace( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8816_kimpl, 1, 1) { auto bundle = get_bundle(param); return bundle.total_size_in_bytes(); @@ -293,7 +293,7 @@ SmallVector ConvBiasImpl::AlgoI8x8x16Direct::get_kimpls( } SmallVector ConvBiasImpl::AlgoI8x8x16Direct::dispatch_kerns( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8816_kimpl, 1, 2) { return get_kimpls(param); } @@ -303,7 +303,7 @@ ConvBiasImpl::AlgoI8x8x16Direct::dispatch_kerns( /* ===================== stride-2 algo ===================== */ bool ConvBiasImpl::AlgoI8x8x16Stride2::usable( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const { MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8816_kimpl, 2, 0) { auto&& fm = param.filter_meta; @@ -350,7 +350,7 @@ WorkspaceBundle ConvBiasImpl::AlgoI8x8x16Stride2::get_bundle( return {nullptr, {part0, part1}}; } size_t ConvBiasImpl::AlgoI8x8x16Stride2::get_workspace( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) 
const { MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8816_kimpl, 2, 1) { auto bundle = get_bundle(param); return bundle.total_size_in_bytes(); @@ -513,7 +513,7 @@ SmallVector ConvBiasImpl::AlgoI8x8x16Stride2::get_kimpls( } SmallVector ConvBiasImpl::AlgoI8x8x16Stride2::dispatch_kerns( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8816_kimpl, 2, 2) { return get_kimpls(param); } @@ -521,7 +521,7 @@ ConvBiasImpl::AlgoI8x8x16Stride2::dispatch_kerns( return {}; } bool ConvBiasImpl::AlgoI8x8x16Stride2Filter2::usable( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy /*algo_selection_strategy*/) const { MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8816_kimpl, 3, 0) { return param.bias_mode == BiasMode::NO_BIAS && @@ -534,7 +534,7 @@ bool ConvBiasImpl::AlgoI8x8x16Stride2Filter2::usable( } size_t ConvBiasImpl::AlgoI8x8x16Stride2Filter2::get_workspace( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8816_kimpl, 3, 1) { return conv_bias::get_workspace_in_bytes_conv_int8x8x16_stride2_flt2( param); @@ -545,7 +545,7 @@ size_t ConvBiasImpl::AlgoI8x8x16Stride2Filter2::get_workspace( SmallVector ConvBiasImpl::AlgoI8x8x16Stride2Filter2::dispatch_kerns( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { // return {conv_bias::conv_int8x8x16_stride2_flt2,true}; auto kern = [](const NCBKernParam& param, const NCBKernIndex& ncb_index) { MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8816_kimpl, 3, 2) { diff --git a/dnn/src/arm_common/conv_bias/int8x8x16/algos.h b/dnn/src/arm_common/conv_bias/int8x8x16/algos.h index cec502586128c4e894a95b6671b5e6af16bea6a7..acabe888dd4355791f97ffb3eef849f128f2efef 100644 --- a/dnn/src/arm_common/conv_bias/int8x8x16/algos.h +++ 
b/dnn/src/arm_common/conv_bias/int8x8x16/algos.h @@ -35,12 +35,10 @@ public: return m_large_group ? "I8816DIRECT_LARGE_GROUP" : "I8816DIRECT_SMALL_GROUP"; } - bool usable(fallback::ConvBiasImpl*, const NCBKernSizeParam& param, + bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t get_workspace(fallback::ConvBiasImpl*, - const NCBKernSizeParam& param) const override; + size_t get_workspace(const NCBKernSizeParam& param) const override; virtual SmallVector dispatch_kerns( - fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param) const override; }; @@ -64,13 +62,11 @@ public: return m_large_group ? "I8816STRD2_LARGE_GROUP" : "I8816STRD2_SMALL_GROUP"; } - bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, + bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t get_workspace(fallback::ConvBiasImpl*, - const NCBKernSizeParam& param) const override; + size_t get_workspace(const NCBKernSizeParam& param) const override; virtual SmallVector dispatch_kerns( - fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param) const override; }; @@ -79,13 +75,11 @@ public: bool is_reproducible() const override { return true; } const char* name() const override { return "I8816STRD2F2"; } - bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, + bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t get_workspace(fallback::ConvBiasImpl*, - const NCBKernSizeParam& param) const override; + size_t get_workspace(const NCBKernSizeParam& param) const override; virtual SmallVector dispatch_kerns( - fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param) const override; }; diff --git a/dnn/src/arm_common/conv_bias/opr_impl.cpp b/dnn/src/arm_common/conv_bias/opr_impl.cpp index 2c88b4004583322a0801cebb16ffbba38c7313a0..cae7c4ae6671345089208dc7146b46661adbef30 100644 --- 
a/dnn/src/arm_common/conv_bias/opr_impl.cpp +++ b/dnn/src/arm_common/conv_bias/opr_impl.cpp @@ -232,7 +232,7 @@ void* const ConvBiasImpl::sm_arm_common_algo_type = &arm_common_algo_type_storage; bool ConvBiasImpl::is_matmul_quantized_prefer( - const ConvBiasImpl::NCBKernSizeParam& param) { + const ConvBiasImpl::NCBKernSizeParam& param) const { // fallback::ConvBiasImpl::NCBKernParam conv_ncb_param; fallback::ConvBiasImpl::NCBKernSizeParam conv_ncb_param( param, 0, param::MatrixMul::Format::DEFAULT, {}, 0, diff --git a/dnn/src/arm_common/conv_bias/opr_impl.h b/dnn/src/arm_common/conv_bias/opr_impl.h index 50b73a4b2a8cb99dacac6736840d0cc7ef1a2a77..f50762e449be7840cc492905608bf676b06a025c 100644 --- a/dnn/src/arm_common/conv_bias/opr_impl.h +++ b/dnn/src/arm_common/conv_bias/opr_impl.h @@ -27,7 +27,7 @@ public: SmallVector algo_pack() override; bool is_matmul_quantized_prefer( - const ConvBiasImpl::NCBKernSizeParam& ncb_param) override; + const ConvBiasImpl::NCBKernSizeParam& ncb_param) const override; class AlgoPack; protected: diff --git a/dnn/src/arm_common/conv_bias/quint8/algos.cpp b/dnn/src/arm_common/conv_bias/quint8/algos.cpp index 8c743da93bd238b92ae8252df68d8d10aeb732a4..ae48792f51c6f71374db2cf33e1fa7d0ed936f41 100644 --- a/dnn/src/arm_common/conv_bias/quint8/algos.cpp +++ b/dnn/src/arm_common/conv_bias/quint8/algos.cpp @@ -6,17 +6,18 @@ * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. 
*/ #include "src/arm_common/conv_bias/quint8/algos.h" +#include "midout.h" #include "src/arm_common/conv_bias/quint8/stride1.h" -#include "src/arm_common/conv_bias/quint8/stride2.h" #include "src/arm_common/conv_bias/quint8/stride1_dotprod.h" +#include "src/arm_common/conv_bias/quint8/stride2.h" #include "src/arm_common/conv_bias/quint8/stride2_dotprod.h" #include "src/arm_common/elemwise_op.h" #include "src/fallback/conv_bias/common.h" -#include "midout.h" MIDOUT_DECL(megdnn_arm_common_conv_bias_quint8) @@ -25,7 +26,7 @@ using namespace arm_common; /* ===================== stride1 algo ===================== */ bool ConvBiasImpl::AlgoQU8DirectStride1::usable( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const { bool avaible = direct_quint8_stride1::can_conv_direct_stride1_quint8(param); if (algo_selection_strategy == @@ -37,14 +38,14 @@ bool ConvBiasImpl::AlgoQU8DirectStride1::usable( } size_t ConvBiasImpl::AlgoQU8DirectStride1::get_workspace( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { auto bundle = direct_quint8_stride1::get_bundle(param, m_large_group); return bundle.total_size_in_bytes(); } SmallVector ConvBiasImpl::AlgoQU8DirectStride1::dispatch_kerns( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { MIDOUT_BEGIN(megdnn_arm_common_conv_bias_quint8, 0, 0) { return direct_quint8_stride1::get_kimpls(param, m_large_group); } @@ -54,7 +55,7 @@ ConvBiasImpl::AlgoQU8DirectStride1::dispatch_kerns( /* ===================== stride2 algo ===================== */ bool ConvBiasImpl::AlgoQU8DirectStride2::usable( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const { bool avaible = direct_quint8_stride2::can_conv_direct_stride2_quint8(param); if (algo_selection_strategy == 
@@ -66,14 +67,14 @@ bool ConvBiasImpl::AlgoQU8DirectStride2::usable( } size_t ConvBiasImpl::AlgoQU8DirectStride2::get_workspace( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { auto bundle = direct_quint8_stride2::get_bundle(param, m_large_group); return bundle.total_size_in_bytes(); } SmallVector ConvBiasImpl::AlgoQU8DirectStride2::dispatch_kerns( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { MIDOUT_BEGIN(megdnn_arm_common_conv_bias_quint8, 0, 1) { return direct_quint8_stride2::get_kimpls(param, m_large_group); } @@ -83,7 +84,7 @@ ConvBiasImpl::AlgoQU8DirectStride2::dispatch_kerns( #if __ARM_FEATURE_DOTPROD /* ===================== stride1 algo ===================== */ bool ConvBiasImpl::AlgoDotU8DirectStride1::usable( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const { bool avaible = direct_dotprod_quint8_stride1::can_conv_direct_stride1_quint8( @@ -97,7 +98,7 @@ bool ConvBiasImpl::AlgoDotU8DirectStride1::usable( } size_t ConvBiasImpl::AlgoDotU8DirectStride1::get_workspace( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { auto bundle = direct_dotprod_quint8_stride1::get_bundle(param, m_large_group); return bundle.total_size_in_bytes(); @@ -105,7 +106,7 @@ size_t ConvBiasImpl::AlgoDotU8DirectStride1::get_workspace( SmallVector ConvBiasImpl::AlgoDotU8DirectStride1::dispatch_kerns( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { MIDOUT_BEGIN(megdnn_arm_common_conv_bias_quint8, 1, 0) { return direct_dotprod_quint8_stride1::get_kimpls(param, m_large_group); } @@ -115,7 +116,7 @@ ConvBiasImpl::AlgoDotU8DirectStride1::dispatch_kerns( /* ===================== stride2 algo ===================== */ bool ConvBiasImpl::AlgoDotU8DirectStride2::usable( 
- fallback::ConvBiasImpl*, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const { bool avaible = direct_dotprod_quint8_stride2::can_conv_direct_stride2_quint8( @@ -129,7 +130,7 @@ bool ConvBiasImpl::AlgoDotU8DirectStride2::usable( } size_t ConvBiasImpl::AlgoDotU8DirectStride2::get_workspace( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { auto bundle = direct_dotprod_quint8_stride2::get_bundle(param, m_large_group); return bundle.total_size_in_bytes(); @@ -137,7 +138,7 @@ size_t ConvBiasImpl::AlgoDotU8DirectStride2::get_workspace( SmallVector ConvBiasImpl::AlgoDotU8DirectStride2::dispatch_kerns( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { MIDOUT_BEGIN(megdnn_arm_common_conv_bias_quint8, 1, 1) { return direct_dotprod_quint8_stride2::get_kimpls(param, m_large_group); } diff --git a/dnn/src/arm_common/conv_bias/quint8/algos.h b/dnn/src/arm_common/conv_bias/quint8/algos.h index 33a457e6358434048d14f1f9f6056e58ba4c0e1e..2ff7dcdfa465aa0daa8fc0c28105b4d595a69db0 100644 --- a/dnn/src/arm_common/conv_bias/quint8/algos.h +++ b/dnn/src/arm_common/conv_bias/quint8/algos.h @@ -6,7 +6,8 @@ * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. */ #pragma once @@ -26,13 +27,11 @@ public: return m_large_group ? 
"QU8STRD1_LARGE_GROUP" : "QU8STRD1_SMALL_GROUP"; } - bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, + bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t get_workspace(fallback::ConvBiasImpl*, - const NCBKernSizeParam& param) const override; + size_t get_workspace(const NCBKernSizeParam& param) const override; virtual SmallVector dispatch_kerns( - fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param) const override; }; @@ -45,16 +44,14 @@ public: const char* name() const override { return m_large_group ? "QU8STRD2_LARGE_GROUP" : "QU8STRD2_SMALL_GROUP"; } - bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, + bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t get_workspace(fallback::ConvBiasImpl*, - const NCBKernSizeParam& param) const override; + size_t get_workspace(const NCBKernSizeParam& param) const override; virtual SmallVector dispatch_kerns( - fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param) const override; }; -#if __ARM_FEATURE_DOTPROD +#if __ARM_FEATURE_DOTPROD class ConvBiasImpl::AlgoDotU8DirectStride1 final : public AlgoBase { bool m_large_group; @@ -66,13 +63,11 @@ public: : "ARMDOTU8STRD1_SMALL_GROUP"; } - bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, + bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t get_workspace(fallback::ConvBiasImpl*, - const NCBKernSizeParam& param) const override; + size_t get_workspace(const NCBKernSizeParam& param) const override; virtual SmallVector dispatch_kerns( - fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param) const override; }; @@ -86,13 +81,11 @@ public: return m_large_group ? 
"ARMDOTU8STRD2_LARGE_GROUP" : "ARMDOTU8STRD2_SMALL_GROUP"; } - bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, + bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t get_workspace(fallback::ConvBiasImpl*, - const NCBKernSizeParam& param) const override; + size_t get_workspace(const NCBKernSizeParam& param) const override; virtual SmallVector dispatch_kerns( - fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param) const override; }; #endif diff --git a/dnn/src/armv7/conv_bias/int8/algos.cpp b/dnn/src/armv7/conv_bias/int8/algos.cpp index 5ead3cf11f8614ee7e6c18394af3dddf3dcea7ca..e4e9450cfac3f83e26d77f95234f31954b3e603d 100644 --- a/dnn/src/armv7/conv_bias/int8/algos.cpp +++ b/dnn/src/armv7/conv_bias/int8/algos.cpp @@ -26,9 +26,8 @@ using namespace armv7; /* ===================== matrix mul algo ===================== */ bool ConvBiasImpl::AlgoS8MatrixMul::usable( - FallbackConvBiasImpl* opr, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy /*algo_selection_strategy*/) const { - MEGDNN_MARK_USED_VAR(opr); auto&& fm = param.filter_meta; return param.src_type.enumv() == DTypeEnum::QuantizedS8 && param.dst_type.enumv() == DTypeEnum::QuantizedS8 && diff --git a/dnn/src/armv7/conv_bias/int8/algos.h b/dnn/src/armv7/conv_bias/int8/algos.h index 9199584ef2d0ee22543e765f7409591d302fe3d5..8dc948bb2d4d4283521331dd3946b778f2d1c189 100644 --- a/dnn/src/armv7/conv_bias/int8/algos.h +++ b/dnn/src/armv7/conv_bias/int8/algos.h @@ -27,14 +27,12 @@ public: bool is_reproducible() const override { return true; } const char* name() const override { return "S8MATMUL"; } - bool usable(FallbackConvBiasImpl* opr, const NCBKernSizeParam& param, + bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t get_workspace(FallbackConvBiasImpl*, - const NCBKernSizeParam& param) const override { + size_t 
get_workspace(const NCBKernSizeParam& param) const override { return get_bundle(param).total_size_in_bytes(); } SmallVector dispatch_kerns( - FallbackConvBiasImpl*, const NCBKernSizeParam& param) const override { size_t group = param.filter_meta.group; return {{kimpl, {group, 1_z, 1_z}}}; diff --git a/dnn/src/armv7/conv_bias/quint8/algos.cpp b/dnn/src/armv7/conv_bias/quint8/algos.cpp index 648dda0528519d868c2de81e25f3b39d7036a2e9..9ba1f560efcc55d90280d7d1cbcafe482121796f 100644 --- a/dnn/src/armv7/conv_bias/quint8/algos.cpp +++ b/dnn/src/armv7/conv_bias/quint8/algos.cpp @@ -26,9 +26,8 @@ using namespace armv7; /* ===================== matrix mul algo ===================== */ bool ConvBiasImpl::AlgoQU8MatrixMul::usable( - FallbackConvBiasImpl* opr, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy /*algo_selection_strategy*/) const { - MEGDNN_MARK_USED_VAR(opr); auto&& fm = param.filter_meta; return param.src_type.enumv() == DTypeEnum::Quantized8Asymm && param.dst_type.enumv() == DTypeEnum::Quantized8Asymm && diff --git a/dnn/src/armv7/conv_bias/quint8/algos.h b/dnn/src/armv7/conv_bias/quint8/algos.h index d7399cd15e5e64fdf9f99f023ace8d6d652bad68..b3e2c1315ba1f8a98d67ac179864aace52c1a40d 100644 --- a/dnn/src/armv7/conv_bias/quint8/algos.h +++ b/dnn/src/armv7/conv_bias/quint8/algos.h @@ -27,15 +27,13 @@ public: bool is_reproducible() const override { return true; } const char* name() const override { return "QU8MATMUL"; } - bool usable(FallbackConvBiasImpl* opr, const NCBKernSizeParam& param, + bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t get_workspace(FallbackConvBiasImpl*, - const NCBKernSizeParam& param) const override { + size_t get_workspace(const NCBKernSizeParam& param) const override { return get_bundle(param).total_size_in_bytes(); } SmallVector dispatch_kerns( - fallback::ConvBiasImpl* /*opr*/, const NCBKernSizeParam& param) const override { size_t 
group = param.filter_meta.group; return {{kimpl, {group, 1_z, 1_z}}}; diff --git a/dnn/src/fallback/conv_bias/algos.cpp b/dnn/src/fallback/conv_bias/algos.cpp index 4ec43d0ef619e03bee265e2912bded3ce60bb75e..80100c6a6b8d02e4e3c6d84bf0a575e6337bb6a5 100644 --- a/dnn/src/fallback/conv_bias/algos.cpp +++ b/dnn/src/fallback/conv_bias/algos.cpp @@ -10,6 +10,7 @@ */ #include "src/fallback/conv_bias/algos.h" +#include "megdnn/opr_param_defs.h" #include "src/common/opr_delegate.h" #include "src/fallback/conv_bias/winograd/strategy.h" #include "src/naive/convolution/helper.h" @@ -21,18 +22,28 @@ using namespace fallback; namespace { -param::Convolution get_param_convolution(const param::ConvBias param) { - param::Convolution ret{param.mode, param.pad_h, - param.pad_w, param.stride_h, - param.stride_w, param.dilate_h, - param.dilate_w, param::Convolution::Sparse::DENSE, - param.format}; - return ret; +param::Convolution get_param_convolution( + const ConvBiasImpl::NCBKernSizeParam& param) { + param::Convolution::Mode mode; + param::Convolution::Sparse sparse; + if (param.filter_meta.should_flip) { + mode = param::Convolution::Mode::CONVOLUTION; + } else { + mode = param::Convolution::Mode::CROSS_CORRELATION; + } + return param::Convolution{mode, + param.filter_meta.padding[0], + param.filter_meta.padding[1], + param.filter_meta.stride[0], + param.filter_meta.stride[1], + param.filter_meta.dilation[0], /* dilate_h slot */ + param.filter_meta.dilation[1], /* dilate_w slot */ + sparse = param::Convolution::Sparse::DENSE, + param.filter_meta.format}; } -TensorLayoutArray get_layouts(const param::ConvBias& param, - const ConvBiasImpl::NCBKernSizeParam& p) { - megdnn_assert(param.format == param::ConvBias::Format::NCHW); +TensorLayoutArray get_layouts(const ConvBiasImpl::NCBKernSizeParam& p) { + megdnn_assert(p.filter_meta.format == param::ConvBias::Format::NCHW); UNPACK_CONV_NCB_KERN_SIZES(p); MEGDNN_MARK_USED_VAR(SH); MEGDNN_MARK_USED_VAR(SW); @@ -53,14 +64,14 @@ TensorLayoutArray get_layouts(const param::ConvBias&
param, return {src_layout, filter_layout, bias_layout, dst_layout}; } -void kern_default(param::ConvBias param, const ConvBiasImpl::NCBKernParam& p) { +void kern_default(const ConvBiasImpl::NCBKernParam& p) { dt_byte* workspace_ptr = static_cast(p.workspace_ptr); auto filter_meta_ptr = reinterpret_cast( &p.filter_meta); auto filter_meta = *filter_meta_ptr; - auto layouts = get_layouts(param, p); + auto layouts = get_layouts(p); TensorND src{reinterpret_cast(const_cast(p.src_ptr)), layouts[0]}; @@ -83,7 +94,7 @@ void kern_default(param::ConvBias param, const ConvBiasImpl::NCBKernParam& p) { bias.layout.dtype.enumv() == \ DTypeTrait::enumv) && \ sfb.layout.dtype.enumv() == DTypeTrait::enumv && \ - param.compute_mode == param::ConvBias::ComputeMode::cmode) { \ + p.compute_mode == param::ConvBias::ComputeMode::cmode) { \ func(src, filter, bias, sfb, workspace_ptr, filter_meta); \ } #define DISPATCH(in_dt, out_dt) \ @@ -118,7 +129,7 @@ void kern_default(param::ConvBias param, const ConvBiasImpl::NCBKernParam& p) { auto res = sfb; using NonlineMode = param::ConvBias::NonlineMode; - switch (param.nonlineMode) { + switch (p.nonlineMode) { #define cb(_mode) \ case NonlineMode::_mode: { \ if (res.layout.dtype.category() != DTypeCategory::QUANTIZED) { \ @@ -168,24 +179,23 @@ MIDOUT_DECL(megdnn_fallback_naive) /* ======================= AlgoNaive ======================== */ bool ConvBiasImpl::AlgoNaive::usable( - ConvBiasImpl* opr, const NCBKernSizeParam&, + const NCBKernSizeParam& param, AlgoSelectionStrategy /*algo_selection_strategy*/) const { MIDOUT_BEGIN(megdnn_fallback_naive, 0) { - return opr->param().format == param::ConvBias::Format::NCHW; + return param.filter_meta.format == param::ConvBias::Format::NCHW; } MIDOUT_END(); return false; } -size_t ConvBiasImpl::AlgoNaive::get_workspace(ConvBiasImpl* opr, - const NCBKernSizeParam& p) const { +size_t ConvBiasImpl::AlgoNaive::get_workspace(const NCBKernSizeParam& p) const { MIDOUT_BEGIN(megdnn_fallback_naive, 1) { - auto 
layouts = get_layouts(opr->param(), p); + auto layouts = get_layouts(p); //! When group>1 or n>1, this algo will parallel by group and n size_t nr_threads = p.nr_threads; auto conv_opr = inplace_cpu_handle()->create_operator(); - conv_opr->param() = get_param_convolution(opr->param()); + conv_opr->param() = get_param_convolution(p); if (p.dst_type.enumv() == DTypeEnum::QuantizedS8 || p.dst_type.enumv() == DTypeEnum::Quantized8Asymm) { TensorLayout conv_dst_layout; @@ -201,15 +211,14 @@ size_t ConvBiasImpl::AlgoNaive::get_workspace(ConvBiasImpl* opr, } SmallVector ConvBiasImpl::AlgoNaive::dispatch_kerns( - ConvBiasImpl* opr, const NCBKernSizeParam& p) const { - param::ConvBias opr_param = opr->param(); - size_t workspace_size = get_workspace(opr, p); + const NCBKernSizeParam& p) const { + size_t workspace_size = get_workspace(p); //! When group>1 or n>1, this algo will parallel by group and n size_t nr_threads = p.nr_threads; size_t GROUP = p.filter_meta.group; size_t N = p.n; size_t workspace_per_thread = workspace_size / nr_threads; - auto kern = [opr_param, workspace_per_thread]( + auto kern = [workspace_per_thread]( const NCBKernParam& param, const NCBKernIndex& ncb_index) { MIDOUT_BEGIN(megdnn_fallback_naive, 2) { @@ -224,7 +233,7 @@ SmallVector ConvBiasImpl::AlgoNaive::dispatch_kerns( thread_param.dst_ptr = param.dst(batch_id, group_id); thread_param.src_ptr = param.src(batch_id, group_id); thread_param.bias_ptr = param.bias(batch_id, group_id); - kern_default(opr_param, thread_param); + kern_default(thread_param); } MIDOUT_END(); }; @@ -235,10 +244,9 @@ MIDOUT_DECL(megdnn_fallback_winograd) /* ======================= AlgoWinogradF32 ======================== */ bool ConvBiasImpl::AlgoWinogradF32::usable( - ConvBiasImpl* opr, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy /*algo_selection_strategy*/) const { MEGDNN_MARK_USED_VAR(param); - MEGDNN_MARK_USED_VAR(opr); MIDOUT_BEGIN(megdnn_fallback_winograd, 1, 0) { using 
Strategy = fallback::winograd::winograd_2x3_1x1_f; Strategy strategy(param.src_type, param.filter_type, param.dst_type); @@ -246,13 +254,13 @@ bool ConvBiasImpl::AlgoWinogradF32::usable( strategy, UNIT_TILE_SIZE, param) .get_matmul_kern_param(param); return m_matmul_algo->usable(matmul_param) && - (opr->param().format == param::ConvBias::Format::NCHW || - (opr->param().format == + (param.filter_meta.format == param::ConvBias::Format::NCHW || + (param.filter_meta.format == param::ConvBias::Format::NCHW_WINOGRAD && - opr->param().output_block_size == 2 && + param.output_block_size == 2 && param.winograd_matmul_format == param::MatrixMul::Format::DEFAULT)) && - opr->param().mode == param::ConvBias::Mode::CROSS_CORRELATION && + !param.filter_meta.should_flip && /* CROSS_CORRELATION only */ (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && param.filter_meta.spatial[0] == 3) && (param.filter_meta.stride[0] == param.filter_meta.stride[1] && @@ -268,7 +276,7 @@ bool ConvBiasImpl::AlgoWinogradF32::usable( } size_t ConvBiasImpl::AlgoWinogradF32::get_workspace( - ConvBiasImpl*, const NCBKernSizeParam& p) const { + const NCBKernSizeParam& p) const { MEGDNN_MARK_USED_VAR(p); MIDOUT_BEGIN(megdnn_fallback_winograd, 1, 1) { fallback::winograd::winograd_2x3_1x1_f strategy( @@ -284,7 +292,7 @@ size_t ConvBiasImpl::AlgoWinogradF32::get_workspace( SmallVector ConvBiasImpl::AlgoWinogradF32::dispatch_kerns( - ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { MEGDNN_MARK_USED_VAR(param); MIDOUT_BEGIN(megdnn_fallback_winograd, 1, 2) { fallback::winograd::winograd_2x3_1x1_f strategy( @@ -302,10 +310,9 @@ ConvBiasImpl::AlgoWinogradF32::dispatch_kerns( /* ======================= AlgoWinogradF32 4x4 ======================== */ bool ConvBiasImpl::AlgoWinogradF32_4x4::usable( - ConvBiasImpl* opr, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy /*algo_selection_strategy*/) const { MEGDNN_MARK_USED_VAR(param); -
MEGDNN_MARK_USED_VAR(opr); MIDOUT_BEGIN(megdnn_fallback_winograd, 2, 0) { if (param.filter_meta.icpg % 4 != 0 || param.filter_meta.ocpg % 4 != 0) return false; @@ -317,13 +324,13 @@ bool ConvBiasImpl::AlgoWinogradF32_4x4::usable( strategy, UNIT_TILE_SIZE, param) .get_matmul_kern_param(param); return m_matmul_algo->usable(matmul_param) && - (opr->param().format == param::ConvBias::Format::NCHW || - (opr->param().format == + (param.filter_meta.format == param::ConvBias::Format::NCHW || + (param.filter_meta.format == param::ConvBias::Format::NCHW_WINOGRAD && - opr->param().output_block_size == 2 && + param.output_block_size == 2 && param.winograd_matmul_format == param::MatrixMul::Format::MK4)) && - opr->param().mode == param::ConvBias::Mode::CROSS_CORRELATION && + !param.filter_meta.should_flip && /* CROSS_CORRELATION only */ (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && param.filter_meta.spatial[0] == 3) && (param.filter_meta.stride[0] == param.filter_meta.stride[1] && @@ -339,7 +346,7 @@ bool ConvBiasImpl::AlgoWinogradF32_4x4::usable( } size_t ConvBiasImpl::AlgoWinogradF32_4x4::get_workspace( - ConvBiasImpl*, const NCBKernSizeParam& p) const { + const NCBKernSizeParam& p) const { MEGDNN_MARK_USED_VAR(p); MIDOUT_BEGIN(megdnn_fallback_winograd, 2, 1) { fallback::winograd::winograd_2x3_4x4_f strategy( @@ -356,7 +363,7 @@ size_t ConvBiasImpl::AlgoWinogradF32_4x4::get_workspace( SmallVector ConvBiasImpl::AlgoWinogradF32_4x4::dispatch_kerns( - ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { MEGDNN_MARK_USED_VAR(param); MIDOUT_BEGIN(megdnn_fallback_winograd, 2, 2) { fallback::winograd::winograd_2x3_4x4_f strategy( @@ -374,10 +381,9 @@ ConvBiasImpl::AlgoWinogradF32_4x4::dispatch_kerns( /* ======================= AlgoWinogradQS8 ======================== */ bool ConvBiasImpl::AlgoWinogradQS8::usable( - ConvBiasImpl* opr, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy /*algo_selection_strategy*/) const
{ MEGDNN_MARK_USED_VAR(param); - MEGDNN_MARK_USED_VAR(opr); MIDOUT_BEGIN(megdnn_fallback_winograd, 3, 0) { using Strategy = fallback::winograd::winograd_2x3_1x1_qs8; Strategy strategy(param.src_type, param.filter_type, param.dst_type); @@ -386,13 +392,13 @@ bool ConvBiasImpl::AlgoWinogradQS8::usable( .get_matmul_kern_param(param); return m_matmul_algo->usable(matmul_param) && - (opr->param().format == param::ConvBias::Format::NCHW || - (opr->param().format == + (param.filter_meta.format == param::ConvBias::Format::NCHW || + (param.filter_meta.format == param::ConvBias::Format::NCHW_WINOGRAD && - opr->param().output_block_size == 2 && + param.output_block_size == 2 && param.winograd_matmul_format == param::MatrixMul::Format::DEFAULT)) && - opr->param().mode == param::ConvBias::Mode::CROSS_CORRELATION && + !param.filter_meta.should_flip && /* CROSS_CORRELATION only */ (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && param.filter_meta.spatial[0] == 3) && (param.filter_meta.stride[0] == param.filter_meta.stride[1] && @@ -408,7 +414,7 @@ bool ConvBiasImpl::AlgoWinogradQS8::usable( } size_t ConvBiasImpl::AlgoWinogradQS8::get_workspace( - ConvBiasImpl*, const NCBKernSizeParam& p) const { + const NCBKernSizeParam& p) const { MEGDNN_MARK_USED_VAR(p); MIDOUT_BEGIN(megdnn_fallback_winograd, 3, 1) { fallback::winograd::winograd_2x3_1x1_qs8 strategy( @@ -424,7 +430,7 @@ size_t ConvBiasImpl::AlgoWinogradQS8::get_workspace( SmallVector ConvBiasImpl::AlgoWinogradQS8::dispatch_kerns( - ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { MEGDNN_MARK_USED_VAR(param); MIDOUT_BEGIN(megdnn_fallback_winograd, 3, 2) { fallback::winograd::winograd_2x3_1x1_qs8 strategy( @@ -442,10 +448,9 @@ ConvBiasImpl::AlgoWinogradQS8::dispatch_kerns( /* ======================= AlgoWinogradQS8 8x8 ======================== */ bool ConvBiasImpl::AlgoWinogradQS8_8x8::usable( - ConvBiasImpl* opr, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy
/*algo_selection_strategy*/) const { MEGDNN_MARK_USED_VAR(param); - MEGDNN_MARK_USED_VAR(opr); MIDOUT_BEGIN(megdnn_fallback_winograd, 4, 0) { if (param.filter_meta.icpg % 8 != 0 || param.filter_meta.ocpg % 8 != 0) return false; @@ -457,13 +462,13 @@ bool ConvBiasImpl::AlgoWinogradQS8_8x8::usable( strategy, UNIT_TILE_SIZE, param) .get_matmul_kern_param(param); return m_matmul_algo->usable(matmul_param) && - (opr->param().format == param::ConvBias::Format::NCHW || - (opr->param().format == + (param.filter_meta.format == param::ConvBias::Format::NCHW || + (param.filter_meta.format == param::ConvBias::Format::NCHW_WINOGRAD && - opr->param().output_block_size == 2 && + param.output_block_size == 2 && param.winograd_matmul_format == param::MatrixMul::Format::MK8)) && - opr->param().mode == param::ConvBias::Mode::CROSS_CORRELATION && + !param.filter_meta.should_flip && (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && param.filter_meta.spatial[0] == 3) && (param.filter_meta.stride[0] == param.filter_meta.stride[1] && @@ -479,7 +484,7 @@ bool ConvBiasImpl::AlgoWinogradQS8_8x8::usable( } size_t ConvBiasImpl::AlgoWinogradQS8_8x8::get_workspace( - ConvBiasImpl*, const NCBKernSizeParam& p) const { + const NCBKernSizeParam& p) const { MEGDNN_MARK_USED_VAR(p); MIDOUT_BEGIN(megdnn_fallback_winograd, 4, 1) { fallback::winograd::winograd_2x3_8x8_qs8 strategy( @@ -496,7 +501,7 @@ size_t ConvBiasImpl::AlgoWinogradQS8_8x8::get_workspace( SmallVector ConvBiasImpl::AlgoWinogradQS8_8x8::dispatch_kerns( - ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { MEGDNN_MARK_USED_VAR(param); MIDOUT_BEGIN(megdnn_fallback_winograd, 4, 2) { fallback::winograd::winograd_2x3_8x8_qs8 strategy( diff --git a/dnn/src/fallback/conv_bias/algos.h b/dnn/src/fallback/conv_bias/algos.h index 29a2d55d37f5ccf4938cdf97e95e7087ac727fcd..c0f95521ea91f4f823e932b7ce28e8f3bae6c914 100644 --- a/dnn/src/fallback/conv_bias/algos.h +++
b/dnn/src/fallback/conv_bias/algos.h @@ -22,12 +22,10 @@ class ConvBiasImpl::AlgoNaive final : public AlgoBase { public: bool is_reproducible() const override { return true; } const char* name() const override { return "FALLBACK_NAIVE"; } - bool usable(ConvBiasImpl* opr, const NCBKernSizeParam& param, + bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t get_workspace(ConvBiasImpl*, - const NCBKernSizeParam& param) const override; - SmallVector dispatch_kerns(ConvBiasImpl*, - const NCBKernSizeParam&) const override; + size_t get_workspace(const NCBKernSizeParam& param) const override; + SmallVector dispatch_kerns(const NCBKernSizeParam&) const override; }; class ConvBiasImpl::AlgoWinogradF32 final : public AlgoBase { @@ -43,12 +41,10 @@ public: } return m_name.c_str(); } - bool usable(ConvBiasImpl* opr, const NCBKernSizeParam& param, + bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t get_workspace(ConvBiasImpl*, - const NCBKernSizeParam& param) const override; - SmallVector dispatch_kerns(ConvBiasImpl*, - const NCBKernSizeParam&) const override; + size_t get_workspace(const NCBKernSizeParam& param) const override; + SmallVector dispatch_kerns(const NCBKernSizeParam&) const override; private: MatrixMulImpl::AlgoBase* m_matmul_algo; @@ -69,12 +65,10 @@ public: } return m_name.c_str(); } - bool usable(ConvBiasImpl* opr, const NCBKernSizeParam& param, + bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t get_workspace(ConvBiasImpl*, - const NCBKernSizeParam& param) const override; - SmallVector dispatch_kerns(ConvBiasImpl*, - const NCBKernSizeParam&) const override; + size_t get_workspace(const NCBKernSizeParam& param) const override; + SmallVector dispatch_kerns(const NCBKernSizeParam&) const override; private: MatrixMulImpl::AlgoBase* m_matmul_algo; @@ -95,12 +89,10 @@ public: } 
return m_name.c_str(); } - bool usable(ConvBiasImpl* opr, const NCBKernSizeParam& param, + bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t get_workspace(ConvBiasImpl*, - const NCBKernSizeParam& param) const override; - SmallVector dispatch_kerns(ConvBiasImpl*, - const NCBKernSizeParam&) const override; + size_t get_workspace(const NCBKernSizeParam& param) const override; + SmallVector dispatch_kerns(const NCBKernSizeParam&) const override; private: MatrixMulImpl::AlgoBase* m_matmul_algo; @@ -121,12 +113,10 @@ public: } return m_name.c_str(); } - bool usable(ConvBiasImpl* opr, const NCBKernSizeParam& param, + bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t get_workspace(ConvBiasImpl*, - const NCBKernSizeParam& param) const override; - SmallVector dispatch_kerns(ConvBiasImpl*, - const NCBKernSizeParam&) const override; + size_t get_workspace(const NCBKernSizeParam& param) const override; + SmallVector dispatch_kerns(const NCBKernSizeParam&) const override; private: MatrixMulImpl::AlgoBase* m_matmul_algo; diff --git a/dnn/src/fallback/conv_bias/common.h b/dnn/src/fallback/conv_bias/common.h index 620e053079c1225d8c9037daf1eca16fe6298f49..abe313dda963d30b2a60b242fef80d6ceab80d1f 100644 --- a/dnn/src/fallback/conv_bias/common.h +++ b/dnn/src/fallback/conv_bias/common.h @@ -140,22 +140,17 @@ using BiasMode = ConvBiasForward::BiasMode; #define MEGDNN_WINOGRAD_ALGO_FUN_DECLARE() \ bool is_reproducible() const override { return true; } \ - bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, \ + bool usable(const NCBKernSizeParam& param, \ AlgoSelectionStrategy algo_selection_strategy) const override; \ - size_t get_workspace(fallback::ConvBiasImpl*, \ - const NCBKernSizeParam& param) const override; \ - virtual SmallVector dispatch_kerns(fallback::ConvBiasImpl* opr, \ - const NCBKernSizeParam& param) \ + size_t 
get_workspace(const NCBKernSizeParam& param) const override; \ + virtual SmallVector dispatch_kerns(const NCBKernSizeParam& param) \ const override; \ SmallVector deduce_preprocessed_filter_layout( \ - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) \ - const override; \ - size_t get_preprocess_workspace(fallback::ConvBiasImpl*, \ - const NCBKernSizeParam& param) \ + const NCBKernSizeParam& param) const override; \ + size_t get_preprocess_workspace(const NCBKernSizeParam& param) \ const override; \ virtual SmallVector dispatch_preprocess_kerns( \ - fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param) \ - const override; \ + const NCBKernSizeParam& param) const override; \ \ private: \ fallback::MatrixMulImpl::AlgoBase* m_matmul_algo; \ diff --git a/dnn/src/fallback/conv_bias/conv1x1/algos.cpp b/dnn/src/fallback/conv_bias/conv1x1/algos.cpp index 84a8bcb778ed04d25c558ce1ad1afb3a0fb9a1ad..af9fcb6015446a384dda1b7c6be52c365dd11066 100644 --- a/dnn/src/fallback/conv_bias/conv1x1/algos.cpp +++ b/dnn/src/fallback/conv_bias/conv1x1/algos.cpp @@ -48,7 +48,7 @@ size_t ConvBiasImpl::AlgoConv1x1::get_oc_tile_size_heuristic( } size_t ConvBiasImpl::AlgoConv1x1::get_workspace( - ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { size_t OH = param.osz[0]; size_t OW = param.osz[1]; size_t compt_oc_block_size = get_oc_tile_size_heuristic(param); @@ -90,7 +90,7 @@ size_t ConvBiasImpl::AlgoConv1x1::get_workspace( } SmallVector ConvBiasImpl::AlgoConv1x1::dispatch_kerns( - ConvBiasImpl* opr, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { SmallVector ret_kern; size_t OH = param.osz[0]; size_t OW = param.osz[1]; @@ -138,11 +138,11 @@ SmallVector ConvBiasImpl::AlgoConv1x1::dispatch_kerns( //! 
get thread bundle thread_bundle = utils::get_thread_bundle(param, matmul_bundle.get_size(2), - compt_oc_block_size); + compt_oc_block_size); Conv1x1StrategyBase* conv1x1_strategy = Conv1x1Factory::make_conv1x1_strategy(param, pack_mode, - opr->param().format); + param.filter_meta.format); auto kern_packA = [this, whole_bundle, matmul_bundle, param, compt_oc_block_size, conv1x1_strategy]( @@ -180,13 +180,12 @@ SmallVector ConvBiasImpl::AlgoConv1x1::dispatch_kerns( return ret_kern; } -bool ConvBiasImpl::AlgoConv1x1::usable(ConvBiasImpl* opr, - const NCBKernSizeParam& param, +bool ConvBiasImpl::AlgoConv1x1::usable(const NCBKernSizeParam& param, AlgoSelectionStrategy) const { MIDOUT_BEGIN(megdnn_fallback_conv1x1, 0, 2) { - if (opr->param().format != param::ConvBias::Format::NCHW && - opr->param().format != param::ConvBias::Format::NCHW44 && - opr->param().format != param::ConvBias::Format::NCHW44_DOT) + if (param.filter_meta.format != param::ConvBias::Format::NCHW && + param.filter_meta.format != param::ConvBias::Format::NCHW44 && + param.filter_meta.format != param::ConvBias::Format::NCHW44_DOT) return false; size_t FH = param.filter_meta.spatial[0], @@ -199,7 +198,7 @@ bool ConvBiasImpl::AlgoConv1x1::usable(ConvBiasImpl* opr, if (FH != 1 || FW != 1 || PH || PW || SH != 1 || SW != 1) return false; - if(param.src_type.enumv() != param.filter_type.enumv()) { + if (param.src_type.enumv() != param.filter_type.enumv()) { return false; } @@ -225,8 +224,8 @@ bool ConvBiasImpl::AlgoConv1x1::usable(ConvBiasImpl* opr, } } - if (opr->param().format == param::ConvBias::Format::NCHW44 || - opr->param().format == param::ConvBias::Format::NCHW44_DOT) { + if (param.filter_meta.format == param::ConvBias::Format::NCHW44 || + param.filter_meta.format == param::ConvBias::Format::NCHW44_DOT) { if (param.filter_meta.icpg < 4_z || param.filter_meta.icpg == 1 || param.filter_meta.ocpg == 1) { return false; @@ -236,13 +235,14 @@ bool ConvBiasImpl::AlgoConv1x1::usable(ConvBiasImpl* opr, size_t 
OH = param.osz[0]; size_t OW = param.osz[1]; - MatrixMulImpl::KernSizeParam matmul_param = utils::get_matmul_kern_param( - param, OH * OW, get_oc_tile_size_heuristic(param)); + MatrixMulImpl::KernSizeParam matmul_param = + utils::get_matmul_kern_param(param, OH * OW, + get_oc_tile_size_heuristic(param)); bool matmul_usable = m_matmul_algo->usable(matmul_param); auto pack_mode = m_matmul_algo->packmode(); bool strategy_usable = Conv1x1Factory::can_make_conv1x1_strategy( - param, pack_mode, opr->param().format); + param, pack_mode, param.filter_meta.format); return matmul_usable && strategy_usable && (param.filter_meta.dilation[0] == @@ -255,7 +255,7 @@ bool ConvBiasImpl::AlgoConv1x1::usable(ConvBiasImpl* opr, } bool ConvBiasImpl::AlgoConv1x1::is_preferred( - ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { size_t OH = param.osz[0]; size_t OW = param.osz[1]; if (OH * OW != 1) { @@ -265,8 +265,8 @@ bool ConvBiasImpl::AlgoConv1x1::is_preferred( if (param.src_type.enumv() == DTypeEnum::Int8 && param.filter_type.enumv() == DTypeEnum::Int8 && param.dst_type.enumv() == DTypeEnum::Int16) { - return true; - } + return true; + } #elif MEGDNN_X86 size_t OC = param.filter_meta.ocpg; if (OC > 2 || param.src_type.enumv() == DTypeEnum::Float32) @@ -276,4 +276,4 @@ bool ConvBiasImpl::AlgoConv1x1::is_preferred( } } -// vim: syntax=cpp.doxygen \ No newline at end of file +// vim: syntax=cpp.doxygen diff --git a/dnn/src/fallback/conv_bias/conv1x1/algos.h b/dnn/src/fallback/conv_bias/conv1x1/algos.h index 860f1f4285106aecd9ae4f35391c5857f09207d7..f7bab4b0748f168927dfa51e432b80e80c5a7966 100644 --- a/dnn/src/fallback/conv_bias/conv1x1/algos.h +++ b/dnn/src/fallback/conv_bias/conv1x1/algos.h @@ -34,14 +34,13 @@ public: return m_name.c_str(); } - bool usable(ConvBiasImpl* opr, const NCBKernSizeParam& param, + bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t 
get_workspace(ConvBiasImpl*, - const NCBKernSizeParam& param) const override; + size_t get_workspace(const NCBKernSizeParam& param) const override; SmallVector dispatch_kerns( - ConvBiasImpl* opr, const NCBKernSizeParam& param) const override; + const NCBKernSizeParam& param) const override; - bool is_preferred(ConvBiasImpl*, const NCBKernSizeParam&) const override; + bool is_preferred(const NCBKernSizeParam&) const override; protected: size_t get_oc_tile_size_heuristic(const NCBKernSizeParam& param) const; diff --git a/dnn/src/fallback/conv_bias/conv1x1/algos_conv1x1_gemv.cpp b/dnn/src/fallback/conv_bias/conv1x1/algos_conv1x1_gemv.cpp index ccfa2bade32b6e53333ef4d213e8e0281d04bfaa..82ab37473e2e4a438ac59988085d746103df1847 100644 --- a/dnn/src/fallback/conv_bias/conv1x1/algos_conv1x1_gemv.cpp +++ b/dnn/src/fallback/conv_bias/conv1x1/algos_conv1x1_gemv.cpp @@ -249,7 +249,7 @@ size_t ConvBiasImpl::AlgoConv1x1Gemv::get_oc_tile_size_heuristic( } size_t ConvBiasImpl::AlgoConv1x1Gemv::get_workspace( - ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { MIDOUT_BEGIN(megdnn_fallback_conv1x1_gemv, midout_iv("AlgoConv1x1Gemv::get_workspace"_hash)) { size_t compt_oc_block_size = get_oc_tile_size_heuristic(param); @@ -265,7 +265,7 @@ size_t ConvBiasImpl::AlgoConv1x1Gemv::get_workspace( SmallVector ConvBiasImpl::AlgoConv1x1Gemv::dispatch_kerns( - ConvBiasImpl* opr, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { SmallVector ret_kern; size_t OC = param.filter_meta.ocpg; size_t compt_oc_block_size = get_oc_tile_size_heuristic(param); @@ -311,7 +311,7 @@ ConvBiasImpl::AlgoConv1x1Gemv::dispatch_kerns( } \ MIDOUT_END() - switch (opr->param().format) { + switch (param.filter_meta.format) { case param::ConvBias::Format::NCHW: cb1(param::ConvBias::Format::NCHW, dt_float32, dt_float32, PostprocessMode::FLOAT, "NCHW::GEMV::FLOAT"_hash); @@ -401,18 +401,18 @@ ConvBiasImpl::AlgoConv1x1Gemv::dispatch_kerns( return 
ret_kern; } -bool ConvBiasImpl::AlgoConv1x1Gemv::usable(ConvBiasImpl* opr, - const NCBKernSizeParam& param, +bool ConvBiasImpl::AlgoConv1x1Gemv::usable(const NCBKernSizeParam& param, AlgoSelectionStrategy) const { MIDOUT_BEGIN(megdnn_fallback_conv1x1_gemv, midout_iv("AlgoConv1x1Gemv::usable"_hash)) { + auto format = param.filter_meta.format; #if MEGDNN_X86 - if (opr->param().format != param::ConvBias::Format::NCHW) + if (format != param::ConvBias::Format::NCHW) return false; #elif MEGDNN_AARCH64 || MEGDNN_ARMV7 - if (opr->param().format != param::ConvBias::Format::NCHW && - opr->param().format != param::ConvBias::Format::NCHW44 && - opr->param().format != param::ConvBias::Format::NCHW44_DOT) + if (format != param::ConvBias::Format::NCHW && + format != param::ConvBias::Format::NCHW44 && + format != param::ConvBias::Format::NCHW44_DOT) return false; #endif @@ -469,13 +469,13 @@ bool ConvBiasImpl::AlgoConv1x1Gemv::usable(ConvBiasImpl* opr, return false; } #if MEGDNN_AARCH64 || MEGDNN_ARMV7 - if (opr->param().format == param::ConvBias::Format::NCHW44) { + if (format == param::ConvBias::Format::NCHW44) { if (param.src_type.enumv() != DTypeEnum::Float32 && param.src_type.enumv() != DTypeEnum::Int8 && param.src_type.enumv() != DTypeEnum::QuantizedS8) { return false; } - } else if (opr->param().format == param::ConvBias::Format::NCHW44_DOT) { + } else if (format == param::ConvBias::Format::NCHW44_DOT) { if (param.src_type.enumv() != DTypeEnum::Int8 && param.src_type.enumv() != DTypeEnum::QuantizedS8) { return false; @@ -492,11 +492,11 @@ bool ConvBiasImpl::AlgoConv1x1Gemv::usable(ConvBiasImpl* opr, } bool ConvBiasImpl::AlgoConv1x1Gemv::is_preferred( - ConvBiasImpl* opr, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { MIDOUT_BEGIN(megdnn_fallback_conv1x1_gemv, midout_iv("AlgoConv1x1Gemv::is_preferred"_hash)) { #if (MEGDNN_ARMV7 || MEGDNN_AARCH64) - if (opr->param().format == param::ConvBias::Format::NCHW && + if (param.filter_meta.format == 
param::ConvBias::Format::NCHW && param.src_type.enumv() == DTypeEnum::Quantized8Asymm) { return false; } @@ -507,4 +507,4 @@ bool ConvBiasImpl::AlgoConv1x1Gemv::is_preferred( return false; } -// vim: syntax=cpp.doxygen \ No newline at end of file +// vim: syntax=cpp.doxygen diff --git a/dnn/src/fallback/conv_bias/conv1x1/algos_conv1x1_gemv.h b/dnn/src/fallback/conv_bias/conv1x1/algos_conv1x1_gemv.h index 38b00a20d0796f229989713746994e95f2a9f65c..3f266a60d7b862e0158b8160f0eefb86118b06ea 100644 --- a/dnn/src/fallback/conv_bias/conv1x1/algos_conv1x1_gemv.h +++ b/dnn/src/fallback/conv_bias/conv1x1/algos_conv1x1_gemv.h @@ -24,18 +24,15 @@ public: bool is_reproducible() const override { return true; } - const char* name() const override { - return "CONV1x1_GEMV"; - } + const char* name() const override { return "CONV1x1_GEMV"; } - bool usable(ConvBiasImpl* opr, const NCBKernSizeParam& param, + bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t get_workspace(ConvBiasImpl*, - const NCBKernSizeParam& param) const override; + size_t get_workspace(const NCBKernSizeParam& param) const override; SmallVector dispatch_kerns( - ConvBiasImpl* opr, const NCBKernSizeParam& param) const override; + const NCBKernSizeParam& param) const override; - bool is_preferred(ConvBiasImpl*, const NCBKernSizeParam&) const override; + bool is_preferred(const NCBKernSizeParam&) const override; protected: size_t get_oc_tile_size_heuristic(const NCBKernSizeParam& param) const; diff --git a/dnn/src/fallback/conv_bias/im2col/algos.cpp b/dnn/src/fallback/conv_bias/im2col/algos.cpp index 6d94e8e504790442876ececc812f17bbb7bbfb4b..d79bd62fc1483749779854a991482daa0b923e67 100644 --- a/dnn/src/fallback/conv_bias/im2col/algos.cpp +++ b/dnn/src/fallback/conv_bias/im2col/algos.cpp @@ -478,7 +478,7 @@ WorkspaceBundle ConvBiasImpl::AlgoIm2col::get_bundle( } size_t ConvBiasImpl::AlgoIm2col::get_workspace( - ConvBiasImpl*, const NCBKernSizeParam& p) 
const { + const NCBKernSizeParam& p) const { MIDOUT_BEGIN(megdnn_fallback_im2col, 0, 0) { return get_bundle(p).total_size_in_bytes(); } @@ -487,7 +487,7 @@ size_t ConvBiasImpl::AlgoIm2col::get_workspace( } SmallVector ConvBiasImpl::AlgoIm2col::dispatch_kerns( - ConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { MIDOUT_BEGIN(megdnn_fallback_im2col, 0, 1) { UNPACK_CONV_F32_NCB_KERN_SIZES(param); MEGDNN_MARK_USED_VAR(SH); @@ -660,12 +660,13 @@ SmallVector ConvBiasImpl::AlgoIm2col::dispatch_kerns( } bool ConvBiasImpl::AlgoIm2col::usable( - ConvBiasImpl* opr, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy /*algo_selection_strategy*/) const { MIDOUT_BEGIN(megdnn_fallback_im2col, 0, 2) { - if (opr->param().format != param::ConvBias::Format::NCHW && - opr->param().format != param::ConvBias::Format::NCHW44_DOT && - opr->param().format != param::ConvBias::Format::NCHW44) { + auto format = param.filter_meta.format; + if (format != param::ConvBias::Format::NCHW && + format != param::ConvBias::Format::NCHW44_DOT && + format != param::ConvBias::Format::NCHW44) { return false; } @@ -695,8 +696,8 @@ bool ConvBiasImpl::AlgoIm2col::usable( } fallback::MatrixMulImpl::AlgoBase::MatmulDescription mdesc = m_matmul_algo->matmul_description(); - if (opr->param().format == param::ConvBias::Format::NCHW44 || - opr->param().format == param::ConvBias::Format::NCHW44_DOT) { + if (format == param::ConvBias::Format::NCHW44 || + format == param::ConvBias::Format::NCHW44_DOT) { //! 
current NCHW44 im2col only support DEFAULT mode matmul if (mdesc.packmode != Pack_Mode::DEFAULT) { return false; diff --git a/dnn/src/fallback/conv_bias/im2col/algos.h b/dnn/src/fallback/conv_bias/im2col/algos.h index 243732d6626b173a306e381d01f677c895c866e7..40f6f557fe0a139f43574325726090c5ea081d89 100644 --- a/dnn/src/fallback/conv_bias/im2col/algos.h +++ b/dnn/src/fallback/conv_bias/im2col/algos.h @@ -15,6 +15,8 @@ #include "src/common/utils.h" #include "src/fallback/conv_bias/opr_impl.h" #include "src/fallback/matrix_mul/opr_impl.h" +#include "src/common/opr_delegate.h" + namespace megdnn { namespace fallback { @@ -54,16 +56,18 @@ public: } return m_name.c_str(); } - bool usable(ConvBiasImpl* opr, const NCBKernSizeParam& param, + bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t get_workspace(ConvBiasImpl*, - const NCBKernSizeParam& param) const override; + size_t get_workspace(const NCBKernSizeParam& param) const override; SmallVector dispatch_kerns( - ConvBiasImpl* opr, const NCBKernSizeParam& param) const override; - bool is_preferred(fallback::ConvBiasImpl* opr, + const NCBKernSizeParam& param) const override; + bool is_preferred( const NCBKernSizeParam& param) const override { if (param.src_type.category() == DTypeCategory::QUANTIZED) { - return opr->is_matmul_quantized_prefer(param); + static CpuOprDelegationStorage<1> storage; + auto conv_bias_opr = storage.get(); + return static_cast(conv_bias_opr) + ->is_matmul_quantized_prefer(param); } auto&& fm = param.filter_meta; auto OC = fm.ocpg, IC = fm.icpg; diff --git a/dnn/src/fallback/conv_bias/opr_impl.cpp b/dnn/src/fallback/conv_bias/opr_impl.cpp index 01026d15035bfc63ff7d132acfdf57ba339847d1..d519fc4029204fd6114fdcd43f7d9eb4180a90c0 100644 --- a/dnn/src/fallback/conv_bias/opr_impl.cpp +++ b/dnn/src/fallback/conv_bias/opr_impl.cpp @@ -54,7 +54,6 @@ class ConvBiasImpl::AlgoPack : NonCopyableObj { public: AlgoPack() { - 
refhold.emplace_back(new AlgoConv1x1Gemv()); all_algos.emplace_back(refhold.back().get()); @@ -121,7 +120,7 @@ bool ConvBiasImpl::is_naive_algo(ConvBiasImpl::Algorithm* algo) { } #define NCB_ALGO_FUNC(name, algo, param) \ - static_cast(algo)->name(this, param) + static_cast(algo)->name(param) void ConvBiasImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_in filter, _megdnn_tensor_in bias, _megdnn_tensor_in z, @@ -243,11 +242,10 @@ ConvBiasImpl::Algorithm* ConvBiasImpl::get_algorithm_heuristic_with_ncb( const NCBKernSizeParam& param, size_t workspace_limit_in_bytes, bool reproducible) { for (auto i : get_all_algorithms_with_ncb(param)) { - size_t need_workspace = NCB_ALGO_FUNC(get_workspace, i, param); if (static_cast(i)->usable_reproducible( - this, param, AlgoSelectionStrategy::HEURISTIC, - reproducible) && - need_workspace <= workspace_limit_in_bytes) { + param, AlgoSelectionStrategy::HEURISTIC, reproducible) && + NCB_ALGO_FUNC(get_workspace, i, param) <= + workspace_limit_in_bytes) { return i; } } @@ -392,8 +390,8 @@ std::vector ConvBiasImpl::get_all_algorithms_with_ncb( std::vector algos; std::vector prefer_algos; for (auto&& algo : algo_pack()) { - if (algo->usable(this, param, AlgoSelectionStrategy::FULL_RUN)) { - if (algo->is_preferred(this, param)) { + if (algo->usable(param, AlgoSelectionStrategy::FULL_RUN)) { + if (algo->is_preferred(param)) { prefer_algos.push_back(algo); } else { algos.push_back(algo); diff --git a/dnn/src/fallback/conv_bias/opr_impl.h b/dnn/src/fallback/conv_bias/opr_impl.h index e35541dca47d9b6f464479d59246d1fedc539d00..84fc7198b4e4aaa4e9d018dffb9265c7a91e2fbe 100644 --- a/dnn/src/fallback/conv_bias/opr_impl.h +++ b/dnn/src/fallback/conv_bias/opr_impl.h @@ -193,7 +193,7 @@ public: //! 
move arm_common to fallback virtual bool is_matmul_quantized_prefer( - const ConvBiasImpl::NCBKernSizeParam& ncb_param) { + const ConvBiasImpl::NCBKernSizeParam& ncb_param) const { MEGDNN_MARK_USED_VAR(ncb_param); return true; }; @@ -209,43 +209,39 @@ public: public: virtual ~AlgoBase() = default; virtual bool usable( - ConvBiasImpl* opr, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const = 0; - virtual size_t get_workspace(ConvBiasImpl* opr, - const NCBKernSizeParam& param) const = 0; + virtual size_t get_workspace(const NCBKernSizeParam& param) const = 0; virtual SmallVector dispatch_kerns( - ConvBiasImpl* opr, const NCBKernSizeParam& param) const = 0; + const NCBKernSizeParam& param) const = 0; virtual SmallVector dispatch_preprocess_kerns( - ConvBiasImpl*, const NCBKernSizeParam&) const { + const NCBKernSizeParam&) const { return {}; }; //! get the layouts of weight_prerocess dst virtual SmallVector deduce_preprocessed_filter_layout( - ConvBiasImpl*, const NCBKernSizeParam&) const { + const NCBKernSizeParam&) const { return {}; }; //! get the workspace when weight_prerocess - virtual size_t get_preprocess_workspace(ConvBiasImpl*, - const NCBKernSizeParam&) const { + virtual size_t get_preprocess_workspace(const NCBKernSizeParam&) const { return 0_z; }; //! Temporarily used to identify whether the matmul algorithm is //! is_preferred. 
- virtual bool is_preferred(ConvBiasImpl*, - const NCBKernSizeParam&) const { + virtual bool is_preferred(const NCBKernSizeParam&) const { return false; } - bool usable_reproducible(ConvBiasImpl* opr, - const NCBKernSizeParam& param, + bool usable_reproducible(const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy, bool reproducible = true) const { return (!reproducible || is_reproducible()) && - usable(opr, param, algo_selection_strategy); + usable(param, algo_selection_strategy); } }; diff --git a/dnn/src/fallback/conv_bias/winograd/winograd.h b/dnn/src/fallback/conv_bias/winograd/winograd.h index a8e77ca309074a9c7caf0560e6f753c7a52a751e..868605e9807e12de19970b4b2530942a2e0247f5 100644 --- a/dnn/src/fallback/conv_bias/winograd/winograd.h +++ b/dnn/src/fallback/conv_bias/winograd/winograd.h @@ -501,9 +501,10 @@ public: Strategy strategy = m_strategy; SmallVector kerns; auto filter_process_kern = - [strategy, bundle, &preprocessed_dst]( + [strategy, bundle, &preprocessed_dst, this]( const NCBKernParam& ncb_param, const NCBKernIndex& ncb_index) mutable { + MEGDNN_MARK_USED_VAR(this); MIDOUT_BEGIN(megdnn_fallback_conv_bias_winograd_common, midout_iv("filter_preprocess"_hash)) { bundle.set(ncb_param.workspace_ptr); @@ -569,9 +570,10 @@ public: param.filter_meta.format == param::ConvBias::Format::NCHW88 || param.filter_meta.format == param::ConvBias::Format::NCHW44)) { auto filter_process_kern = - [strategy = m_strategy, bundle_top, bundle_compute]( + [strategy = m_strategy, bundle_top, bundle_compute, this]( const NCBKernParam& ncb_param, const NCBKernIndex& ncb_index) mutable { + MEGDNN_MARK_USED_VAR(this); MIDOUT_BEGIN(megdnn_fallback_conv_bias_winograd_common, midout_iv("filter_process"_hash)) { bundle_top.set(ncb_param.workspace_ptr); @@ -594,9 +596,10 @@ public: } auto winograd_compute_kern = [strategy = m_strategy, bundle_top, bundle_compute, matmul_algo, - matmul_param, unit_tile_size, - unit_oc_size](const NCBKernParam& ncb_param, - const 
NCBKernIndex& ncb_index) mutable { + matmul_param, unit_tile_size, unit_oc_size, + this](const NCBKernParam& ncb_param, + const NCBKernIndex& ncb_index) mutable { + MEGDNN_MARK_USED_VAR(this); MIDOUT_BEGIN(megdnn_fallback_conv_bias_winograd_common, midout_iv("winograd_compute"_hash)) { bundle_top.set(ncb_param.workspace_ptr); @@ -728,43 +731,43 @@ public: } \ MIDOUT_END(); -#define MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(_class, _strategy, _midout_flag, \ - _matmul_format) \ - size_t ConvBiasImpl::_class::get_workspace( \ - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { \ - MEGDNN_WINOGRADS_ALGO_FUN_DEFINE(_class, get_workspace_size, \ - _strategy, _midout_flag, \ - _matmul_format); \ - return 0; \ - } \ - size_t ConvBiasImpl::_class::get_preprocess_workspace( \ - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { \ - MEGDNN_WINOGRADS_ALGO_FUN_DEFINE( \ - _class, get_preprocess_workspace_size, _strategy, \ - _midout_flag, _matmul_format); \ - return 0; \ - } \ - SmallVector \ - ConvBiasImpl::_class::deduce_preprocessed_filter_layout( \ - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { \ - MEGDNN_WINOGRADS_ALGO_FUN_DEFINE( \ - _class, deduce_preprocessed_filter_layout, _strategy, \ - _midout_flag, _matmul_format); \ - return {}; \ - } \ - SmallVector \ - ConvBiasImpl::_class::dispatch_preprocess_kerns( \ - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { \ - MEGDNN_WINOGRADS_ALGO_FUN_DEFINE(_class, get_preprocess_kerns, \ - _strategy, _midout_flag, \ - _matmul_format); \ - return {}; \ - } \ - SmallVector ConvBiasImpl::_class::dispatch_kerns( \ - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { \ - MEGDNN_WINOGRADS_ALGO_FUN_DEFINE(_class, get_kerns, _strategy, \ - _midout_flag, _matmul_format); \ - return {}; \ +#define MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(_class, _strategy, _midout_flag, \ + _matmul_format) \ + size_t ConvBiasImpl::_class::get_workspace(const NCBKernSizeParam& param) \ + const { 
\ + MEGDNN_WINOGRADS_ALGO_FUN_DEFINE(_class, get_workspace_size, \ + _strategy, _midout_flag, \ + _matmul_format); \ + return 0; \ + } \ + size_t ConvBiasImpl::_class::get_preprocess_workspace( \ + const NCBKernSizeParam& param) const { \ + MEGDNN_WINOGRADS_ALGO_FUN_DEFINE( \ + _class, get_preprocess_workspace_size, _strategy, \ + _midout_flag, _matmul_format); \ + return 0; \ + } \ + SmallVector \ + ConvBiasImpl::_class::deduce_preprocessed_filter_layout( \ + const NCBKernSizeParam& param) const { \ + MEGDNN_WINOGRADS_ALGO_FUN_DEFINE( \ + _class, deduce_preprocessed_filter_layout, _strategy, \ + _midout_flag, _matmul_format); \ + return {}; \ + } \ + SmallVector \ + ConvBiasImpl::_class::dispatch_preprocess_kerns( \ + const NCBKernSizeParam& param) const { \ + MEGDNN_WINOGRADS_ALGO_FUN_DEFINE(_class, get_preprocess_kerns, \ + _strategy, _midout_flag, \ + _matmul_format); \ + return {}; \ + } \ + SmallVector ConvBiasImpl::_class::dispatch_kerns( \ + const NCBKernSizeParam& param) const { \ + MEGDNN_WINOGRADS_ALGO_FUN_DEFINE(_class, get_kerns, _strategy, \ + _midout_flag, _matmul_format); \ + return {}; \ } // vim: syntax=cpp.doxygen diff --git a/dnn/src/fallback/convolution/algos.cpp b/dnn/src/fallback/convolution/algos.cpp index 392d54b0eb7a050e32471293ec4403330886c022..993df3db6ce55909d473c22784edabd8c129e41e 100644 --- a/dnn/src/fallback/convolution/algos.cpp +++ b/dnn/src/fallback/convolution/algos.cpp @@ -164,7 +164,7 @@ void kern_direct(const NCBKernParam& param) { /* ===================== fallback algo ===================== */ bool ConvolutionImpl::AlgoFallback::usable( - ConvolutionImpl*, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy /*algo_selection_strategy*/) const { auto&& fm = param.filter_meta; return fm.format == param::Convolution::Format::NCHW && @@ -175,7 +175,7 @@ bool ConvolutionImpl::AlgoFallback::usable( } size_t ConvolutionImpl::AlgoFallback::get_workspace( - ConvolutionImpl*, const NCBKernSizeParam& 
param) const { + const NCBKernSizeParam& param) const { auto FH = param.filter_meta.spatial[0], FW = param.filter_meta.spatial[1]; size_t nr_threads = param.nr_threads; if (param.filter_meta.should_flip) { @@ -190,11 +190,11 @@ size_t ConvolutionImpl::AlgoFallback::get_workspace( SmallVector ConvolutionImpl::AlgoFallback::dispatch_kern( - ConvolutionImpl* opr, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { size_t group = param.filter_meta.group; size_t N = param.n; size_t nr_threads = param.nr_threads; - size_t workspace_per_thread = get_workspace(opr, param) / nr_threads; + size_t workspace_per_thread = get_workspace( param) / nr_threads; auto kern_fallback = [workspace_per_thread](const NCBKernParam& p, const NCBKernIndex& ncb_index) { UNPACK_CONV_F32_NCB_KERN_SIZES(p); @@ -218,7 +218,7 @@ ConvolutionImpl::AlgoFallback::dispatch_kern( /* ===================== naive algo ===================== */ bool ConvolutionImpl::AlgoNaive::usable( - ConvolutionImpl*, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy /*algo_selection_strategy*/) const { bool ret = false; @@ -241,7 +241,7 @@ bool ConvolutionImpl::AlgoNaive::usable( } SmallVector ConvolutionImpl::AlgoNaive::dispatch_kern( - ConvolutionImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { size_t N = param.n; size_t group = param.filter_meta.group; #define cb(dt, cmode, compute_type) \ @@ -289,75 +289,42 @@ SmallVector ConvolutionImpl::AlgoNaive::dispatch_kern( /* ===================== default algo ===================== */ -ConvolutionImpl::AlgoDefault::AlgoDefault(fallback::ConvBiasImpl* conv_bias_opr, - ConvBiasImpl::AlgoBase* algorithm) - : m_conv_bias_opr(conv_bias_opr), m_algorithm(algorithm) { +ConvolutionImpl::AlgoDefault::AlgoDefault(ConvBiasImpl::AlgoBase* algorithm) + : m_algorithm(algorithm) { megdnn_assert_internal(algorithm); m_name = ssprintf("CONVOLUTION_DEFAULT_%s", m_algorithm->name()); } 
ConvBiasImpl::NCBKernSizeParam -ConvolutionImpl::AlgoDefault::AlgoDefault::init_convbias_opr_and_param( - ConvBiasImpl* conv_bias_opr, const NCBKernSizeParam& param) { +ConvolutionImpl::AlgoDefault::init_conv_bias_param( + const NCBKernSizeParam& param) { DType bias_type = param.dst_type; if (bias_type.category() == DTypeCategory::QUANTIZED) { bias_type = dtype::QuantizedS32( mul_scale(param.src_type, param.filter_type)); } - - ::ConvBiasImpl::NCBKernSizeParam conv_bias_size_param( - param, 0, param::MatrixMul::Format::DEFAULT, bias_type, 0, - BiasMode::NO_BIAS, param::ConvBias::NonlineMode::IDENTITY); - // nonline mode - conv_bias_opr->param().nonlineMode = conv_bias_size_param.nonlineMode; - // convolution mode - if (conv_bias_size_param.filter_meta.should_flip) { - conv_bias_opr->param().mode = param::ConvolutionV0::Mode::CONVOLUTION; - } else { - conv_bias_opr->param().mode = - param::ConvolutionV0::Mode::CROSS_CORRELATION; - } - // sparse - if (conv_bias_size_param.filter_meta.group > 1) { - conv_bias_opr->param().sparse = param::ConvolutionV0::Sparse::GROUP; - } else { - conv_bias_opr->param().sparse = param::ConvolutionV0::Sparse::DENSE; - } - // format - conv_bias_opr->param().format = conv_bias_size_param.filter_meta.format; - // pad stride dilate - conv_bias_opr->param().pad_h = conv_bias_size_param.filter_meta.padding[0]; - conv_bias_opr->param().pad_w = conv_bias_size_param.filter_meta.padding[1]; - conv_bias_opr->param().stride_h = - conv_bias_size_param.filter_meta.stride[0]; - conv_bias_opr->param().stride_w = - conv_bias_size_param.filter_meta.stride[1]; - conv_bias_opr->param().dilate_h = - conv_bias_size_param.filter_meta.dilation[0]; - conv_bias_opr->param().dilate_w = - conv_bias_size_param.filter_meta.dilation[1]; - // output_block_size - conv_bias_opr->param().output_block_size = - conv_bias_size_param.output_block_size; - // compute_mode - conv_bias_opr->param().compute_mode = conv_bias_size_param.compute_mode; - - return 
conv_bias_size_param; + return {param, + 0, + param::MatrixMul::Format::DEFAULT, + bias_type, + 0, + BiasMode::NO_BIAS, + param::ConvBias::NonlineMode::IDENTITY}; } bool ConvolutionImpl::AlgoDefault::is_preferred( - ConvolutionImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { ::ConvBiasImpl::NCBKernSizeParam conv_bias_param = - init_convbias_opr_and_param(m_conv_bias_opr, param); - return m_algorithm->is_preferred(m_conv_bias_opr, conv_bias_param); + init_conv_bias_param(param); + return m_algorithm->is_preferred(conv_bias_param); } bool ConvolutionImpl::AlgoDefault::usable( - ConvolutionImpl*, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const { ::ConvBiasImpl::NCBKernSizeParam conv_bias_param = - init_convbias_opr_and_param(m_conv_bias_opr, param); - return m_algorithm->usable(m_conv_bias_opr, conv_bias_param, + init_conv_bias_param(param); + return m_algorithm->usable(conv_bias_param, static_cast( algo_selection_strategy)); } @@ -365,69 +332,62 @@ bool ConvolutionImpl::AlgoDefault::usable( WorkspaceBundle ConvolutionImpl::AlgoDefault::get_bundle( const NCBKernSizeParam& param) const { ::ConvBiasImpl::NCBKernSizeParam conv_bias_param = - init_convbias_opr_and_param(m_conv_bias_opr, param); - m_conv_bias_opr->execution_policy() = {m_algorithm}; + init_conv_bias_param(param); return WorkspaceBundle(nullptr, {m_algorithm->get_workspace( - m_conv_bias_opr, conv_bias_param)}); + conv_bias_param)}); } size_t ConvolutionImpl::AlgoDefault::get_workspace( - ConvolutionImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { return get_bundle(param).total_size_in_bytes(); } size_t ConvolutionImpl::AlgoDefault::get_preprocess_workspace( - ConvolutionImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { ::ConvBiasImpl::NCBKernSizeParam conv_bias_param = - init_convbias_opr_and_param(m_conv_bias_opr, param); - 
m_conv_bias_opr->execution_policy() = {m_algorithm}; - return m_algorithm->get_preprocess_workspace(m_conv_bias_opr, - conv_bias_param); + init_conv_bias_param(param); + return m_algorithm->get_preprocess_workspace(conv_bias_param); } SmallVector ConvolutionImpl::AlgoDefault::deduce_preprocessed_filter_layout( - ConvolutionImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { ::ConvBiasImpl::NCBKernSizeParam conv_bias_param = - init_convbias_opr_and_param(m_conv_bias_opr, param); - m_conv_bias_opr->execution_policy() = {m_algorithm}; - return m_algorithm->deduce_preprocessed_filter_layout(m_conv_bias_opr, - conv_bias_param); + init_conv_bias_param( param); + return m_algorithm->deduce_preprocessed_filter_layout(conv_bias_param); } //! Return the implement preprocess kernel SmallVector ConvolutionImpl::AlgoDefault::get_preprocess_kimpl( - ::ConvBiasImpl* conv_bias_opr, ConvBiasImpl::AlgoBase* algo, + ConvBiasImpl::AlgoBase* algo, const NCBKernSizeParam& param) { MIDOUT_BEGIN(megdnn_fallback_conv, midout_iv("get_preprocess_kimpl"_hash)) { // construct the conv_bias kern param ::ConvBiasImpl::NCBKernParam conv_bias_param; - ::ConvBiasImpl::NCBKernSizeParam conv_bias_size_param = - init_convbias_opr_and_param(conv_bias_opr, param); static_cast<::ConvBiasImpl::NCBKernSizeParam&>(conv_bias_param) = - conv_bias_size_param; + init_conv_bias_param(param); auto conv_bias_preprocess_kerns = - algo->dispatch_preprocess_kerns(conv_bias_opr, conv_bias_param); + algo->dispatch_preprocess_kerns(conv_bias_param); SmallVector convolution_preprocess_kerns; //! 
Set the conv_bias param using convolution param - auto set_copy_param_filter_workspace_ptr = + auto set_param_filter_workspace_ptr = [](const NCBKernParam& conv_param, - ::ConvBiasImpl::NCBKernParam& copied_param) { - copied_param.filter_ptr = conv_param.filter_ptr; - copied_param.workspace_ptr = conv_param.workspace_ptr; - copied_param.workspace_size = conv_param.workspace_size; + ::ConvBiasImpl::NCBKernParam& conv_bias_param) { + conv_bias_param.filter_ptr = conv_param.filter_ptr; + conv_bias_param.workspace_ptr = conv_param.workspace_ptr; + conv_bias_param.workspace_size = conv_param.workspace_size; }; for (size_t i = 0; i < conv_bias_preprocess_kerns.size(); i++) { auto kernel = conv_bias_preprocess_kerns[i]; //! If the kerenl batch parallel - auto run = [=](const NCBKernParam& p, - const NCBKernIndex& ncb_index) { - auto copy_param = conv_bias_param; - set_copy_param_filter_workspace_ptr(p, copy_param); - kernel.kern(copy_param, - {ncb_index.thread_id, ncb_index.ndrange_id}); + auto run = [param = conv_bias_param, kernel, + &set_param_filter_workspace_ptr]( + const NCBKernParam& p, + const NCBKernIndex& ncb_index) mutable { + set_param_filter_workspace_ptr(p, param); + kernel.kern(param, {ncb_index.thread_id, ncb_index.ndrange_id}); }; convolution_preprocess_kerns.push_back({run, kernel.global_size}); } @@ -438,38 +398,35 @@ ConvolutionImpl::AlgoDefault::get_preprocess_kimpl( //! 
Return the implement kernel SmallVector ConvolutionImpl::AlgoDefault::get_kimpl( - ::ConvBiasImpl* conv_bias_opr, ConvBiasImpl::AlgoBase* algo, + ConvBiasImpl::AlgoBase* algo, const NCBKernSizeParam& param) { MIDOUT_BEGIN(megdnn_fallback_conv, midout_iv(0)) { // construct the conv_bias kern param ::ConvBiasImpl::NCBKernParam conv_bias_param; - ::ConvBiasImpl::NCBKernSizeParam conv_bias_size_param = - init_convbias_opr_and_param(conv_bias_opr, param); static_cast<::ConvBiasImpl::NCBKernSizeParam&>(conv_bias_param) = - conv_bias_size_param; - auto conv_bias_kerns = - algo->dispatch_kerns(conv_bias_opr, conv_bias_param); + init_conv_bias_param(param); + auto&& conv_bias_kerns = algo->dispatch_kerns(conv_bias_param); SmallVector convolution_kerns; //! Set the conv_bias param using convolution param auto set_copy_param_compute_address = [](const NCBKernParam& conv_param, - ::ConvBiasImpl::NCBKernParam& copied_param) { - copied_param.src_ptr = conv_param.src_ptr; - copied_param.filter_ptr = conv_param.filter_ptr; - copied_param.dst_ptr = conv_param.dst_ptr; - copied_param.workspace_ptr = conv_param.workspace_ptr; - copied_param.workspace_size = conv_param.workspace_size; + ::ConvBiasImpl::NCBKernParam& conv_bias_param) { + conv_bias_param.src_ptr = conv_param.src_ptr; + conv_bias_param.filter_ptr = conv_param.filter_ptr; + conv_bias_param.dst_ptr = conv_param.dst_ptr; + conv_bias_param.workspace_ptr = conv_param.workspace_ptr; + conv_bias_param.workspace_size = conv_param.workspace_size; }; for (size_t i = 0; i < conv_bias_kerns.size(); i++) { - auto kernel = conv_bias_kerns[i]; + auto&& kernel = conv_bias_kerns[i]; //! 
If the kerenl batch parallel - auto run = [=](const NCBKernParam& p, - const NCBKernIndex& ncb_index) { - auto copy_param = conv_bias_param; - set_copy_param_compute_address(p, copy_param); - kernel.kern(copy_param, - {ncb_index.thread_id, ncb_index.ndrange_id}); + auto run = [param = conv_bias_param, kernel, + &set_copy_param_compute_address]( + const NCBKernParam& p, + const NCBKernIndex& ncb_index) mutable { + set_copy_param_compute_address(p, param); + kernel.kern(param, {ncb_index.thread_id, ncb_index.ndrange_id}); }; convolution_kerns.push_back({run, kernel.global_size}); } diff --git a/dnn/src/fallback/convolution/algos.h b/dnn/src/fallback/convolution/algos.h index d5a44ae8ae10ddf62d8c4c94b50a1754db5a6bcf..591feb0ff4297ee3d4b24fb56f4174bb02ace4ca 100644 --- a/dnn/src/fallback/convolution/algos.h +++ b/dnn/src/fallback/convolution/algos.h @@ -6,7 +6,8 @@ * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. 
*/ #pragma once @@ -35,10 +36,10 @@ void kern_naive_forward(const ConvolutionImpl::NCBKernParam& p, src.layout.dtype = p.src_type; dst.layout.dtype = p.dst_type; if (p.filter_meta.format == param::Convolution::Format::NCHW) { - istrd *= p.isz[0] * p.isz[1]; - ostrd *= p.osz[0] * p.osz[1]; - src.layout.init_contiguous_stride({1, IC, IH, IW}); - dst.layout.init_contiguous_stride({1, OC, OH, OW}); + istrd *= p.isz[0] * p.isz[1]; + ostrd *= p.osz[0] * p.osz[1]; + src.layout.init_contiguous_stride({1, IC, IH, IW}); + dst.layout.init_contiguous_stride({1, OC, OH, OW}); } else { // Must be NHWC megdnn_assert( @@ -75,14 +76,12 @@ class ConvolutionImpl::AlgoFallback final : public AlgoBase { public: bool is_reproducible() const override { return true; } const char* name() const override { return "FALLBACK_ALGO"; } - bool usable(ConvolutionImpl* opr, const NCBKernSizeParam& param, + bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t get_workspace(ConvolutionImpl* opr, - const NCBKernSizeParam& param) const override; + size_t get_workspace(const NCBKernSizeParam& param) const override; SmallVector dispatch_kern( - ConvolutionImpl* /*opr*/, const NCBKernSizeParam& /*param*/) const override; }; @@ -90,66 +89,55 @@ class ConvolutionImpl::AlgoNaive final : public AlgoBase { public: bool is_reproducible() const override { return true; } const char* name() const override { return "NAIVE_ALGO"; } - bool usable(ConvolutionImpl* /*opr*/, const NCBKernSizeParam& /*param*/, + bool usable(const NCBKernSizeParam& /*param*/, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t get_workspace(ConvolutionImpl*, - const NCBKernSizeParam&) const override { - return 0; - }; + size_t get_workspace(const NCBKernSizeParam&) const override { return 0; }; SmallVector dispatch_kern( - ConvolutionImpl* /*opr*/, const NCBKernSizeParam& /*param*/) const override; }; class ConvolutionImpl::AlgoDefault final : public 
AlgoBase { - static ConvBiasImpl::NCBKernSizeParam init_convbias_opr_and_param( - ConvBiasImpl* conv_bias_opr, const NCBKernSizeParam& param); + static ConvBiasImpl::NCBKernSizeParam init_conv_bias_param( + const NCBKernSizeParam& param); WorkspaceBundle get_bundle(const NCBKernSizeParam& param) const; - static SmallVector get_kimpl(ConvBiasImpl* conv_bias_opr, - ConvBiasImpl::AlgoBase* algo, + static SmallVector get_kimpl(ConvBiasImpl::AlgoBase* algo, const NCBKernSizeParam& param); static SmallVector get_preprocess_kimpl( - ConvBiasImpl* conv_bias_opr, ConvBiasImpl::AlgoBase* algo, - const NCBKernSizeParam& param); + ConvBiasImpl::AlgoBase* algo, const NCBKernSizeParam& param); public: - AlgoDefault(fallback::ConvBiasImpl* conv_bias_opr, ConvBiasImpl::AlgoBase*); + AlgoDefault(ConvBiasImpl::AlgoBase*); bool is_reproducible() const override { return true; } const char* name() const override { return m_name.c_str(); } - bool usable(ConvolutionImpl* opr, const NCBKernSizeParam& param, + bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t get_workspace(ConvolutionImpl* opr, - const NCBKernSizeParam& param) const override; + size_t get_workspace(const NCBKernSizeParam& param) const override; - size_t get_preprocess_workspace(ConvolutionImpl*, - const NCBKernSizeParam&) const override; + size_t get_preprocess_workspace(const NCBKernSizeParam&) const override; SmallVector deduce_preprocessed_filter_layout( - ConvolutionImpl*, const NCBKernSizeParam&) const override; + const NCBKernSizeParam&) const override; SmallVector dispatch_preprocess_kern( - ConvolutionImpl*, const NCBKernSizeParam& param) const override { - return get_preprocess_kimpl(m_conv_bias_opr, m_algorithm, param); + const NCBKernSizeParam& param) const override { + return get_preprocess_kimpl(m_algorithm, param); } SmallVector dispatch_kern( - ConvolutionImpl* /*opr*/, const NCBKernSizeParam& param) const override { - return 
get_kimpl(m_conv_bias_opr, m_algorithm, param); + return get_kimpl(m_algorithm, param); } void* type() const override { return sm_fallback_conv_algo_type; } //! select matmul to the highest preference - bool is_preferred(ConvolutionImpl* opr, - const NCBKernSizeParam& param) const override; + bool is_preferred(const NCBKernSizeParam& param) const override; private: std::string m_name; - fallback::ConvBiasImpl* m_conv_bias_opr; ConvBiasImpl::AlgoBase* m_algorithm; }; diff --git a/dnn/src/fallback/convolution/opr_impl.cpp b/dnn/src/fallback/convolution/opr_impl.cpp index 059f1c8e882029b8e52b4943e78682c9a8ad1fda..41f0445021eaa45a5ba896214635b4257fd0df0b 100644 --- a/dnn/src/fallback/convolution/opr_impl.cpp +++ b/dnn/src/fallback/convolution/opr_impl.cpp @@ -59,8 +59,7 @@ public: static_cast(conv_bias_opr)->algo_pack(); for (auto&& algorithm : conv_bias_algo) { // fallback algo - refhold.emplace_back(new AlgoDefault( - static_cast(conv_bias_opr), algorithm)); + refhold.emplace_back(new AlgoDefault(algorithm)); all_algos.emplace_back(refhold.back().get()); } @@ -82,7 +81,7 @@ bool ConvolutionImpl::is_naive_algo(ConvolutionImpl::Algorithm* algo) { } #define NCB_ALGO_FUNC(name, algo, param) \ - static_cast(algo)->name(this, fparam) + static_cast(algo)->name(param) void ConvolutionImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_in filter, _megdnn_tensor_out dst, @@ -131,7 +130,7 @@ size_t ConvolutionImpl::get_workspace_in_bytes( return naive::ConvolutionForwardImpl::get_workspace_in_bytes( src, filter, dst, preprocessed_filter); } else { - return static_cast(algo)->get_workspace(this, fparam); + return NCB_ALGO_FUNC(get_workspace, algo, fparam); } } @@ -144,8 +143,7 @@ size_t ConvolutionImpl::get_preprocess_workspace_in_bytes( return naive::ConvolutionForwardImpl::get_preprocess_workspace_in_bytes( src, filter, dst); } else { - return static_cast(algo)->get_preprocess_workspace(this, - fparam); + return NCB_ALGO_FUNC(get_preprocess_workspace, algo, fparam); } } @@ -158,8 
+156,7 @@ SmallVector ConvolutionImpl::deduce_preprocessed_filter_layout( return naive::ConvolutionForwardImpl::deduce_preprocessed_filter_layout( src, filter, dst); } else { - return static_cast(algo)->deduce_preprocessed_filter_layout( - this, fparam); + return NCB_ALGO_FUNC(deduce_preprocessed_filter_layout, algo, fparam); } } @@ -251,8 +248,7 @@ ConvolutionImpl::NCBKernParam ConvolutionImpl::make_ncb_kern_param( void ConvolutionImpl::exec_preprocess_with_ncb_kern(const NCBKernParam& param, Algorithm* algo) { - auto kerns = - static_cast(algo)->dispatch_preprocess_kern(this, param); + auto kerns = NCB_ALGO_FUNC(dispatch_preprocess_kern, algo, param); auto fallback_handle = handle(); for (auto kernel : kerns) { megdnn_assert( @@ -272,14 +268,15 @@ void ConvolutionImpl::exec_preprocess_with_ncb_kern(const NCBKernParam& param, void ConvolutionImpl::exec_with_ncb_kern(const NCBKernParam& param, Algorithm* algo) { - auto kerns = static_cast(algo)->dispatch_kern(this, param); + auto kerns = NCB_ALGO_FUNC(dispatch_kern, algo, param); auto fallback_handle = handle(); for (auto kernel : kerns) { - megdnn_assert(param.filter_meta.format == Param::Format::NCHW || - param.filter_meta.format == Param::Format::NHWC || - param.filter_meta.format == Param::Format::NCHW88 || - param.filter_meta.format == Param::Format::NCHW44, - "invalid conv format"); + megdnn_assert( + param.filter_meta.format == Param::Format::NCHW || + param.filter_meta.format == Param::Format::NHWC || + param.filter_meta.format == Param::Format::NCHW88 || + param.filter_meta.format == Param::Format::NCHW44, + "invalid conv format"); auto run = [param, kernel](size_t index, size_t thread_id) { CpuNDRange ndrange_id(kernel.global_size, index); kernel.kern(param, {thread_id, ndrange_id}); @@ -293,13 +290,11 @@ ConvolutionImpl::Algorithm* ConvolutionImpl::get_algorithm_heuristic_with_ncb( const NCBKernSizeParam& param, size_t workspace_limit_in_bytes, bool reproducible) { for (auto i : 
get_all_algorithms_with_ncb(param)) { - size_t need_workspace = - static_cast(i)->get_workspace(this, param); bool usable_reproducible = static_cast(i)->usable_reproducible( - this, param, AlgoSelectionStrategy::HEURISTIC, - reproducible); - if (usable_reproducible && need_workspace <= workspace_limit_in_bytes) { + param, AlgoSelectionStrategy::HEURISTIC, reproducible); + if (usable_reproducible && NCB_ALGO_FUNC(get_workspace, i, param) <= + workspace_limit_in_bytes) { return i; } } @@ -311,8 +306,8 @@ ConvolutionImpl::get_all_algorithms_with_ncb(const NCBKernSizeParam& param) { std::vector ret; std::vector prefer_algos; for (auto&& i : algo_pack()) { - if (i->usable(this, param, AlgoSelectionStrategy::FULL_RUN)) { - if (i->is_preferred(this, param)) { + if (i->usable(param, AlgoSelectionStrategy::FULL_RUN)) { + if (i->is_preferred(param)) { prefer_algos.push_back(i); } else { ret.push_back(i); diff --git a/dnn/src/fallback/convolution/opr_impl.h b/dnn/src/fallback/convolution/opr_impl.h index 77c6d7402bd902f7048b5a89ae862a91e743db1c..f2dbf198555b86e2e87fd2d92518bf2ea03e098f 100644 --- a/dnn/src/fallback/convolution/opr_impl.h +++ b/dnn/src/fallback/convolution/opr_impl.h @@ -178,42 +178,38 @@ public: class AlgoBase : public Algorithm { public: virtual ~AlgoBase() = default; - virtual bool usable(ConvolutionImpl* opr, const NCBKernSizeParam& param, + virtual bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy) const = 0; - virtual size_t get_workspace(ConvolutionImpl* opr, - const NCBKernSizeParam& param) const = 0; + virtual size_t get_workspace(const NCBKernSizeParam& param) const = 0; virtual SmallVector dispatch_kern( - ConvolutionImpl* opr, const NCBKernSizeParam& param) const = 0; + const NCBKernSizeParam& param) const = 0; virtual SmallVector dispatch_preprocess_kern( - ConvolutionImpl*, const NCBKernSizeParam&) const { + const NCBKernSizeParam&) const { return {}; }; //! 
get the layouts of weight_prerocess dst virtual SmallVector deduce_preprocessed_filter_layout( - ConvolutionImpl*, const NCBKernSizeParam&) const { + const NCBKernSizeParam&) const { return {}; }; //! get the workspace when weight_prerocess - virtual size_t get_preprocess_workspace(ConvolutionImpl*, - const NCBKernSizeParam&) const { + virtual size_t get_preprocess_workspace(const NCBKernSizeParam&) const { return 0_z; }; //! Temporarily used to identify whether the matmul algorithm is //! is_preferred. - virtual bool is_preferred(ConvolutionImpl*, - const NCBKernSizeParam&) const { + virtual bool is_preferred(const NCBKernSizeParam&) const { return false; } - bool usable_reproducible(ConvolutionImpl* opr, - const NCBKernSizeParam& param, + bool usable_reproducible(const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy, bool reproducible = true) const { return (!reproducible || is_reproducible()) && - usable(opr, param, algo_selection_strategy); + usable(param, algo_selection_strategy); } }; diff --git a/dnn/src/x86/conv_bias/f32/algos.cpp b/dnn/src/x86/conv_bias/f32/algos.cpp index b13fc3145e90316ddcfaef0f4e351d2182636e21..732485239c053b73381447a8e20fcd665daeef34 100644 --- a/dnn/src/x86/conv_bias/f32/algos.cpp +++ b/dnn/src/x86/conv_bias/f32/algos.cpp @@ -6,7 +6,8 @@ * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. 
*/ #include "src/x86/conv_bias/f32/algos.h" @@ -104,7 +105,7 @@ void get_rectified_size(size_t IH, size_t IW, size_t OH, size_t OW, size_t FH, /* ===================== direct algo ===================== */ bool ConvBiasImpl::AlgoDirect::usable( - FallbackConvBiasImpl* /*opr*/, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const { auto&& fm = param.filter_meta; bool aviliable = fm.format == Param::Format::NCHW && fm.spatial_ndim == 2 && @@ -142,7 +143,7 @@ WorkspaceBundle ConvBiasImpl::AlgoDirect::get_bundle( return {nullptr, {part0, part1}}; } size_t ConvBiasImpl::AlgoDirect::get_workspace( - FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { return get_bundle(param).total_size_in_bytes(); } @@ -280,7 +281,8 @@ void ConvBiasImpl::AlgoDirect::do_conv_kern(const WorkspaceBundle& bundle, size_t workspace_group_id = workspace_ids[0], workspace_batch_id = workspace_ids[1], oc = workspace_ids[2]; const float* sptr = kern_param.src(batch_id, group_id); - const float* filter = kern_param.filter(group_id) + oc * FH * FW * IC; + const float* filter = + kern_param.filter(group_id) + oc * FH * FW * IC; const float* bias_ptr = kern_param.bias(batch_id, group_id) + oc * bias_offset; float* dst = kern_param.dst(batch_id, group_id) + oc * OH * OW; @@ -318,7 +320,7 @@ SmallVector ConvBiasImpl::AlgoDirect::get_kimpls( } /* ===================== direct-stride2 algo ===================== */ bool ConvBiasImpl::AlgoDirectStride2::usable( - FallbackConvBiasImpl*, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const { auto&& fm = param.filter_meta; auto FH = fm.spatial[0]; @@ -363,7 +365,7 @@ WorkspaceBundle ConvBiasImpl::AlgoDirectStride2::get_bundle( } size_t ConvBiasImpl::AlgoDirectStride2::get_workspace( - FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { 
return get_bundle(param).total_size_in_bytes(); } //! Process one input channel copy padding @@ -528,7 +530,7 @@ WorkspaceBundle ConvBiasImpl::AlgoMatrixMul::get_bundle( } bool ConvBiasImpl::AlgoMatrixMul::is_preferred( - FallbackConvBiasImpl* opr, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { auto&& fm = param.filter_meta; if (fm.dilation[0] != 1 || fm.dilation[1] != 1) { return false; @@ -550,7 +552,7 @@ bool ConvBiasImpl::AlgoMatrixMul::is_preferred( int ic = find_nearest_elem(fm.icpg, {4, 8, 16, 32, 64, 96, 128}); int on = std::round(geometric_mean(param.osz[0], param.osz[1])); ProfileElement cur(f, oc, ic, on); - auto H = static_cast(opr->handle()); + auto H = static_cast(inplace_cpu_handle().get()); auto&& target = std::lower_bound(H->profile_cache().begin(), H->profile_cache().end(), cur); megdnn_assert_internal(target->f == cur.f); diff --git a/dnn/src/x86/conv_bias/f32/algos.h b/dnn/src/x86/conv_bias/f32/algos.h index 5ed6e05162426d100b6a1ce2583e5125b9aedc05..0f9111e17deb2cc13144331db2db51664c2af7c4 100644 --- a/dnn/src/x86/conv_bias/f32/algos.h +++ b/dnn/src/x86/conv_bias/f32/algos.h @@ -6,7 +6,8 @@ * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. */ #pragma once @@ -37,14 +38,13 @@ public: return m_large_group ? 
"X86_CONV_BIAS_DIRECT_STRIDE1_LARGE_GROUP" : "X86_CONV_BIAS_DIRECT_STRIDE1_SMALL_GROUP"; } - bool usable(FallbackConvBiasImpl* opr, const NCBKernSizeParam& param, + bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t get_workspace(FallbackConvBiasImpl* opr, - const NCBKernSizeParam& param) const override; + size_t get_workspace(const NCBKernSizeParam& param) const override; virtual SmallVector dispatch_kerns( - fallback::ConvBiasImpl*, + const NCBKernSizeParam& param) const override { return get_kimpls(param); } @@ -74,14 +74,13 @@ public: return m_large_group ? "X86_CONV_BIAS_DIRECT_STRIDE2_LARGE_GROUP" : "X86_CONV_BIAS_DIRECT_STRIDE2_SMALL_GROUP"; } - bool usable(FallbackConvBiasImpl* opr, const NCBKernSizeParam& param, + bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t get_workspace(FallbackConvBiasImpl* opr, - const NCBKernSizeParam& param) const override; + size_t get_workspace(const NCBKernSizeParam& param) const override; virtual SmallVector dispatch_kerns( - fallback::ConvBiasImpl*, + const NCBKernSizeParam& param) const override { return get_kimpls(param); } @@ -131,7 +130,7 @@ public: bool is_reproducible() const override { return true; } const char* name() const override { return "X86_CONV_BIAS_MATMUL"; } - bool usable(FallbackConvBiasImpl*, const NCBKernSizeParam& param, + bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy) const override { auto&& fm = param.filter_meta; return fm.format == Param::Format::NCHW && fm.spatial_ndim == 2 && @@ -145,15 +144,12 @@ public: param.nr_threads == 1_z; } - bool is_preferred(FallbackConvBiasImpl*, - const NCBKernSizeParam&) const override; + bool is_preferred(const NCBKernSizeParam&) const override; - size_t get_workspace(FallbackConvBiasImpl*, - const NCBKernSizeParam& param) const override { + size_t get_workspace(const NCBKernSizeParam& param) const override { return 
get_bundle(param).total_size_in_bytes(); } SmallVector dispatch_kerns( - FallbackConvBiasImpl* /*opr*/, const NCBKernSizeParam& param) const override { size_t group = param.filter_meta.group; return {{kimpl, {group, 1_z, 1_z}}}; @@ -171,7 +167,7 @@ public: AlgoMkldnnConv() {} bool is_reproducible() const override { return true; } const char* name() const override { return "MKLDNN_CONV_FP32"; } - bool usable(FallbackConvBiasImpl*, const NCBKernSizeParam& param, + bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy) const override { auto&& fm = param.filter_meta; @@ -184,13 +180,9 @@ public: return ok; }; - size_t get_workspace(FallbackConvBiasImpl* /*opr*/, - const NCBKernSizeParam&) const override { - return 0; - } + size_t get_workspace(const NCBKernSizeParam&) const override { return 0; } SmallVector dispatch_kerns( - FallbackConvBiasImpl* /*opr*/, const NCBKernSizeParam& /*param*/) const override { auto kern = [](const NCBKernParam& param, const NCBKernIndex& ncb_index) { diff --git a/dnn/src/x86/conv_bias/f32/winograd_algo.cpp b/dnn/src/x86/conv_bias/f32/winograd_algo.cpp index 0a54da96df3f1a82260e475f9b6b2799ed56f807..e8c358c0538699ced171af443158a3f8aca87868 100644 --- a/dnn/src/x86/conv_bias/f32/winograd_algo.cpp +++ b/dnn/src/x86/conv_bias/f32/winograd_algo.cpp @@ -6,16 +6,17 @@ * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. 
*/ -#include "src/x86/conv_bias/f32/algos.h" #include "src/common/utils.h" +#include "src/x86/conv_bias/f32/algos.h" +#include "src/x86/conv_bias/f32/strategy.h" #include "src/x86/conv_bias/opr_impl.h" #include "src/x86/conv_bias/postprocess_helper.h" #include "src/x86/handle.h" #include "src/x86/profile.h" -#include "src/x86/conv_bias/f32/strategy.h" #include "midout.h" @@ -27,10 +28,9 @@ using namespace x86; /* ======================= AlgoFP32WinogradF63_8*8 ======================== */ bool ConvBiasImpl::AlgoFP32WinogradF63_8x8::usable( - fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy /*algo_selection_strategy*/) const { MEGDNN_MARK_USED_VAR(param); - MEGDNN_MARK_USED_VAR(opr); MIDOUT_BEGIN(megdnn_x86_winograd_fp32, 1, 0) { //! TODO: now nchw88 winograd only support Dense mode if (param.filter_meta.icpg % 8 != 0 || @@ -44,13 +44,13 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_8x8::usable( strategy, m_tile_size, param) .get_matmul_kern_param(param); return m_matmul_algo->usable(matmul_param) && - (opr->param().format == param::ConvBias::Format::NCHW88 || - (opr->param().format == + (param.filter_meta.format == param::ConvBias::Format::NCHW88 || + (param.filter_meta.format == param::ConvBias::Format::NCHW88_WINOGRAD && - opr->param().output_block_size == 6 && + param.output_block_size == 6 && param.winograd_matmul_format == param::MatrixMul::Format::MK8)) && - opr->param().mode == param::ConvBias::Mode::CROSS_CORRELATION && + !param.filter_meta.should_flip && (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && param.filter_meta.spatial[0] == 3) && (param.filter_meta.stride[0] == param.filter_meta.stride[1] && @@ -74,10 +74,9 @@ MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF63_8x8, /* ======================= AlgoFP32WinogradF23_8*8 ======================== */ bool ConvBiasImpl::AlgoFP32WinogradF23_8x8::usable( - fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, + const 
NCBKernSizeParam& param, AlgoSelectionStrategy /*algo_selection_strategy*/) const { MEGDNN_MARK_USED_VAR(param); - MEGDNN_MARK_USED_VAR(opr); MIDOUT_BEGIN(megdnn_x86_winograd_fp32, 2, 0) { //! TODO: now nchw88 winograd only support Dense mode if (param.filter_meta.icpg % 8 != 0 || @@ -91,13 +90,13 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_8x8::usable( strategy, m_tile_size, param) .get_matmul_kern_param(param); return m_matmul_algo->usable(matmul_param) && - (opr->param().format == param::ConvBias::Format::NCHW88 || - (opr->param().format == + (param.filter_meta.format == param::ConvBias::Format::NCHW88 || + (param.filter_meta.format == param::ConvBias::Format::NCHW88_WINOGRAD && - opr->param().output_block_size == 2 && + param.output_block_size == 2 && param.winograd_matmul_format == param::MatrixMul::Format::MK8)) && - opr->param().mode == param::ConvBias::Mode::CROSS_CORRELATION && + !param.filter_meta.should_flip && (param.filter_meta.spatial[0] == param.filter_meta.spatial[1] && param.filter_meta.spatial[0] == 3) && (param.filter_meta.stride[0] == param.filter_meta.stride[1] && diff --git a/dnn/src/x86/conv_bias/int8/algos.cpp b/dnn/src/x86/conv_bias/int8/algos.cpp index ba3aeb2c56bf637746a97d7b233cb1c5e9f625f3..ad26961301f69ba8a9fe8304295b304f6f015282 100644 --- a/dnn/src/x86/conv_bias/int8/algos.cpp +++ b/dnn/src/x86/conv_bias/int8/algos.cpp @@ -36,7 +36,7 @@ using namespace megdnn; using namespace x86; bool ConvBiasImpl::AlgoChanWiseAvx2Stride1Qint8::usable( - FallbackConvBiasImpl* /*opr*/, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy /*algo_selection_strategy*/) const { return chanwise_avx2_stride1_qint8_usable(param); } @@ -66,7 +66,7 @@ WorkspaceBundle ConvBiasImpl::AlgoChanWiseAvx2Stride1Qint8::get_bundle( } size_t ConvBiasImpl::AlgoChanWiseAvx2Stride1Qint8::get_workspace( - FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { return 
get_bundle(param).total_size_in_bytes(); } @@ -78,12 +78,12 @@ ConvBiasImpl::AlgoChanWiseAvx2Stride1Qint8::get_kimpls( } bool ConvBiasImpl::AlgoChanWiseAvx2Stride1Qint8::is_preferred( - FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { return chanwise_avx2_stride1_qint8_preferred(param); } bool ConvBiasImpl::AlgoChanWiseAvx2Stride2Qint8::usable( - FallbackConvBiasImpl* /*opr*/, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy /*algo_selection_strategy*/) const { return chanwise_avx2_stride2_qint8_usable(param); } @@ -113,7 +113,7 @@ WorkspaceBundle ConvBiasImpl::AlgoChanWiseAvx2Stride2Qint8::get_bundle( } size_t ConvBiasImpl::AlgoChanWiseAvx2Stride2Qint8::get_workspace( - FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { return get_bundle(param).total_size_in_bytes(); } @@ -125,12 +125,12 @@ ConvBiasImpl::AlgoChanWiseAvx2Stride2Qint8::get_kimpls( } bool ConvBiasImpl::AlgoChanWiseAvx2Stride2Qint8::is_preferred( - FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { return chanwise_avx2_stride2_qint8_preferred(param); } bool ConvBiasImpl::AlgoDirectAvx2Stride1Int8::usable( - FallbackConvBiasImpl* /*opr*/, const NCBKernSizeParam& param, + const NCBKernSizeParam& param, AlgoSelectionStrategy /*algo_selection_strategy*/) const { return direct_avx2_stride1_int8_usable(param); } @@ -170,7 +170,7 @@ WorkspaceBundle ConvBiasImpl::AlgoDirectAvx2Stride1Int8::get_bundle( } size_t ConvBiasImpl::AlgoDirectAvx2Stride1Int8::get_workspace( - FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { return get_bundle(param).total_size_in_bytes(); } @@ -182,14 +182,13 @@ ConvBiasImpl::AlgoDirectAvx2Stride1Int8::get_kimpls( } bool ConvBiasImpl::AlgoDirectAvx2Stride1Int8::is_preferred( - FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { + 
const NCBKernSizeParam& param) const { return direct_avx2_stride1_int8_preferred(param); } /* ===================== avx2 int8 stride 2 ===================== */ bool ConvBiasImpl::AlgoAVX2DirectConvStride2::usable( - FallbackConvBiasImpl* /*opr*/, const NCBKernSizeParam& param, - AlgoSelectionStrategy) const { + const NCBKernSizeParam& param, AlgoSelectionStrategy) const { return direct_avx2_stride2_int8_usable(param); } @@ -229,7 +228,7 @@ WorkspaceBundle ConvBiasImpl::AlgoAVX2DirectConvStride2::get_bundle( } size_t ConvBiasImpl::AlgoAVX2DirectConvStride2::get_workspace( - FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { return get_bundle(param).total_size_in_bytes(); } @@ -241,13 +240,12 @@ ConvBiasImpl::AlgoAVX2DirectConvStride2::get_kimpls( } bool ConvBiasImpl::AlgoAVX2DirectConvStride2::is_preferred( - FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { return direct_avx2_stride2_int8_preferred(param); } #if MEGDNN_X86_WITH_MKL_DNN -bool ConvBiasImpl::AlgoMkldnnQint8::usable(FallbackConvBiasImpl*, - const NCBKernSizeParam& param, +bool ConvBiasImpl::AlgoMkldnnQint8::usable(const NCBKernSizeParam& param, AlgoSelectionStrategy) const { return mkldnn_qint8_usable(param); } @@ -426,19 +424,18 @@ void ConvBiasImpl::AlgoMkldnnQint8::kern_mkldnn_s8x8x32( #undef REORDER_MEMORY bool ConvBiasImpl::AlgoMkldnnQint8::is_preferred( - FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { return mkldnn_qint8_preferred(param); } /* ===================== mkldnn qint8 matmul algo ===================== */ -bool ConvBiasImpl::AlgoMkldnnMatmulQint8::usable(FallbackConvBiasImpl*, - const NCBKernSizeParam& param, +bool ConvBiasImpl::AlgoMkldnnMatmulQint8::usable(const NCBKernSizeParam& param, AlgoSelectionStrategy) const { return mkldnn_matmul_qint8_usable(param); } bool ConvBiasImpl::AlgoMkldnnMatmulQint8::is_preferred( - 
FallbackConvBiasImpl*, const NCBKernSizeParam& param) const { + const NCBKernSizeParam& param) const { return mkldnn_matmul_qint8_preferred(param); } diff --git a/dnn/src/x86/conv_bias/int8/algos.h b/dnn/src/x86/conv_bias/int8/algos.h index a85a5b4835869b87f861f870f255cf6f35e03e4d..34717a42456b97dd68d5a36558a2fd3053039cce 100644 --- a/dnn/src/x86/conv_bias/int8/algos.h +++ b/dnn/src/x86/conv_bias/int8/algos.h @@ -25,18 +25,15 @@ public: const char* name() const override { return "X86_CONV_BIAS_CHANWISE_AVX2_INT8_STRIDE1"; } - bool usable(FallbackConvBiasImpl* opr, const NCBKernSizeParam& param, + bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t get_workspace(FallbackConvBiasImpl* opr, - const NCBKernSizeParam& param) const override; + size_t get_workspace(const NCBKernSizeParam& param) const override; virtual SmallVector dispatch_kerns( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const override { return get_kimpls(param); } void* type() const override; - bool is_preferred(FallbackConvBiasImpl*, - const NCBKernSizeParam& param) const override; + bool is_preferred(const NCBKernSizeParam& param) const override; }; /* ===================== avx2 stride2 chanwise algo ===================== */ @@ -49,18 +46,15 @@ public: const char* name() const override { return "X86_CONV_BIAS_CHANWISE_AVX2_INT8_STRIDE2"; } - bool usable(FallbackConvBiasImpl* opr, const NCBKernSizeParam& param, + bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t get_workspace(FallbackConvBiasImpl* opr, - const NCBKernSizeParam& param) const override; + size_t get_workspace(const NCBKernSizeParam& param) const override; virtual SmallVector dispatch_kerns( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const override { return get_kimpls(param); } void* type() const override; - bool is_preferred(FallbackConvBiasImpl*, - const NCBKernSizeParam& param) 
const override; + bool is_preferred(const NCBKernSizeParam& param) const override; }; /* ===================== avx2 stride1 direct algo ===================== */ @@ -73,18 +67,15 @@ public: const char* name() const override { return "X86_CONV_BIAS_DIRECT_AVX2_INT8_STRIDE1"; } - bool usable(FallbackConvBiasImpl* opr, const NCBKernSizeParam& param, + bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t get_workspace(FallbackConvBiasImpl* opr, - const NCBKernSizeParam& param) const override; + size_t get_workspace(const NCBKernSizeParam& param) const override; virtual SmallVector dispatch_kerns( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const override { return get_kimpls(param); } void* type() const override; - bool is_preferred(FallbackConvBiasImpl*, - const NCBKernSizeParam& param) const override; + bool is_preferred(const NCBKernSizeParam& param) const override; }; /* ================== avx2 int8 direct conv stride2 algo ================== */ @@ -97,18 +88,15 @@ public: const char* name() const override { return "X86_CONV_BIAS_DIRECT_AVX2_INT8_STRIDE2"; } - bool usable(FallbackConvBiasImpl* opr, const NCBKernSizeParam& param, + bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const override; - size_t get_workspace(FallbackConvBiasImpl* opr, - const NCBKernSizeParam& param) const override; + size_t get_workspace(const NCBKernSizeParam& param) const override; SmallVector dispatch_kerns( - fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const override { return get_kimpls(param); } void* type() const override; - bool is_preferred(FallbackConvBiasImpl*, - const NCBKernSizeParam& param) const override; + bool is_preferred(const NCBKernSizeParam& param) const override; }; #if MEGDNN_X86_WITH_MKL_DNN @@ -122,16 +110,14 @@ public: AlgoMkldnnQint8() {} bool is_reproducible() const override { return true; } const char* name() const override { return 
"MKLDNN_INT8"; } - bool usable(FallbackConvBiasImpl* opr, const NCBKernSizeParam& param, + bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy) const override; - size_t get_workspace(FallbackConvBiasImpl* /*opr*/, - const NCBKernSizeParam& param) const override { + size_t get_workspace(const NCBKernSizeParam& param) const override { size_t nr_threads = param.nr_threads; return get_bundle(param).total_size_in_bytes() * nr_threads; } SmallVector dispatch_kerns( - FallbackConvBiasImpl* /*opr*/, const NCBKernSizeParam& param) const override { size_t group = param.filter_meta.group; size_t n = param.n; @@ -147,8 +133,7 @@ public: return {{kern, {group, n, 1_z}}}; } void* type() const override; - bool is_preferred(FallbackConvBiasImpl*, - const NCBKernSizeParam& param) const override; + bool is_preferred(const NCBKernSizeParam& param) const override; }; /* ===================== mkldnn qint8 matmul algo ===================== */ class ConvBiasImpl::AlgoMkldnnMatmulQint8 final : public AlgoBase { @@ -160,22 +145,19 @@ class ConvBiasImpl::AlgoMkldnnMatmulQint8 final : public AlgoBase { public: bool is_reproducible() const override { return true; } const char* name() const override { return "MKLDNN_MATMUL_INT8"; } - bool usable(FallbackConvBiasImpl* opr, const NCBKernSizeParam& param, + bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy) const override; - size_t get_workspace(FallbackConvBiasImpl* /*opr*/, - const NCBKernSizeParam& param) const override { + size_t get_workspace(const NCBKernSizeParam& param) const override { return get_bundle(param).total_size_in_bytes(); } SmallVector dispatch_kerns( - FallbackConvBiasImpl* /*opr*/, const NCBKernSizeParam& param) const override { size_t group = param.filter_meta.group; return {{kern_mkldnn_matmul_s8x8x32, {group, 1_z, 1_z}}}; } //! 
select matmul to the highest preference - bool is_preferred(FallbackConvBiasImpl*, - const NCBKernSizeParam& param) const override; + bool is_preferred(const NCBKernSizeParam& param) const override; void* type() const override; }; diff --git a/dnn/src/x86/conv_bias/opr_impl.cpp b/dnn/src/x86/conv_bias/opr_impl.cpp index 4c7d10cd90df6333b9c8b4551a6fca3791dc5bcd..1f5adeeb3844fd09190717a7411800090adb9583 100644 --- a/dnn/src/x86/conv_bias/opr_impl.cpp +++ b/dnn/src/x86/conv_bias/opr_impl.cpp @@ -163,7 +163,7 @@ const char* ConvBiasImpl::get_algorithm_set_name() const { } bool ConvBiasImpl::is_matmul_quantized_prefer( - const ConvBiasImpl::NCBKernSizeParam& param) { + const ConvBiasImpl::NCBKernSizeParam& param) const { bool conv_direct_chanwise_mkldnn_usable = true; if (param.dst_type.enumv() == DTypeEnum::QuantizedS8 || param.dst_type.enumv() == DTypeEnum::QuantizedS32) { diff --git a/dnn/src/x86/conv_bias/opr_impl.h b/dnn/src/x86/conv_bias/opr_impl.h index ea1626be364ab71ed1c42f596044a8330f85faa8..204cf38a7205ae11515c1501d72854b142705f53 100644 --- a/dnn/src/x86/conv_bias/opr_impl.h +++ b/dnn/src/x86/conv_bias/opr_impl.h @@ -55,7 +55,7 @@ public: const char* get_algorithm_set_name() const override; bool is_matmul_quantized_prefer( - const ConvBiasImpl::NCBKernSizeParam& ncb_param) override; + const ConvBiasImpl::NCBKernSizeParam& ncb_param) const override; }; } // namespace x86