algos.cpp 11.6 KB
Newer Older
M
Megvii Engine Team 已提交
1
#include "src/fallback/conv_bias/conv1x1/algos.h"
2 3 4 5 6 7 8 9 10 11 12
#include "src/common/opr_delegate.h"
#include "src/fallback/conv_bias/common.h"
#include "src/fallback/conv_bias/conv1x1/conv1x1_dispatcher.h"
#include "src/fallback/conv_bias/conv1x1/conv1x1_strategy.h"
#include "src/fallback/conv_bias/opr_impl.h"

#include "megdnn/opr_param_defs.h"
#include "src/naive/convolution/helper.h"

#if MEGDNN_X86
#include "src/x86/conv_bias/postprocess_helper.h"
13 14
#elif (MEGDNN_ARMV7 || MEGDNN_AARCH64)
#include "src/arm_common/conv_bias/postprocess_helper.h"
15 16
#else
#include "src/common/postprocess_helper.h"
17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35
#endif

#include "midout.h"
MIDOUT_DECL(megdnn_fallback_conv1x1)

using namespace megdnn;
using namespace fallback;
#if MEGDNN_X86
using namespace x86;
#endif
using namespace conv1x1;

size_t ConvBiasImpl::AlgoConv1x1::get_oc_tile_size_heuristic(
        const NCBKernSizeParam& param) const {
    size_t OH = param.osz[0];
    size_t OW = param.osz[1];
    size_t OC = param.filter_meta.ocpg;
    if (OH * OW >= 56 * 56 || OC >= 64)
        return m_oc_block_size;
36 37
    size_t oc_block_size_one_thread = div_ceil(OC, param.nr_threads);
    return round_up<size_t>(oc_block_size_one_thread, 24);
38 39
}

40
//! Ask the Conv1x1Kerns specialisation that matches the matmul algorithm's
//! pack mode for its workspace bundle.
//!
//! The matmul kernel parameter is derived from the output plane size
//! (OH * OW) and the heuristic output-channel tile size. If the selected
//! MIDOUT region does not return, an empty bundle is returned instead.
WorkspaceBundle ConvBiasImpl::AlgoConv1x1::get_bundle_according_packmode(
        const NCBKernSizeParam& param) const {
    using PackMode = MatrixMulImpl::AlgoBase::PackMode;

    const size_t out_h = param.osz[0];
    const size_t out_w = param.osz[1];
    size_t oc_tile_size = get_oc_tile_size_heuristic(param);

    auto matmul_param = utils::get_matmul_kern_param(param, out_h * out_w, oc_tile_size);

    const auto mode = m_matmul_algo->packmode();
    if (mode == PackMode::DEFAULT) {
        MIDOUT_BEGIN(megdnn_fallback_conv1x1, midout_iv("get_bundle_default"_hash)) {
            return Conv1x1Kerns<PackMode::DEFAULT>().get_bundle(
                    param, matmul_param, m_matmul_algo, oc_tile_size);
        }
        MIDOUT_END();
    } else if (mode == PackMode::ONLY_PACKA) {
        MIDOUT_BEGIN(megdnn_fallback_conv1x1, midout_iv("get_bundle_only_packa"_hash)) {
            return Conv1x1Kerns<PackMode::ONLY_PACKA>().get_bundle(
                    param, matmul_param, m_matmul_algo, oc_tile_size);
        }
        MIDOUT_END();
    } else {
        //! every remaining pack mode is handled by the NO_PACK kernels
        MIDOUT_BEGIN(megdnn_fallback_conv1x1, midout_iv("get_bundle_no_pack"_hash)) {
            return Conv1x1Kerns<PackMode::NO_PACK>().get_bundle(
                    param, matmul_param, m_matmul_algo, oc_tile_size);
        }
        MIDOUT_END();
    }
    //! fallback when the selected region above did not return
    return {nullptr, {}};
}

M
Megvii Engine Team 已提交
75
size_t ConvBiasImpl::AlgoConv1x1::get_workspace(const NCBKernSizeParam& param) const {
76 77 78
    return get_bundle_according_packmode(param).total_size_in_bytes();
}

M
Megvii Engine Team 已提交
79 80 81
//! Build the kernel list for the 1x1 convolution according to the matmul
//! algorithm's pack mode.
//!
//! \param param             kernel size parameters of the convolution
//! \param weight_preprocess when true the weight-preprocess kernels
//!                          (get_kern_preprocess) are returned, otherwise the
//!                          regular compute kernels (get_kern)
//! \return the kernels produced by the matching Conv1x1Kerns specialisation,
//!         or an empty vector if the selected MIDOUT region did not return
SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoConv1x1::
        get_kerns_according_packmode(
                const NCBKernSizeParam& param, bool weight_preprocess) const {
    using PackMode = MatrixMulImpl::AlgoBase::PackMode;

    size_t OH = param.osz[0];
    size_t OW = param.osz[1];
    size_t compt_oc_block_size = get_oc_tile_size_heuristic(param);
    auto pack_mode = m_matmul_algo->packmode();

    Conv1x1StrategyBase* conv1x1_strategy = Conv1x1Factory::make_conv1x1_strategy(
            param, pack_mode, param.filter_meta.format);
    auto matmul_param =
            utils::get_matmul_kern_param(param, OH * OW, compt_oc_block_size);

    WorkspaceBundle whole_bundle = get_bundle_according_packmode(param);
    //! NO_PACK does not implement get_bundle, so build a bundle by hand whose
    //! slot 2 holds the matmul workspace size (slot 2 is what is read below)
    WorkspaceBundle matmul_bundle = {nullptr, {}};
    if (pack_mode == PackMode::NO_PACK) {
        matmul_bundle = {nullptr, {0, 0, m_matmul_algo->get_workspace(matmul_param)}};
    } else {
        matmul_bundle = m_matmul_algo->get_bundle(matmul_param);
    }
    WorkspaceBundle thread_bundle = utils::get_thread_bundle(
            param, matmul_bundle.get_size(2), compt_oc_block_size);

    if (pack_mode == PackMode::DEFAULT) {
        MIDOUT_BEGIN(megdnn_fallback_conv1x1, midout_iv("get_kern_default"_hash)) {
            if (!weight_preprocess) {
                return Conv1x1Kerns<PackMode::DEFAULT>().get_kern(
                        param, whole_bundle, matmul_bundle, thread_bundle,
                        conv1x1_strategy, m_matmul_algo, compt_oc_block_size);
            } else {
                return Conv1x1Kerns<PackMode::DEFAULT>().get_kern_preprocess(
                        param, whole_bundle, matmul_bundle, conv1x1_strategy,
                        m_matmul_algo, compt_oc_block_size);
            }
        }
        MIDOUT_END();
    } else if (pack_mode == PackMode::ONLY_PACKA) {
        MIDOUT_BEGIN(megdnn_fallback_conv1x1, midout_iv("get_kern_only_packa"_hash)) {
            if (!weight_preprocess) {
                return Conv1x1Kerns<PackMode::ONLY_PACKA>().get_kern(
                        param, whole_bundle, matmul_bundle, thread_bundle,
                        conv1x1_strategy, m_matmul_algo, compt_oc_block_size);
            } else {
                return Conv1x1Kerns<PackMode::ONLY_PACKA>().get_kern_preprocess(
                        param, whole_bundle, matmul_bundle, conv1x1_strategy,
                        m_matmul_algo, compt_oc_block_size);
            }
        }
        MIDOUT_END();
    } else {
        MIDOUT_BEGIN(megdnn_fallback_conv1x1, midout_iv("get_kern_no_pack"_hash)) {
            if (!weight_preprocess) {
                return Conv1x1Kerns<PackMode::NO_PACK>().get_kern(
                        param, whole_bundle, matmul_bundle, thread_bundle,
                        conv1x1_strategy, m_matmul_algo, compt_oc_block_size);
            } else {
                return Conv1x1Kerns<PackMode::NO_PACK>().get_kern_preprocess(
                        param, whole_bundle, matmul_bundle, conv1x1_strategy,
                        m_matmul_algo, compt_oc_block_size);
            }
        }
        MIDOUT_END();
    }
    //! Fallback so control never flows off the end of this non-void function
    //! when the selected region above did not return.
    return {};
}
150

151 152 153 154
//! Kernels for the regular (non weight-preprocess) execution path.
SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoConv1x1::dispatch_kerns(
        const NCBKernSizeParam& param) const {
    constexpr bool weight_preprocess = false;
    return get_kerns_according_packmode(param, weight_preprocess);
}
155

M
Megvii Engine Team 已提交
156
//! Describe the layout of the preprocessed filter.
//!
//! Returns a single Int8 layout of shape {group, size of slot 0 of the
//! workspace bundle built for \p param}. An empty vector is returned if the
//! MIDOUT region does not return.
SmallVector<TensorLayout> ConvBiasImpl::AlgoConv1x1::deduce_preprocessed_filter_layout(
        const NCBKernSizeParam& param) const {
    MIDOUT_BEGIN(
            megdnn_fallback_conv1x1,
            midout_iv("deduce_preprocessed_filter_layout"_hash)) {
        WorkspaceBundle bundle = get_bundle_according_packmode(param);
        size_t group_count = param.filter_meta.group;
        size_t slot0_size = bundle.get_size(0);

        SmallVector<TensorLayout> layouts;
        layouts.push_back({{group_count, slot0_size}, dtype::Int8()});
        return layouts;
    }
    MIDOUT_END();
    return {};
}

M
Megvii Engine Team 已提交
172
//! Kernels for the weight-preprocess path.
SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoConv1x1::dispatch_preprocess_kerns(
        const NCBKernSizeParam& param) const {
    constexpr bool weight_preprocess = true;
    return get_kerns_according_packmode(param, weight_preprocess);
}

M
Megvii Engine Team 已提交
177 178
bool ConvBiasImpl::AlgoConv1x1::usable(
        const NCBKernSizeParam& param, AlgoSelectionStrategy) const {
179
    MIDOUT_BEGIN(megdnn_fallback_conv1x1, 0, 2) {
M
Megvii Engine Team 已提交
180 181 182
        size_t FH = param.filter_meta.spatial[0], FW = param.filter_meta.spatial[1];
        size_t PH = param.filter_meta.padding[0], PW = param.filter_meta.padding[1];
        size_t SH = param.filter_meta.stride[0], SW = param.filter_meta.stride[1];
183 184 185 186 187 188 189
        auto format = param.filter_meta.format;
        size_t OH = param.osz[0];
        size_t OW = param.osz[1];
#if MEGDNN_AARCH64 || MEGDNN_ARMV7
        if (format != param::ConvBias::Format::NCHW &&
            format != param::ConvBias::Format::NCHW44 &&
            format != param::ConvBias::Format::NCHW44_DOT) {
190
            return false;
191 192 193 194 195 196 197 198 199
        }
        //! hybird mode is not support
        if (param.filter_meta.format == param::ConvBias::Format::NCHW44 ||
            param.filter_meta.format == param::ConvBias::Format::NCHW44_DOT) {
            if (param.filter_meta.icpg < 4_z || param.filter_meta.icpg == 1 ||
                param.filter_meta.ocpg == 1) {
                return false;
            }
        }
200 201 202
#else  //! x86 and RISC-V do not support NCHW44_DOT
        if (format != param::ConvBias::Format::NCHW &&
            format != param::ConvBias::Format::NCHW44) {
203 204
            return false;
        }
205 206 207 208 209 210
        //! hybird mode is not support
        if (param.filter_meta.format == param::ConvBias::Format::NCHW44) {
            if (param.filter_meta.icpg < 4_z || param.filter_meta.ocpg == 1) {
                return false;
            }
        }
211 212 213
#endif
        //! param
        if (FH != 1 || FW != 1 || PH || PW || SH != 1 || SW != 1) {
214 215
            return false;
        }
216 217 218 219 220
        //! data type
        if (param.src_type.enumv() != param.filter_type.enumv() ||
            (param.src_type.enumv() != DTypeEnum::Int8 &&
             param.src_type.enumv() != DTypeEnum::QuantizedS8 &&
             param.src_type.enumv() != DTypeEnum::Quantized8Asymm &&
221
#if !MEGDNN_DISABLE_FLOAT16
222
             param.src_type.enumv() != DTypeEnum::Float16 &&
223
#endif
224
             param.src_type.enumv() != DTypeEnum::Float32)) {
225 226
            return false;
        }
227 228 229 230 231 232
        //! x86 disable  Quntized8Asymm
#if MEGDNN_X86
        if (param.src_type.enumv() == DTypeEnum::Quantized8Asymm) {
            return false;
        }
#endif
233 234 235
        //! make sure 8x8x16 and 8x8x32 biasmode is nobias and nonlineMode
        //! is identity otherwise return false mean that 8x8x32 and 8x8x16
        //! not support PostProcess
236
        if (param.dst_type.enumv() == DTypeEnum::Int16 ||
237
            param.dst_type.enumv() == DTypeEnum::QuantizedS16 ||
238 239
            param.dst_type.enumv() == DTypeEnum::Int32 ||
            param.dst_type.enumv() == DTypeEnum::QuantizedS32) {
240
            if (param.nonlineMode != megdnn::NonlineMode::IDENTITY) {
241 242 243
                return false;
            }
        }
M
Megvii Engine Team 已提交
244 245
        MatrixMulImpl::KernSizeParam matmul_param = utils::get_matmul_kern_param(
                param, OH * OW, get_oc_tile_size_heuristic(param));
246
        bool matmul_usable = m_matmul_algo->usable(matmul_param);
247 248
        auto pack_mode = m_matmul_algo->packmode();
        bool strategy_usable = Conv1x1Factory::can_make_conv1x1_strategy(
249
                param, pack_mode, param.filter_meta.format);
250
        return matmul_usable && strategy_usable &&
M
Megvii Engine Team 已提交
251
               (param.filter_meta.dilation[0] == param.filter_meta.dilation[1] &&
252 253 254 255 256 257
                param.filter_meta.dilation[0] == 1) &&
               param.compute_mode == param::ConvBias::ComputeMode::DEFAULT;
    }
    MIDOUT_END();
    return false;
}
258

M
Megvii Engine Team 已提交
259
bool ConvBiasImpl::AlgoConv1x1::is_preferred(const NCBKernSizeParam& param) const {
260 261 262
    size_t OH = param.osz[0];
    size_t OW = param.osz[1];
    if (OH * OW != 1) {
263 264
        return m_matmul_algo->algoset() !=
               MatrixMulImpl::AlgoBase::AlgoSet::ALGO_TYPE_GEMV;
265 266 267 268 269
    } else {
#if (MEGDNN_ARMV7 || MEGDNN_AARCH64)
        if (param.src_type.enumv() == DTypeEnum::Int8 &&
            param.filter_type.enumv() == DTypeEnum::Int8 &&
            param.dst_type.enumv() == DTypeEnum::Int16) {
270 271
            return true;
        }
272 273 274 275 276 277 278 279 280
#elif MEGDNN_X86
        size_t OC = param.filter_meta.ocpg;
        if (OC > 2 || param.src_type.enumv() == DTypeEnum::Float32)
            return true;
#endif
        return false;
    }
}

281
// vim: syntax=cpp.doxygen