/**
 * \file dnn/src/fallback/conv_bias/im2col/strategy_base.h
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */

#pragma once
#include <algorithm>
#include <cstring>

#include "src/fallback/conv_bias/opr_impl.h"

#if MEGDNN_X86
#include "src/x86/conv_bias/postprocess_helper.h"
#elif (MEGDNN_ARMV7 || MEGDNN_AARCH64)
#include "src/arm_common/conv_bias/postprocess_helper.h"
#else
#include "src/common/postprocess_helper.h"
#endif
// NOTE(review): these using-directives sit at file scope in a header, so every
// translation unit that includes this file inherits them.
using namespace megdnn;
#if MEGDNN_X86
// Only when MEGDNN_X86 is non-zero: also pull in the namespace named x86.
using namespace x86;
#endif
namespace megdnn {

using PackMode = fallback::MatrixMulImpl::AlgoBase::PackMode;
29
using FormatMode = param::ConvBias::Format;

//! Parameters describing the slice of work handled by one strategy call.
//!
//! Members without a comment are not read by any code visible in this header;
//! their meaning has to be taken from the code that fills and consumes them.
struct StrategyParam {
    //! batch index, forwarded to param.dst() / param.bias()
    size_t batch_id;
    //! group index, forwarded to param.dst() / param.bias()
    size_t group_id;
    size_t oc_tile_size;
    //! first output channel of the current block (scaled by \c ohw to form
    //! the element offset into dst / bias)
    size_t oc_cur_index;
    //! exclusive upper bound of the output channel range (see copy_bias)
    size_t oc_end_index;
    //! first output spatial position of the current block
    size_t ohw_cur_index;
    //! number of spatial positions copied per channel pack
    size_t output_block_size;
    //! number of output channels in the current block
    size_t output_block_oc_size;
    //! element stride between consecutive output channels in dst / bias
    //! (presumably OH * OW -- confirm against the code that fills it)
    size_t ohw;
    size_t block_m;
    size_t block_n;
    size_t block_k;
    //! number of channels packed together; 1 is passed for plain layouts
    //! presumably -- TODO confirm
    size_t pack_oc_size;
    size_t packA_group_size;
    //! when true, copy_dst() is a no-op
    bool skip_copy_dst;
    //! together with \c is_ohw_size_bigger selects whether the matmul result
    //! goes to the thread workspace or straight into dst (get_matmul_dst_ptr)
    bool is_dst_8bit;
    bool is_ohw_size_bigger;
    bool enable_filter_preprocess;
};

class StrategyBase {
public:
    StrategyBase() = default;
    virtual ~StrategyBase() = default;
    virtual void copy_padding_kern(
57
            const WorkspaceBundle& bundle,
58
            const fallback::ConvBiasImpl::NCBKernParam& param,
59 60
            const fallback::ConvBiasImpl::NCBKernIndex& ncb_index,
            size_t pack_size) = 0;
61
    virtual void packA_kern(
62
            const WorkspaceBundle& bundle,
63 64
            const fallback::ConvBiasImpl::NCBKernParam& param,
            fallback::MatrixMulImpl::KernSizeParam matmulparam,
65
            const fallback::MatrixMulImpl::AlgoBase* matmul_algo,
66
            const fallback::ConvBiasImpl::NCBKernIndex& ncb_index,
67 68
            const fallback::MatrixMulImpl::AlgoBase::MatmulDescription&
                    matmul_desec,
69
            const StrategyParam& sparam) = 0;
70 71

    virtual void exec_im2col(
72
            const WorkspaceBundle& bundle, const WorkspaceBundle& bundle_thread,
73 74 75
            const StrategyParam& sparam,
            const fallback::ConvBiasImpl::NCBKernParam& param,
            fallback::MatrixMulImpl::KernParam matmul_param,
76
            const fallback::MatrixMulImpl::AlgoBase* matmul_algo) = 0;
77 78 79

    virtual void exec_matmul(
            const fallback::ConvBiasImpl::NCBKernParam& param,
80 81
            const StrategyParam& sparam, const WorkspaceBundle& bundle,
            const WorkspaceBundle& bundle_thread,
82
            fallback::MatrixMulImpl::KernParam matmul_param,
83 84
            const fallback::MatrixMulImpl::AlgoBase* matmul_algo,
            const fallback::ConvBiasImpl::NCBKernIndex& ncb_index,
85 86
            const fallback::MatrixMulImpl::AlgoBase::MatmulDescription&
                    matmul_desc) = 0;
87 88 89

    virtual void exec_postprocess(
            const fallback::ConvBiasImpl::NCBKernParam& param,
90 91
            const StrategyParam& sparam,
            const WorkspaceBundle& bundle_thread) = 0;
92 93
};

template <typename src_ctype, typename bias_ctype, typename dst_ctype,
          typename op_ctype, typename op_dtype,
          megdnn::PostprocessMode postprocess_mode, PackMode packmode,
          FormatMode format>
//! this class is a new base class for StrategyDefault StrategyNoPack and so on,
//! in order to handle copy pad use the same code
class StrategyBridge : public StrategyBase {
public:
    constexpr static size_t BUNDLE_PADDING_INDEX = 0;

    StrategyBridge() = default;

    virtual void copy_padding_kern(
107
            const WorkspaceBundle& bundle,
108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219
            const fallback::ConvBiasImpl::NCBKernParam& param,
            const fallback::ConvBiasImpl::NCBKernIndex& ncb_index,
            size_t pack_oc_size) override {
        UNPACK_CONV_F32_NCB_KERN_SIZES(param);
        MEGDNN_MARK_USED_VAR(N);
        MEGDNN_MARK_USED_VAR(OC);
        MEGDNN_MARK_USED_VAR(OH);
        MEGDNN_MARK_USED_VAR(OW);
        MEGDNN_MARK_USED_VAR(FH);
        MEGDNN_MARK_USED_VAR(FW);
        MEGDNN_MARK_USED_VAR(SH);
        MEGDNN_MARK_USED_VAR(SW);
        size_t IW2 = IW + 2 * PW;
        size_t IH2 = IH + 2 * PH;
        size_t batch_id = ncb_index.ndrange_id[0];
        size_t group_id = ncb_index.ndrange_id[1];
        size_t channel_id = ncb_index.ndrange_id[2];
        size_t PH_SIZE = PH * IW2 * pack_oc_size;

        PW = PW * pack_oc_size;
        IW = IW * pack_oc_size;

        size_t padding_group_size = IH2 * IW2 * IC;
        size_t workspace_channel_offset = pack_oc_size * IH2 * IW2 * channel_id;
        size_t workspace_group_offset = group_id * padding_group_size;
        size_t workspace_batch_offset =
                param.filter_meta.group * batch_id * padding_group_size;

        src_ctype src_zp = static_cast<src_ctype>(0);
        if (param.src_type.enumv() == DTypeEnum::Quantized8Asymm) {
            src_zp = param.src_type.param<dtype::Quantized8Asymm>().zero_point;
        }
        src_ctype* src = const_cast<src_ctype*>(param.src<src_ctype>(
                batch_id, group_id, channel_id, 1, pack_oc_size));
        src_ctype* src2;
        src2 = static_cast<src_ctype*>(bundle.get(BUNDLE_PADDING_INDEX)) +
               workspace_group_offset + workspace_batch_offset +
               workspace_channel_offset;
        src_ctype* src2_ptr = src2;
        const src_ctype* src_ptr = src;
        if (PH != 0) {
            std::memset(src2_ptr, src_zp, sizeof(src_ctype) * PH_SIZE);
            src2_ptr += PH_SIZE;
        }
        rep(ih, IH) {
            if (PW != 0)
                rep(pw, PW) * (src2_ptr++) = src_zp;
            std::memcpy(src2_ptr, src_ptr, sizeof(src_ctype) * IW);
            src2_ptr += IW;
            src_ptr += IW;
            if (PW != 0)
                rep(pw, PW) * (src2_ptr++) = src_zp;
        }
        if (PH != 0) {
            std::memset(src2_ptr, src_zp, sizeof(src_ctype) * PH_SIZE);
            src2_ptr += PH_SIZE;
        }
    }
};

namespace{
/*!
 * \brief select the buffer the matmul result is written to
 *
 * The result goes directly into the destination tensor (offset by
 * oc_cur_index * ohw elements of \c bias_ctype) only when the destination is
 * not 8-bit and \c is_ohw_size_bigger is set; in every other case the
 * per-thread workspace slot \p matmul_bundle_index is returned.
 */
template <typename bias_ctype>
inline void* get_matmul_dst_ptr(const fallback::ConvBiasImpl::NCBKernParam& param,
                           const WorkspaceBundle& bundle_thread,
                           const StrategyParam& sparam,
                           size_t matmul_bundle_index) {
    const bool write_straight_to_dst =
            !sparam.is_dst_8bit && sparam.is_ohw_size_bigger;
    if (write_straight_to_dst) {
        bias_ctype* dst_base =
                param.dst<bias_ctype>(sparam.batch_id, sparam.group_id);
        return static_cast<void*>(dst_base + sparam.oc_cur_index * sparam.ohw);
    }
    return static_cast<void*>(bundle_thread.get(matmul_bundle_index));
}

/*!
 * \brief per-thread scratch buffer for the bias, or nullptr
 *
 * The workspace slot \p bias_bundle_index is looked up only when the bias
 * mode is BiasMode::BIAS; every other mode yields nullptr.
 */
template <typename bias_ctype>
inline void* get_bias_temp_ptr(
        const fallback::ConvBiasImpl::NCBKernParam& param,
        const WorkspaceBundle& bundle_thread, size_t bias_bundle_index) {
    if (param.bias_mode != megdnn::BiasMode::BIAS) {
        return nullptr;
    }
    return static_cast<bias_ctype*>(bundle_thread.get(bias_bundle_index));
}

/*!
 * \brief copy the block stored at \p matmul_dst into the destination tensor
 *
 * Does nothing when \c sparam.skip_copy_dst is set. Otherwise
 * output_block_oc_size / pack_oc_size rows are copied; each row holds
 * output_block_size * pack_oc_size elements, rows are contiguous in the
 * source and ohw * pack_oc_size elements apart in the destination.
 */
template <typename dst_ctype>
void copy_dst(const fallback::ConvBiasImpl::NCBKernParam& param,
              const void* matmul_dst, const StrategyParam& sparam) {
    if (sparam.skip_copy_dst) {
        return;
    }
    const size_t pack = sparam.pack_oc_size;
    const dst_ctype* from = static_cast<const dst_ctype*>(matmul_dst);
    dst_ctype* to = param.dst<dst_ctype>(sparam.batch_id, sparam.group_id) +
                    sparam.oc_cur_index * sparam.ohw +
                    sparam.ohw_cur_index * pack;
    const size_t nr_rows = sparam.output_block_oc_size / pack;
    const size_t row_elems = sparam.output_block_size * pack;
    const size_t dst_row_stride = sparam.ohw * pack;
    for (size_t row = 0; row < nr_rows; ++row) {
        std::memcpy(to + row * dst_row_stride, from + row * row_elems,
                    sizeof(dst_ctype) * row_elems);
    }
}

/*!
 * \brief gather the bias block of the current tile into the thread workspace
 *
 * Only acts when the bias mode is BiasMode::BIAS. For each channel pack in
 * [oc_cur_index / pack_oc_size, oc_end_index / pack_oc_size) it copies
 * output_block_size * pack_oc_size elements, reading rows that are
 * ohw * pack_oc_size elements apart and writing them back to back into the
 * workspace slot \p bias_index.
 */
template <typename bias_ctype>
void copy_bias(const fallback::ConvBiasImpl::NCBKernParam& param,
               const WorkspaceBundle& bundle_thread,
               const StrategyParam& sparam, size_t bias_index) {
    //! nothing to gather for the other bias modes
    if (param.bias_mode != megdnn::BiasMode::BIAS) {
        return;
    }
    const size_t pack_oc_size = sparam.pack_oc_size;
    const size_t block_elems = sparam.output_block_size * pack_oc_size;

    const bias_ctype* bias_src =
            static_cast<const bias_ctype*>(
                    param.bias<bias_ctype>(sparam.batch_id, sparam.group_id)) +
            sparam.oc_cur_index * sparam.ohw +
            sparam.ohw_cur_index * pack_oc_size;
    //! named bias_tmp (not copy_dst) to avoid shadowing the copy_dst()
    //! function template declared in this namespace
    bias_ctype* bias_tmp = static_cast<bias_ctype*>(
            get_bias_temp_ptr<bias_ctype>(param, bundle_thread, bias_index));

    for (size_t oc = sparam.oc_cur_index / pack_oc_size;
         oc < sparam.oc_end_index / pack_oc_size; ++oc) {
        std::memcpy(bias_tmp, bias_src, sizeof(bias_ctype) * block_elems);
        bias_tmp += block_elems;
        bias_src += sparam.ohw * pack_oc_size;
    }
}

template <typename bias_ctype, typename dst_ctype, typename op_ctype,
          typename op_dtype, megdnn::PostprocessMode postprocess_mode>
245
void do_postprocess(const fallback::ConvBiasImpl::NCBKernParam& param,
246 247
                    const StrategyParam& sparam,
                    const WorkspaceBundle& bundle_thread,
248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271
                    size_t matmul_bundle_index, size_t bias_bundle_index) {
    copy_bias<bias_ctype>(param, bundle_thread, sparam, bias_bundle_index);
    void* matmul_dst = get_matmul_dst_ptr<bias_ctype>(
            param, bundle_thread, sparam, matmul_bundle_index);

    const bias_ctype* bias_ptr = static_cast<const bias_ctype*>(
            param.bias<bias_ctype>(sparam.batch_id, sparam.group_id));
    void* bias_temp_ptr = get_bias_temp_ptr<bias_ctype>(param, bundle_thread,
                                                        bias_bundle_index);
    void* bias_preprocess_ptr = const_cast<void*>(
            param.bias_mode == megdnn::BiasMode::BIAS
                    ? bias_temp_ptr
                    : static_cast<void*>(const_cast<bias_ctype*>(
                              bias_ptr + sparam.oc_cur_index)));
    size_t pack_oc_size = sparam.pack_oc_size;
    PostProcess<op_ctype, op_dtype, postprocess_mode>::run(
            matmul_dst, bias_preprocess_ptr, matmul_dst, param.bias_mode,
            param.nonlineMode, param.bias_type, param.dst_type, 1_z,
            sparam.output_block_oc_size / pack_oc_size, 1_z,
            sparam.output_block_size, pack_oc_size);
    copy_dst<dst_ctype>(param, matmul_dst, sparam);
}
}

template <typename src_ctype, typename bias_ctype, typename dst_ctype,
          typename op_ctype, typename op_dtype,
274
          megdnn::PostprocessMode postprocess_mode, PackMode packmode,
275
          FormatMode format = FormatMode::NCHW>
276 277 278 279 280 281
class Strategy;

template <typename src_ctype, typename bias_ctype, typename dst_ctype,
          typename op_ctype, typename op_dtype,
          megdnn::PostprocessMode postprocess_mode>
class Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
282 283 284 285
               postprocess_mode, PackMode::DEFAULT>
        : public StrategyBridge<src_ctype, bias_ctype, dst_ctype, op_ctype,
                                op_dtype, postprocess_mode, PackMode::DEFAULT,
                                FormatMode::NCHW> {
286 287 288 289 290 291 292
public:
    constexpr static size_t BUNDLE_PADDING_INDEX = 0;
    constexpr static size_t BUNDLE_PACKA_INDEX = 1;
    constexpr static size_t THREAD_BUNDLE_PACKB_INDEX = 0;
    constexpr static size_t THREAD_BUNDLE_IM2COL_INDEX = 1;
    constexpr static size_t THREAD_BUNDLE_BIAS_INDEX = 2;

293
    Strategy() = default;
294

295
    virtual void packA_kern(
296
            const WorkspaceBundle& bundle,
297 298 299 300
            const fallback::ConvBiasImpl::NCBKernParam& param,
            fallback::MatrixMulImpl::KernSizeParam matmulparam,
            const fallback::MatrixMulImpl::AlgoBase* matmul_algo,
            const fallback::ConvBiasImpl::NCBKernIndex& ncb_index,
301 302
            const fallback::MatrixMulImpl::AlgoBase::MatmulDescription&
                    matmul_desc,
303
            const StrategyParam& sparam) override;
304
    virtual void exec_im2col(
305
            const WorkspaceBundle& bundle, const WorkspaceBundle& bundle_thread,
306
            const StrategyParam& sparam,
307
            const fallback::ConvBiasImpl::NCBKernParam& param,
308
            fallback::MatrixMulImpl::KernParam matmul_param,
309
            const fallback::MatrixMulImpl::AlgoBase* matmul_algo) override;
310

311 312 313 314 315 316 317 318
    void exec_matmul(const fallback::ConvBiasImpl::NCBKernParam& param,
                     const StrategyParam& sparam, const WorkspaceBundle& bundle,
                     const WorkspaceBundle& bundle_thread,
                     fallback::MatrixMulImpl::KernParam matmul_param,
                     const fallback::MatrixMulImpl::AlgoBase* matmul_algo,
                     const fallback::ConvBiasImpl::NCBKernIndex& ncb_index,
                     const fallback::MatrixMulImpl::AlgoBase::MatmulDescription&
                             matmul_desc) override;
319 320
    void exec_postprocess(const fallback::ConvBiasImpl::NCBKernParam& param,
                          const StrategyParam& sparam,
321
                          const WorkspaceBundle& bundle_thread) override {
322 323 324 325 326
        do_postprocess<bias_ctype, dst_ctype, op_ctype, op_dtype,
                       postprocess_mode>(param, sparam, bundle_thread,
                                         THREAD_BUNDLE_IM2COL_INDEX,
                                         THREAD_BUNDLE_BIAS_INDEX);
    }
327 328 329 330 331 332 333 334 335 336

    void* get_matmul_dst_ptr(const fallback::ConvBiasImpl::NCBKernParam& param,
                             const WorkspaceBundle& bundle_thread,
                             const StrategyParam& sparam);
};

template <typename src_ctype, typename bias_ctype, typename dst_ctype,
          typename op_ctype, typename op_dtype,
          megdnn::PostprocessMode postprocess_mode>
class Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
337 338
               postprocess_mode, PackMode::DEFAULT, FormatMode::NCHW44>
        : public Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
339
                          postprocess_mode, PackMode::DEFAULT> {
340 341 342 343 344 345 346 347 348
public:
    const size_t BUNDLE_PADDING_INDEX = 0;
    const size_t BUNDLE_PACKA_INDEX = 1;
    const size_t THREAD_BUNDLE_PACKB_INDEX = 0;
    const size_t THREAD_BUNDLE_IM2COL_INDEX = 1;
    const size_t THREAD_BUNDLE_BIAS_INDEX = 2;

    Strategy() = default;

349 350 351 352 353 354
    void exec_im2col(
            const WorkspaceBundle& bundle, const WorkspaceBundle& bundle_thread,
            const StrategyParam& sparam,
            const fallback::ConvBiasImpl::NCBKernParam& param,
            fallback::MatrixMulImpl::KernParam matmul_param,
            const fallback::MatrixMulImpl::AlgoBase* matmul_algo) override;
355 356 357 358 359 360
};

template <typename src_ctype, typename bias_ctype, typename dst_ctype,
          typename op_ctype, typename op_dtype,
          megdnn::PostprocessMode postprocess_mode>
class Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
361 362 363 364
               postprocess_mode, PackMode::NO_PACK>
        : public StrategyBridge<src_ctype, bias_ctype, dst_ctype, op_ctype,
                                op_dtype, postprocess_mode, PackMode::NO_PACK,
                                FormatMode::NCHW> {
365 366 367 368 369 370 371 372
public:
    constexpr static size_t BUNDLE_PADDING_INDEX = 0;
    constexpr static size_t BUNDLE_PACKA_INDEX = 1;
    constexpr static size_t THREAD_BUNDLE_IM2COL_INDEX = 0;
    constexpr static size_t THREAD_BUNDLE_MATMULDST_INDEX = 1;
    constexpr static size_t THREAD_BUNDLE_BIAS_INDEX = 2;
    constexpr static size_t THREAD_BUNDLE_MATCOMP_INDEX = 3;

373
    Strategy() = default;
374

375
    void packA_kern(
376
            const WorkspaceBundle& bundle,
377 378 379 380 381
            const fallback::ConvBiasImpl::NCBKernParam& param,
            fallback::MatrixMulImpl::KernSizeParam matmulparam,
            const fallback::MatrixMulImpl::AlgoBase* matmul_algo,
            const fallback::ConvBiasImpl::NCBKernIndex& ncb_index,
            const fallback::MatrixMulImpl::AlgoBase::MatmulDescription& MDsec,
382
            const StrategyParam& sparam) override;
383

384 385 386 387 388 389 390 391
    void exec_matmul(const fallback::ConvBiasImpl::NCBKernParam& param,
                     const StrategyParam& sparam, const WorkspaceBundle& bundle,
                     const WorkspaceBundle& bundle_thread,
                     fallback::MatrixMulImpl::KernParam matmul_param,
                     const fallback::MatrixMulImpl::AlgoBase* matmul_algo,
                     const fallback::ConvBiasImpl::NCBKernIndex& ncb_index,
                     const fallback::MatrixMulImpl::AlgoBase::MatmulDescription&
                             matmul_desc) override;
392 393 394 395 396

    void* get_matmul_dst_ptr(const fallback::ConvBiasImpl::NCBKernParam& param,
                             const WorkspaceBundle& bundle_thread,
                             const StrategyParam& sparam);

397 398 399 400 401 402
    void exec_im2col(
            const WorkspaceBundle& bundle, const WorkspaceBundle& bundle_thread,
            const StrategyParam& sparam,
            const fallback::ConvBiasImpl::NCBKernParam& param,
            fallback::MatrixMulImpl::KernParam matmul_param,
            const fallback::MatrixMulImpl::AlgoBase* matmul_algo) override;
403 404
    void exec_postprocess(const fallback::ConvBiasImpl::NCBKernParam& param,
                          const StrategyParam& sparam,
405
                          const WorkspaceBundle& bundle_thread) override {
406 407 408 409 410
        do_postprocess<bias_ctype, dst_ctype, op_ctype, op_dtype,
                       postprocess_mode>(param, sparam, bundle_thread,
                                         THREAD_BUNDLE_MATMULDST_INDEX,
                                         THREAD_BUNDLE_BIAS_INDEX);
    }
411 412 413 414 415 416
};

template <typename src_ctype, typename bias_ctype, typename dst_ctype,
          typename op_ctype, typename op_dtype,
          megdnn::PostprocessMode postprocess_mode>
class Strategy<src_ctype, bias_ctype, dst_ctype, op_ctype, op_dtype,
417 418 419 420
               postprocess_mode, PackMode::ONLY_PACKA>
        : public StrategyBridge<src_ctype, bias_ctype, dst_ctype, op_ctype,
                                op_dtype, postprocess_mode,
                                PackMode::ONLY_PACKA,FormatMode::NCHW> {
421 422 423 424 425 426 427 428
public:
    constexpr static size_t BUNDLE_PADDING_INDEX = 0;
    constexpr static size_t BUNDLE_PACKA_INDEX = 1;
    constexpr static size_t THREAD_BUNDLE_PACKB_INDEX = 0;
    constexpr static size_t THREAD_BUNDLE_IM2COL_INDEX = 1;
    constexpr static size_t THREAD_BUNDLE_MATMULDST_INDEX = 2;
    constexpr static size_t THREAD_BUNDLE_BIAS_INDEX = 3;

429
    Strategy() = default;
430

431
    void packA_kern(
432
            const WorkspaceBundle& bundle,
433 434 435 436 437
            const fallback::ConvBiasImpl::NCBKernParam& param,
            fallback::MatrixMulImpl::KernSizeParam matmulparam,
            const fallback::MatrixMulImpl::AlgoBase* matmul_algo,
            const fallback::ConvBiasImpl::NCBKernIndex& ncb_index,
            const fallback::MatrixMulImpl::AlgoBase::MatmulDescription& MDsec,
438
            const StrategyParam& sparam) override;
439

440 441 442
    void exec_im2col(
            const WorkspaceBundle& bundle, const WorkspaceBundle& bundle_thread,
            const StrategyParam& sparam,
443 444
            const fallback::ConvBiasImpl::NCBKernParam& param,
            fallback::MatrixMulImpl::KernParam matmul_param,
445 446 447 448 449 450 451 452 453 454
            const fallback::MatrixMulImpl::AlgoBase* matmul_algo) override;

    void exec_matmul(const fallback::ConvBiasImpl::NCBKernParam& param,
                     const StrategyParam& sparam, const WorkspaceBundle& bundle,
                     const WorkspaceBundle& bundle_thread,
                     fallback::MatrixMulImpl::KernParam matmul_param,
                     const fallback::MatrixMulImpl::AlgoBase* matmul_algo,
                     const fallback::ConvBiasImpl::NCBKernIndex& ncb_index,
                     const fallback::MatrixMulImpl::AlgoBase::MatmulDescription&
                             matmul_desc) override;
455 456 457 458

    void* get_matmul_dst_ptr(const fallback::ConvBiasImpl::NCBKernParam& param,
                             const WorkspaceBundle& bundle_thread,
                             const StrategyParam& sparam);
459

460 461
    void exec_postprocess(const fallback::ConvBiasImpl::NCBKernParam& param,
                          const StrategyParam& sparam,
462
                          const WorkspaceBundle& bundle_thread) override {
463 464 465 466 467
        do_postprocess<bias_ctype, dst_ctype, op_ctype, op_dtype,
                       postprocess_mode>(param, sparam, bundle_thread,
                                         THREAD_BUNDLE_MATMULDST_INDEX,
                                         THREAD_BUNDLE_BIAS_INDEX);
    }
468
};
#if MEGDNN_AARCH64
//! int8 -> int32 -> int8 strategy on top of the DEFAULT/NCHW44 strategy that
//! supplies its own exec_im2col (defined out of line). The class name
//! suggests a fused im2col + pack step for a 4x4x16 kernel -- confirm against
//! the definition.
template <typename op_ctype, typename op_dtype,
          megdnn::PostprocessMode postprocess_mode>
class StrategyFuse4x4x16Nchw44
        : public Strategy<dt_int8, dt_int32, dt_int8, op_ctype, op_dtype,
                          postprocess_mode, PackMode::DEFAULT,
                          FormatMode::NCHW44> {
public:
    StrategyFuse4x4x16Nchw44() = default;

    //! slot indices inside the shared workspace bundle
    constexpr static size_t BUNDLE_PADDING_INDEX = 0;
    constexpr static size_t BUNDLE_PACKA_INDEX = 1;
    //! slot indices inside the per-thread workspace bundle
    constexpr static size_t THREAD_BUNDLE_PACKB_INDEX = 0;
    constexpr static size_t THREAD_BUNDLE_IM2COL_INDEX = 1;
    constexpr static size_t THREAD_BUNDLE_BIAS_INDEX = 2;

    void exec_im2col(
            const WorkspaceBundle& bundle, const WorkspaceBundle& bundle_thread,
            const StrategyParam& sparam,
            const fallback::ConvBiasImpl::NCBKernParam& param,
            fallback::MatrixMulImpl::KernParam matmul_param,
            const fallback::MatrixMulImpl::AlgoBase* matmul_algo) override;
};

template <typename op_ctype, typename op_dtype,
          megdnn::PostprocessMode postprocess_mode>
495 496
class StrategyFuse8x12x4Nchw44Dot
        : public Strategy<dt_int8, dt_int32, dt_int8, op_ctype, op_dtype,
497 498 499
                          postprocess_mode, PackMode::DEFAULT,
                          FormatMode::NCHW44> {
public:
500
    StrategyFuse8x12x4Nchw44Dot() = default;
501 502 503 504 505 506 507

    constexpr static size_t BUNDLE_PADDING_INDEX = 0;
    constexpr static size_t BUNDLE_PACKA_INDEX = 1;
    constexpr static size_t THREAD_BUNDLE_PACKB_INDEX = 0;
    constexpr static size_t THREAD_BUNDLE_IM2COL_INDEX = 1;
    constexpr static size_t THREAD_BUNDLE_BIAS_INDEX = 2;

508 509 510 511 512 513
    void exec_im2col(
            const WorkspaceBundle& bundle, const WorkspaceBundle& bundle_thread,
            const StrategyParam& sparam,
            const fallback::ConvBiasImpl::NCBKernParam& param,
            fallback::MatrixMulImpl::KernParam matmul_param,
            const fallback::MatrixMulImpl::AlgoBase* matmul_algo) override;
514
};
#else
template <typename op_ctype, typename op_dtype,
          megdnn::PostprocessMode postprocess_mode>
518
class StrategyFuse8x4x4Nchw44DotK3x3S2
519 520 521 522
        : public Strategy<dt_int8, dt_int32, dt_int8, op_ctype, op_dtype,
                          postprocess_mode, PackMode::DEFAULT,
                          FormatMode::NCHW44> {
public:
523
    StrategyFuse8x4x4Nchw44DotK3x3S2() = default;
524 525 526 527 528 529 530

    constexpr static size_t BUNDLE_PADDING_INDEX = 0;
    constexpr static size_t BUNDLE_PACKA_INDEX = 1;
    constexpr static size_t THREAD_BUNDLE_PACKB_INDEX = 0;
    constexpr static size_t THREAD_BUNDLE_IM2COL_INDEX = 1;
    constexpr static size_t THREAD_BUNDLE_BIAS_INDEX = 2;

531 532 533 534 535 536
    void exec_im2col(
            const WorkspaceBundle& bundle, const WorkspaceBundle& bundle_thread,
            const StrategyParam& sparam,
            const fallback::ConvBiasImpl::NCBKernParam& param,
            fallback::MatrixMulImpl::KernParam matmul_param,
            const fallback::MatrixMulImpl::AlgoBase* matmul_algo) override;
537 538 539
};
#endif

#if MEGDNN_AARCH64 || MEGDNN_ARMV7
//! float strategy layered on the DEFAULT/NCHW44 strategy; it declares its own
//! exec_im2col, whose definition lives out of line.
template <typename op_ctype, typename op_dtype,
          megdnn::PostprocessMode postprocess_mode>
class StrategyFuseXx12x1Nchw44K3x3S2
        : public Strategy<float, float, float, op_ctype, op_dtype,
                          postprocess_mode, PackMode::DEFAULT,
                          FormatMode::NCHW44> {
public:
    //! shared workspace bundle slots
    static constexpr size_t BUNDLE_PADDING_INDEX = 0;
    static constexpr size_t BUNDLE_PACKA_INDEX = 1;
    //! per-thread workspace bundle slots
    static constexpr size_t THREAD_BUNDLE_PACKB_INDEX = 0;
    static constexpr size_t THREAD_BUNDLE_IM2COL_INDEX = 1;
    static constexpr size_t THREAD_BUNDLE_BIAS_INDEX = 2;

    StrategyFuseXx12x1Nchw44K3x3S2() = default;

    void exec_im2col(
            const WorkspaceBundle& bundle, const WorkspaceBundle& bundle_thread,
            const StrategyParam& sparam,
            const fallback::ConvBiasImpl::NCBKernParam& param,
            fallback::MatrixMulImpl::KernParam matmul_param,
            const fallback::MatrixMulImpl::AlgoBase* matmul_algo) override;
};
#endif
}  // namespace megdnn

// vim: syntax=cpp.doxygen