/**
 * \file dnn/src/arm_common/conv_bias/fp32/algos.h
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 */

#pragma once

#include "src/arm_common/conv_bias/opr_impl.h"
#include "src/fallback/matrix_mul/opr_impl.h"

namespace megdnn {
namespace arm_common {
/**
 * \brief FP32 Winograd conv-bias algorithm, variant "F23_4x4".
 *
 * Pairs a fallback matrix-mul algorithm with a tile size. The members used
 * below (m_matmul_algo, m_tile_size, m_name) are not declared in this class
 * body; presumably MEGDNN_WINOGRAD_ALGO_FUN_DECLARE() supplies them together
 * with the remaining AlgoBase overrides -- TODO confirm against the macro.
 */
class ConvBiasImpl::AlgoFP32WinogradF23_4x4 final : public AlgoBase {
public:
    /**
     * \param matmul_algo matrix-mul algorithm, stored in m_matmul_algo; its
     *        name becomes part of this algorithm's name
     * \param tile_size tile size, stored in m_tile_size
     */
    AlgoFP32WinogradF23_4x4(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
                            uint32_t tile_size)
            : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}

    /**
     * \brief Algorithm name, built on the first call and cached in m_name.
     *
     * The string comes from ConvBiasImpl::algo_name, fed with the matmul
     * algorithm's name and the Winograd param {4, 2, m_tile_size}.
     * NOTE(review): the triple is presumably {channel block size, output
     * block size, tile size} -- confirm against ConvBias::WinogradParam.
     * NOTE(review): m_name is written from a const method without any
     * synchronisation visible here -- confirm the first call cannot race.
     */
    const char* name() const override {
        if (m_name.empty()) {
            m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
                    m_matmul_algo->name(), {4, 2, m_tile_size});
        }
        return m_name.c_str();
    }

    MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
};

/**
 * \brief FP32 Winograd conv-bias algorithm, variant "F63".
 *
 * Pairs a fallback matrix-mul algorithm with a tile size. The members used
 * below (m_matmul_algo, m_tile_size, m_name) are not declared in this class
 * body; presumably MEGDNN_WINOGRAD_ALGO_FUN_DECLARE() supplies them together
 * with the remaining AlgoBase overrides -- TODO confirm against the macro.
 */
class ConvBiasImpl::AlgoFP32WinogradF63 final : public AlgoBase {
public:
    /**
     * \param matmul_algo matrix-mul algorithm, stored in m_matmul_algo; its
     *        name becomes part of this algorithm's name
     * \param tile_size tile size, stored in m_tile_size
     */
    AlgoFP32WinogradF63(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
                        uint32_t tile_size)
            : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}

    /**
     * \brief Algorithm name, built on the first call and cached in m_name.
     *
     * The string comes from ConvBiasImpl::algo_name, fed with the matmul
     * algorithm's name and the Winograd param {1, 6, m_tile_size}.
     * NOTE(review): the triple is presumably {channel block size, output
     * block size, tile size} -- confirm against ConvBias::WinogradParam.
     * NOTE(review): m_name is written from a const method without any
     * synchronisation visible here -- confirm the first call cannot race.
     */
    const char* name() const override {
        if (m_name.empty()) {
            m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
                    m_matmul_algo->name(), {1, 6, m_tile_size});
        }
        return m_name.c_str();
    }

    MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
};

/**
 * \brief FP32 Winograd conv-bias algorithm, variant "F63_4x4".
 *
 * Pairs a fallback matrix-mul algorithm with a tile size. The members used
 * below (m_matmul_algo, m_tile_size, m_name) are not declared in this class
 * body; presumably MEGDNN_WINOGRAD_ALGO_FUN_DECLARE() supplies them together
 * with the remaining AlgoBase overrides -- TODO confirm against the macro.
 */
class ConvBiasImpl::AlgoFP32WinogradF63_4x4 final : public AlgoBase {
public:
    /**
     * \param matmul_algo matrix-mul algorithm, stored in m_matmul_algo; its
     *        name becomes part of this algorithm's name
     * \param tile_size tile size, stored in m_tile_size
     */
    AlgoFP32WinogradF63_4x4(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
                            uint32_t tile_size)
            : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}

    /**
     * \brief Algorithm name, built on the first call and cached in m_name.
     *
     * The string comes from ConvBiasImpl::algo_name, fed with the matmul
     * algorithm's name and the Winograd param {4, 6, m_tile_size}.
     * NOTE(review): the triple is presumably {channel block size, output
     * block size, tile size} -- confirm against ConvBias::WinogradParam.
     * NOTE(review): m_name is written from a const method without any
     * synchronisation visible here -- confirm the first call cannot race.
     */
    const char* name() const override {
        if (m_name.empty()) {
            m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
                    m_matmul_algo->name(), {4, 6, m_tile_size});
        }
        return m_name.c_str();
    }

    MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
};

/**
 * \brief FP32 Winograd conv-bias algorithm, variant "F54".
 *
 * Pairs a fallback matrix-mul algorithm with a tile size. The members used
 * below (m_matmul_algo, m_tile_size, m_name) are not declared in this class
 * body; presumably MEGDNN_WINOGRAD_ALGO_FUN_DECLARE() supplies them together
 * with the remaining AlgoBase overrides -- TODO confirm against the macro.
 */
class ConvBiasImpl::AlgoFP32WinogradF54 final : public AlgoBase {
public:
    /**
     * \param matmul_algo matrix-mul algorithm, stored in m_matmul_algo; its
     *        name becomes part of this algorithm's name
     * \param tile_size tile size, stored in m_tile_size
     */
    AlgoFP32WinogradF54(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
                        uint32_t tile_size)
            : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}

    /**
     * \brief Algorithm name, built on the first call and cached in m_name.
     *
     * The string comes from ConvBiasImpl::algo_name, fed with the matmul
     * algorithm's name and the Winograd param {1, 5, m_tile_size}.
     * NOTE(review): the triple is presumably {channel block size, output
     * block size, tile size} -- confirm against ConvBias::WinogradParam.
     * NOTE(review): m_name is written from a const method without any
     * synchronisation visible here -- confirm the first call cannot race.
     */
    const char* name() const override {
        if (m_name.empty()) {
            m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
                    m_matmul_algo->name(), {1, 5, m_tile_size});
        }
        return m_name.c_str();
    }

    MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
};

/**
 * \brief FP32 Winograd conv-bias algorithm, variant "F45".
 *
 * Pairs a fallback matrix-mul algorithm with a tile size. The members used
 * below (m_matmul_algo, m_tile_size, m_name) are not declared in this class
 * body; presumably MEGDNN_WINOGRAD_ALGO_FUN_DECLARE() supplies them together
 * with the remaining AlgoBase overrides -- TODO confirm against the macro.
 */
class ConvBiasImpl::AlgoFP32WinogradF45 final : public AlgoBase {
public:
    /**
     * \param matmul_algo matrix-mul algorithm, stored in m_matmul_algo; its
     *        name becomes part of this algorithm's name
     * \param tile_size tile size, stored in m_tile_size
     */
    AlgoFP32WinogradF45(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
                        uint32_t tile_size)
            : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}

    /**
     * \brief Algorithm name, built on the first call and cached in m_name.
     *
     * The string comes from ConvBiasImpl::algo_name, fed with the matmul
     * algorithm's name and the Winograd param {1, 4, m_tile_size}.
     * NOTE(review): the triple is presumably {channel block size, output
     * block size, tile size} -- confirm against ConvBias::WinogradParam.
     * NOTE(review): m_name is written from a const method without any
     * synchronisation visible here -- confirm the first call cannot race.
     */
    const char* name() const override {
        if (m_name.empty()) {
            m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
                    m_matmul_algo->name(), {1, 4, m_tile_size});
        }
        return m_name.c_str();
    }

    MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
};

//===================== NCHW44 Winograd Support =====================//
/**
 * \brief FP32 Winograd conv-bias algorithm, variant "F23_4x4" for the NCHW44
 *        format.
 *
 * Pairs a fallback matrix-mul algorithm with a tile size. The members used
 * below (m_matmul_algo, m_tile_size, m_name) are not declared in this class
 * body; presumably MEGDNN_WINOGRAD_ALGO_FUN_DECLARE() supplies them together
 * with the remaining AlgoBase overrides -- TODO confirm against the macro.
 */
class ConvBiasImpl::AlgoFP32WinogradF23_4x4_NCHW44 final : public AlgoBase {
public:
    /**
     * \param matmul_algo matrix-mul algorithm, stored in m_matmul_algo; its
     *        name becomes part of this algorithm's name
     * \param tile_size tile size, stored in m_tile_size
     */
    AlgoFP32WinogradF23_4x4_NCHW44(
            fallback::MatrixMulImpl::AlgoBase* matmul_algo, uint32_t tile_size)
            : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}

    /**
     * \brief Algorithm name, built on the first call and cached in m_name.
     *
     * The string comes from ConvBiasImpl::algo_name, fed with the matmul
     * algorithm's name, the Winograd param {4, 2, m_tile_size} and the
     * NCHW44 format tag.
     * NOTE(review): the triple is presumably {channel block size, output
     * block size, tile size} -- confirm against ConvBias::WinogradParam.
     * NOTE(review): m_name is written from a const method without any
     * synchronisation visible here -- confirm the first call cannot race.
     */
    const char* name() const override {
        if (m_name.empty()) {
            m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
                    m_matmul_algo->name(), {4, 2, m_tile_size},
                    param::ConvBias::Format::NCHW44);
        }
        return m_name.c_str();
    }

    MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
};

/**
 * \brief FP32 Winograd conv-bias algorithm, variant "F63_4x4" for the NCHW44
 *        format.
 *
 * Pairs a fallback matrix-mul algorithm with a tile size. The members used
 * below (m_matmul_algo, m_tile_size, m_name) are not declared in this class
 * body; presumably MEGDNN_WINOGRAD_ALGO_FUN_DECLARE() supplies them together
 * with the remaining AlgoBase overrides -- TODO confirm against the macro.
 */
class ConvBiasImpl::AlgoFP32WinogradF63_4x4_NCHW44 final : public AlgoBase {
public:
    /**
     * \param matmul_algo matrix-mul algorithm, stored in m_matmul_algo; its
     *        name becomes part of this algorithm's name
     * \param tile_size tile size, stored in m_tile_size
     */
    AlgoFP32WinogradF63_4x4_NCHW44(
            fallback::MatrixMulImpl::AlgoBase* matmul_algo, uint32_t tile_size)
            : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}

    /**
     * \brief Algorithm name, built on the first call and cached in m_name.
     *
     * The string comes from ConvBiasImpl::algo_name, fed with the matmul
     * algorithm's name, the Winograd param {4, 6, m_tile_size} and the
     * NCHW44 format tag.
     * NOTE(review): the triple is presumably {channel block size, output
     * block size, tile size} -- confirm against ConvBias::WinogradParam.
     * NOTE(review): m_name is written from a const method without any
     * synchronisation visible here -- confirm the first call cannot race.
     */
    const char* name() const override {
        if (m_name.empty()) {
            m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
                    m_matmul_algo->name(), {4, 6, m_tile_size},
                    param::ConvBias::Format::NCHW44);
        }
        return m_name.c_str();
    }

    MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
};
// ================================================================= //

129 130 131 132 133 134 135 136 137 138 139
/**
 * \brief FP32 direct conv-bias algorithm, registered under a "large group"
 *        or a "small group" name chosen at construction.
 *
 * Only declarations live here: usable(), get_workspace(), dispatch_kerns()
 * and get_kimpls() are defined outside this header.
 */
class ConvBiasImpl::AlgoF32Direct final : public AlgoBase {
    //! Private helper; its definition is not visible in this header.
    //! NOTE(review): presumably builds the kernels that dispatch_kerns()
    //! returns -- confirm in the implementation file.
    SmallVector<NCBKern> get_kimpls(const NCBKernSizeParam& param) const;
    //! Group-size flag; within this header it only selects the reported name.
    bool m_large_group;

public:
    //! \param is_large_group stored in m_large_group
    AlgoF32Direct(bool is_large_group) : m_large_group{is_large_group} {}

    //! Always reports true.
    bool is_reproducible() const override { return true; }

    //! \return "F32DIRECT_LARGE_GROUP" when m_large_group is set, otherwise
    //!         "F32DIRECT_SMALL_GROUP".
    const char* name() const override {
        return m_large_group ? "F32DIRECT_LARGE_GROUP"
                             : "F32DIRECT_SMALL_GROUP";
    }

    bool usable(const NCBKernSizeParam& param,
                AlgoSelectionStrategy algo_selection_strategy) const override;

    size_t get_workspace(const NCBKernSizeParam& param) const override;

    // `override` already implies virtual, so the keyword is not repeated.
    SmallVector<NCBKern> dispatch_kerns(
            const NCBKernSizeParam& param) const override;
};

148
/**
 * \brief FP32 direct conv-bias algorithm "STRD1", registered under a "large
 *        group" or a "small group" name chosen at construction.
 *
 * Only declarations live here: usable(), get_workspace(), dispatch_kerns()
 * and get_kimpls() are defined outside this header.
 */
class ConvBiasImpl::AlgoF32DirectStride1 final : public AlgoBase {
    //! Private helper; its definition is not visible in this header.
    //! NOTE(review): presumably builds the kernels that dispatch_kerns()
    //! returns -- confirm in the implementation file.
    SmallVector<NCBKern> get_kimpls(const NCBKernSizeParam& param) const;
    //! Group-size flag; within this header it only selects the reported name.
    bool m_large_group;

public:
    //! \param is_large_group stored in m_large_group
    AlgoF32DirectStride1(bool is_large_group) : m_large_group{is_large_group} {}

    //! Always reports true.
    bool is_reproducible() const override { return true; }

    //! \return "F32STRD1_LARGE_GROUP" when m_large_group is set, otherwise
    //!         "F32STRD1_SMALL_GROUP".
    const char* name() const override {
        return m_large_group ? "F32STRD1_LARGE_GROUP" : "F32STRD1_SMALL_GROUP";
    }

    bool usable(const NCBKernSizeParam& param,
                AlgoSelectionStrategy algo_selection_strategy) const override;

    size_t get_workspace(const NCBKernSizeParam& param) const override;

    // `override` already implies virtual, so the keyword is not repeated.
    SmallVector<NCBKern> dispatch_kerns(
            const NCBKernSizeParam& param) const override;
};
165 166

/**
 * \brief FP32 direct conv-bias algorithm "STRD2", registered under a "large
 *        group" or a "small group" name chosen at construction.
 *
 * Only declarations live here: usable(), get_workspace(), dispatch_kerns()
 * and get_kimpls() are defined outside this header.
 */
class ConvBiasImpl::AlgoF32DirectStride2 final : public AlgoBase {
    //! Private helper; its definition is not visible in this header.
    //! NOTE(review): presumably builds the kernels that dispatch_kerns()
    //! returns -- confirm in the implementation file.
    SmallVector<NCBKern> get_kimpls(const NCBKernSizeParam& param) const;
    //! Group-size flag; within this header it only selects the reported name.
    bool m_large_group;

public:
    //! \param is_large_group stored in m_large_group
    AlgoF32DirectStride2(bool is_large_group) : m_large_group{is_large_group} {}

    //! Always reports true.
    bool is_reproducible() const override { return true; }

    //! \return "F32STRD2_LARGE_GROUP" when m_large_group is set, otherwise
    //!         "F32STRD2_SMALL_GROUP".
    const char* name() const override {
        return m_large_group ? "F32STRD2_LARGE_GROUP" : "F32STRD2_SMALL_GROUP";
    }

    bool usable(const NCBKernSizeParam& param,
                AlgoSelectionStrategy algo_selection_strategy) const override;

    size_t get_workspace(const NCBKernSizeParam& param) const override;

    // `override` already implies virtual, so the keyword is not repeated.
    SmallVector<NCBKern> dispatch_kerns(
            const NCBKernSizeParam& param) const override;
};

184
/**
 * \brief FP32 direct conv-bias algorithm reported as "F32_CONV_NCHW44_DIRECT".
 *
 * Only declarations live here: usable(), get_workspace(), dispatch_kerns()
 * and get_kimpls() are defined outside this header.
 */
class ConvBiasImpl::AlgoF32DirectNCHW44 final : public AlgoBase {
    //! Private helper; its definition is not visible in this header.
    //! NOTE(review): presumably builds the kernels that dispatch_kerns()
    //! returns -- confirm in the implementation file.
    SmallVector<NCBKern> get_kimpls(const NCBKernSizeParam& param) const;

public:
    //! No state to initialise.
    AlgoF32DirectNCHW44() {}

    //! Always reports true.
    bool is_reproducible() const override { return true; }

    //! \return the fixed algorithm name.
    const char* name() const override { return "F32_CONV_NCHW44_DIRECT"; }

    bool usable(const NCBKernSizeParam& param,
                AlgoSelectionStrategy algo_selection_strategy) const override;

    size_t get_workspace(const NCBKernSizeParam& param) const override;

    // `override` already implies virtual, so the keyword is not repeated.
    SmallVector<NCBKern> dispatch_kerns(
            const NCBKernSizeParam& param) const override;
};

199
/**
 * \brief FP32 direct conv-bias algorithm reported as "F32_CONV_NCHW_NCHW44".
 *
 * Only declarations live here: usable(), get_workspace(), dispatch_kerns()
 * and get_kimpls() are defined outside this header.
 */
class ConvBiasImpl::AlgoF32DirectNCHWNCHW44 final : public AlgoBase {
    //! Private helper; its definition is not visible in this header.
    //! NOTE(review): presumably builds the kernels that dispatch_kerns()
    //! returns -- confirm in the implementation file.
    SmallVector<NCBKern> get_kimpls(const NCBKernSizeParam& param) const;

public:
    //! No state to initialise.
    AlgoF32DirectNCHWNCHW44() {}

    //! Always reports true.
    bool is_reproducible() const override { return true; }

    //! \return the fixed algorithm name.
    const char* name() const override { return "F32_CONV_NCHW_NCHW44"; }

    bool usable(const NCBKernSizeParam& param,
                AlgoSelectionStrategy algo_selection_strategy) const override;

    size_t get_workspace(const NCBKernSizeParam& param) const override;

    // `override` already implies virtual, so the keyword is not repeated.
    SmallVector<NCBKern> dispatch_kerns(
            const NCBKernSizeParam& param) const override;
};
213

214
/**
 * \brief FP32 conv-bias algorithm reported as "F32_CHANNEL_WISE_NCHW44".
 *
 * Only declarations live here: usable(), get_workspace(), dispatch_kerns()
 * and get_kimpls() are defined outside this header. No constructor is
 * declared, so the implicitly generated one is used.
 */
class ConvBiasImpl::AlgoF32ChannelWiseNCHW44 final : public AlgoBase {
    //! Private helper; its definition is not visible in this header.
    //! NOTE(review): presumably builds the kernels that dispatch_kerns()
    //! returns -- confirm in the implementation file.
    SmallVector<NCBKern> get_kimpls(const NCBKernSizeParam& param) const;

public:
    //! Always reports true.
    bool is_reproducible() const override { return true; }

    //! \return the fixed algorithm name.
    const char* name() const override { return "F32_CHANNEL_WISE_NCHW44"; }

    bool usable(const NCBKernSizeParam& param,
                AlgoSelectionStrategy algo_selection_strategy) const override;

    size_t get_workspace(const NCBKernSizeParam& param) const override;

    // `override` already implies virtual, so the keyword is not repeated.
    SmallVector<NCBKern> dispatch_kerns(
            const NCBKernSizeParam& param) const override;
};

228 229 230
}  // namespace arm_common
}  // namespace megdnn

231 232
// The Winograd classes in this header expand MEGDNN_WINOGRAD_ALGO_FUN_DECLARE;
// its definition is not in this file (presumably it comes from an included
// header). Undefine it here so it is not visible to code that follows this
// header in a translation unit.
#undef MEGDNN_WINOGRAD_ALGO_FUN_DECLARE

// vim: syntax=cpp.doxygen