// opr_impl.h
#pragma once
#include "../elemwise/opr_impl.h"
#include "megdnn/oprs.h"
#include "src/cuda/cudnn_with_check.h"

namespace megdnn {
namespace cuda {

class ConvBiasForwardImpl : public ConvBiasForward {
public:
    using ConvBiasForward::ConvBiasForward;
M
Megvii Engine Team 已提交
12 13 14 15 16 17 18 19 20
    void exec(
            _megdnn_tensor_in src, _megdnn_tensor_in filter, _megdnn_tensor_in bias,
            _megdnn_tensor_in z, _megdnn_tensor_out dst,
            const PreprocessedFilter* preprocessed_filter,
            _megdnn_workspace workspace) override;
    size_t get_workspace_in_bytes(
            const TensorLayout&, const TensorLayout&, const TensorLayout&,
            const TensorLayout&, const TensorLayout&,
            const PreprocessedFilter*) override;
21

M
Megvii Engine Team 已提交
22 23 24
    size_t get_preprocess_workspace_in_bytes(
            const TensorLayout&, const TensorLayout&, const TensorLayout&,
            const TensorLayout&, const TensorLayout&) override;
25 26
    SmallVector<TensorLayout> deduce_preprocessed_filter_layout(
            const TensorLayout&, const TensorLayout&, const TensorLayout&,
M
Megvii Engine Team 已提交
27
            const TensorLayout&, const TensorLayout&) override;
M
Megvii Engine Team 已提交
28 29 30 31
    void exec_preprocess(
            const TensorLayout&, _megdnn_tensor_in, _megdnn_tensor_in,
            const TensorLayout&, const TensorLayout&, PreprocessedFilter*,
            _megdnn_workspace) override;
32 33 34 35 36
    const char* get_algorithm_set_name() const override;

    class AlgoBase;
    class AlgoChanwise;
    class AlgoChanwiseSmall;
37
    class AlgoDepthwiseLargeFilter;
38
    class AlgoChanwise8x8x32;
39
    class AlgoFallbackNCHWQS8;
40 41 42 43 44 45 46
    class AlgoInplaceMatmul;
    class AlgoMatmul;
    class AlgoMatmul8x8x32;
    class Algo1x1;
    class AlgoBatchedMatmul;
    class AlgoGroupConvGeneral;
    class AlgoQUInt4x4x32WMMA;
47
    class AlgoCutlassConvolutionBase;
48 49 50 51 52 53
    class AlgoInt8CHWN4DotProdImplicitGemm;
    class AlgoInt8NCHW4DotProdImplicitGemm;
    class AlgoInt8CHWN4IMMAImplicitGemm;
    class AlgoInt8NCHW4IMMAImplicitGemm;
    class AlgoInt8CHWN4IMMAImplicitGemmReorderFilter;
    class AlgoInt8CHWN4IMMAImplicitGemmUnrollWidth;
54
    class AlgoInt8NCHW32IMMAImplicitGemm;
55
    class AlgoInt8NHWCIMMAImplicitGemm;
56
    class AlgoInt4NCHW64IMMAImplicitGemmBase;
57
    class AlgoInt4Int4NCHW64IMMAImplicitGemm;
58
    class AlgoUInt4Int4NCHW64IMMAImplicitGemm;
59 60 61
    class AlgoInt4NHWCIMMAImplicitGemmBase;
    class AlgoInt4Int4NHWCIMMAImplicitGemm;
    class AlgoUInt4Int4NHWCIMMAImplicitGemm;
62
    class AlgoBFloat16;
63
    class AlgoSimpleInt1;
64 65 66
    // The following algorithms are suitable for channel wise convolution
    class AlgoFloat32NCHWFMAImplicitBatchedGemm;
    class AlgoFloat16NCHWHMMAImplicitBatchedGemm;
67
    class AlgoCUDNNConvBase;
68
    class AlgoCUDNNConv;
69
    class AlgoCUDNNConvBiasActivationBase;
70 71
    class AlgoCUDNNConvBiasActivation;
#if CUDNN_VERSION >= 8020
72 73 74
    class AlgoCUDNNConvV8;
    class AlgoCUDNNConvBiasActivationV8;
#endif
75 76 77 78 79

    class AlgoPack;

    static const AlgoPack& algo_pack() { return sm_algo_pack; }

80
    Algorithm* get_algorithm_from_desc(const AlgorithmDesc& desc) override;
81 82 83 84 85

    std::vector<Algorithm*> get_all_algorithms(
            const TensorLayout& src, const TensorLayout& filter,
            const TensorLayout& bias, const TensorLayout& z,
            const TensorLayout& dst) override;
86 87 88 89
    std::vector<Algorithm*> get_all_algorithms_safe(
            const TensorLayout& src, const TensorLayout& filter,
            const TensorLayout& bias, const TensorLayout& z,
            const TensorLayout& dst) override;
90 91
    Algorithm* get_algorithm_heuristic(
            const TensorLayout& src, const TensorLayout& filter,
M
Megvii Engine Team 已提交
92 93
            const TensorLayout& bias, const TensorLayout& z, const TensorLayout& dst,
            size_t workspace_limit_in_bytes, const AlgoAttribute& positive_attr,
94
            const AlgoAttribute& negative_attr) override;
95

96 97 98 99 100 101
private:
    static AlgoPack sm_algo_pack;
};

}  // namespace cuda
}  // namespace megdnn
// vim: syntax=cpp.doxygen