/**
 * \file dnn/src/cuda/convolution/opr_impl.h
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
#pragma once

#include "megdnn/oprs/nn.h"
#include "src/common/utils.h"

namespace megdnn {
namespace cuda {

class ConvolutionForwardImpl: public ConvolutionForward {
    public:
        using ConvolutionForward::ConvolutionForward;
22 23 24 25 26
        void exec(_megdnn_tensor_in src, _megdnn_tensor_in filter,
                  _megdnn_tensor_out dst,
                  const PreprocessedFilter* preprocessed_filter,
                  _megdnn_workspace workspace) override;

27 28 29 30 31 32 33 34
        std::vector<Algorithm *> get_all_algorithms(const TensorLayout &src,
                const TensorLayout &filter,
                const TensorLayout &dst) override;
        Algorithm* get_algorithm_heuristic(const TensorLayout& src,
                                           const TensorLayout& filter,
                                           const TensorLayout& dst,
                                           size_t workspace_limit_in_bytes,
                                           bool reproducible) override;
35 36 37 38
        size_t get_workspace_in_bytes(
                const TensorLayout& src, const TensorLayout& filter,
                const TensorLayout& dst,
                const PreprocessedFilter* preprocessed_filter) override;
39 40
        const char* get_algorithm_set_name() const override;

41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
        SmallVector<TensorLayout> deduce_preprocessed_filter_layout(
                const TensorLayout&, const TensorLayout&,
                const TensorLayout&) override {
            return {};
        }
        size_t get_preprocess_workspace_in_bytes(
                const TensorLayout& , const TensorLayout& ,
                const TensorLayout& ) override{
            return 0;
        }
        void exec_preprocess(const TensorLayout&, _megdnn_tensor_in,
                             const TensorLayout&, PreprocessedFilter*,
                             _megdnn_workspace) override {
            megdnn_throw("cuda exec_preprocess has not implemeted yet");
        }

57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81
    protected:
        struct ConvBiasExtraData{
            std::unique_ptr<ConvBiasForward> convbias_opr;
            TensorLayout bias_layout;
            TensorLayout z_layout;
        };
    private:
        ConvBiasExtraData conv_bias_extra_data(const TensorLayout&);
};

class ConvolutionBackwardDataImpl: public ConvolutionBackwardData {
    public:
        using ConvolutionBackwardData::ConvolutionBackwardData;
        void exec(_megdnn_tensor_in filter,
                _megdnn_tensor_in diff,
                _megdnn_tensor_out grad,
                _megdnn_workspace workspace) override;
        std::vector<Algorithm *> get_all_algorithms(const TensorLayout &filter,
                const TensorLayout &diff,
                const TensorLayout &grad) override;
        Algorithm* get_algorithm_heuristic(const TensorLayout& filter,
                                           const TensorLayout& diff,
                                           const TensorLayout& grad,
                                           size_t workspace_limit_in_bytes,
                                           bool reproducible) override;
82 83 84 85 86
        Algorithm* get_algorithm_heuristic(
                const TensorLayout& filter,
                const CanonizedFilterMeta& filter_meta,
                const TensorLayout& diff, const TensorLayout& grad,
                size_t workspace_limit_in_bytes, bool reproducible);
87 88 89 90 91 92 93 94 95 96 97
        size_t get_workspace_in_bytes(const TensorLayout& filter,
                                      const TensorLayout& diff,
                                      const TensorLayout& grad) override;
        const char* get_algorithm_set_name() const override;

        class AlgoBase;
        class AlgoCUDNN;
        class AlgoMatmul;
        class AlgoChanwise;
        class AlgoChanwiseSmall;
        class AlgoGroupConvGeneral;
98
        class AlgoBFloat16;
99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126

        class AlgoPack;

        static const AlgoPack& algo_pack() {
            return sm_algo_pack;
        }

    private:
        static AlgoPack sm_algo_pack;
};

/**
 * \brief CUDA-side implementation of the ConvolutionBackwardFilter operator.
 *
 * Members marked `override` implement virtuals of ConvolutionBackwardFilter.
 * Nothing in this class has an inline body except algo_pack(); the rest is
 * defined outside this header.
 */
class ConvolutionBackwardFilterImpl: public ConvolutionBackwardFilter {
    public:
        //! reuse the constructors of the base operator
        using ConvolutionBackwardFilter::ConvolutionBackwardFilter;

        /**
         * \brief run the operator
         * \param src input src tensor
         * \param diff input diff tensor
         * \param grad output grad tensor
         * \param workspace scratch workspace provided by the caller
         */
        void exec(_megdnn_tensor_in src,
                _megdnn_tensor_in diff,
                _megdnn_tensor_out grad,
                _megdnn_workspace workspace) override;

        //! list algorithms for the given src/diff/grad layouts
        std::vector<Algorithm *> get_all_algorithms(const TensorLayout &src,
                const TensorLayout &diff,
                const TensorLayout &grad) override;

        /**
         * \brief pick one algorithm for the given layouts
         * \param workspace_limit_in_bytes workspace budget passed by the
         *      caller
         * \param reproducible flag passed by the caller; presumably limits
         *      the choice to reproducible algorithms -- confirm in the
         *      definition
         */
        Algorithm* get_algorithm_heuristic(const TensorLayout& src,
                                           const TensorLayout& diff,
                                           const TensorLayout& grad,
                                           size_t workspace_limit_in_bytes,
                                           bool reproducible) override;

        /**
         * \brief non-virtual overload that additionally takes a
         *      CanonizedFilterMeta for grad
         *
         * NOTE(review): presumably lets callers that already hold the meta
         * avoid recomputing it; confirm in the definition.
         */
        Algorithm* get_algorithm_heuristic(const TensorLayout& src,
                                           const TensorLayout& diff,
                                           const TensorLayout& grad,
                                           const CanonizedFilterMeta& grad_meta,
                                           size_t workspace_limit_in_bytes,
                                           bool reproducible);

        //! workspace size, in bytes, for the given layouts
        size_t get_workspace_in_bytes(const TensorLayout& src,
                                      const TensorLayout& diff,
                                      const TensorLayout& grad) override;

        //! name of the algorithm set of this operator
        const char* get_algorithm_set_name() const override;

        // Nested algorithm types: forward declarations only, the definitions
        // live outside this header.
        class AlgoBase;
        class AlgoCUDNN;
        class AlgoMatmul;
        class AlgoChanwise;
        class AlgoGroupConvGeneral;
        class AlgoBFloat16;

        class AlgoPack;

        //! read-only access to the class-wide AlgoPack instance
        static const AlgoPack& algo_pack() {
            return sm_algo_pack;
        }

    private:
        //! class-wide AlgoPack; declared here, defined elsewhere
        static AlgoPack sm_algo_pack;
};

} // namespace cuda
} // namespace megdnn

// vim: syntax=cpp.doxygen