opr_impl.h 8.0 KB
Newer Older
1 2 3 4
/**
 * \file dnn/src/cuda/convolution/opr_impl.h
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 */
#pragma once

#include "megdnn/oprs/nn.h"
15
#include "src/common/utils.h"
16 17 18 19

namespace megdnn {
namespace cuda {

20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
//! CUDA-side implementation of the ConvolutionForward operator interface.
//!
//! Only the preprocessing hooks and algo_pack() are defined inline here; every
//! other member is declared in this header and defined out of line.
class ConvolutionForwardImpl : public ConvolutionForward {
public:
    //! Inherit the base-class constructors unchanged.
    using ConvolutionForward::ConvolutionForward;

    //! Override of ConvolutionForward::exec taking \p src and \p filter as
    //! inputs and \p dst as output, plus an optional preprocessed filter and
    //! the caller-provided \p workspace.
    void exec(_megdnn_tensor_in src, _megdnn_tensor_in filter,
              _megdnn_tensor_out dst,
              const PreprocessedFilter* preprocessed_filter,
              _megdnn_workspace workspace) override;

    //! Workspace size, in bytes, for the given layouts.
    size_t get_workspace_in_bytes(
            const TensorLayout& src, const TensorLayout& filter,
            const TensorLayout& dst,
            const PreprocessedFilter* preprocessed_filter) override;

    //! Name identifying this implementation's algorithm set.
    const char* get_algorithm_set_name() const override;

    //! Filter preprocessing is not provided by this class: the deduced layout
    //! list is always empty.
    SmallVector<TensorLayout> deduce_preprocessed_filter_layout(
            const TensorLayout&, const TensorLayout&,
            const TensorLayout&) override {
        return {};
    }

    //! Filter preprocessing is not provided by this class: the reported
    //! preprocessing workspace size is always 0.
    size_t get_preprocess_workspace_in_bytes(const TensorLayout&,
                                             const TensorLayout&,
                                             const TensorLayout&) override {
        return 0;
    }

    //! Filter preprocessing is not provided by this class: every call ends in
    //! megdnn_throw and all arguments are ignored.
    void exec_preprocess(const TensorLayout&, _megdnn_tensor_in,
                         const TensorLayout&, PreprocessedFilter*,
                         _megdnn_workspace) override {
        megdnn_throw("cuda exec_preprocess has not implemeted yet");
    }

    //! Look up the algorithm object that corresponds to \p desc.
    Algorithm* get_algorithm_from_desc(const AlgorithmDesc& desc) override;

    // Nested algorithm types; only forward-declared in this header.
    class AlgoBase;
    class AlgoDefault;
    class AlgoPack;

    //! Read-only access to the class-wide AlgoPack instance.
    static const AlgoPack& algo_pack() { return sm_algo_pack; }

protected:
    //! Enumerate the algorithms for the given layouts.
    std::vector<Algorithm*> get_all_algorithms(
            const TensorLayout& src, const TensorLayout& filter,
            const TensorLayout& dst) override;

    //! Pick one algorithm for the given layouts, subject to
    //! \p workspace_limit_in_bytes and the \p reproducible flag.
    Algorithm* get_algorithm_heuristic(const TensorLayout& src,
                                       const TensorLayout& filter,
                                       const TensorLayout& dst,
                                       size_t workspace_limit_in_bytes,
                                       bool reproducible) override;

private:
    //! Single AlgoPack shared by all instances; this is a declaration only, the
    //! definition must live in a translation unit.
    static AlgoPack sm_algo_pack;
};

72 73 74 75 76 77 78 79 80 81 82 83 84
class ConvolutionBackwardDataImpl : public ConvolutionBackwardData {
public:
    using ConvolutionBackwardData::ConvolutionBackwardData;
    void exec(_megdnn_tensor_in filter, _megdnn_tensor_in diff,
              _megdnn_tensor_out grad, _megdnn_workspace workspace) override;
    AlgorithmInfo get_algorithm_info_heuristic(
            const TensorLayout& filter, const CanonizedFilterMeta& filter_meta,
            const TensorLayout& diff, const TensorLayout& grad,
            size_t workspace_limit_in_bytes, bool reproducible) {
        return get_algorithm_heuristic(filter, filter_meta, diff, grad,
                                       workspace_limit_in_bytes, reproducible)
                ->info();
    }
85 86 87 88 89 90 91 92 93 94 95

    AlgorithmInfo get_algorithm_info_heuristic(const TensorLayout& filter,
                                               const TensorLayout& diff,
                                               const TensorLayout& grad,
                                               size_t workspace_limit_in_bytes,
                                               bool reproducible) {
        return get_algorithm_heuristic(filter, diff, grad,
                                       workspace_limit_in_bytes, reproducible)
                ->info();
    }

96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112
    size_t get_workspace_in_bytes(const TensorLayout& filter,
                                  const TensorLayout& diff,
                                  const TensorLayout& grad) override;
    const char* get_algorithm_set_name() const override;

    class AlgoBase;
    class AlgoCUDNN;
    class AlgoMatmul;
    class AlgoChanwise;
    class AlgoChanwiseSmall;
    class AlgoGroupConvGeneral;
    class AlgoBFloat16;

    class AlgoPack;

    static const AlgoPack& algo_pack() { return sm_algo_pack; }

113
    Algorithm* get_algorithm_from_desc(const AlgorithmDesc& desc) override;
114 115 116 117 118 119 120 121 122 123

protected:
    std::vector<Algorithm*> get_all_algorithms(
            const TensorLayout& filter, const TensorLayout& diff,
            const TensorLayout& grad) override;
    Algorithm* get_algorithm_heuristic(const TensorLayout& filter,
                                       const TensorLayout& diff,
                                       const TensorLayout& grad,
                                       size_t workspace_limit_in_bytes,
                                       bool reproducible) override;
124

125 126 127 128 129 130 131 132 133 134
private:
    Algorithm* get_algorithm_heuristic(const TensorLayout& filter,
                                       const CanonizedFilterMeta& filter_meta,
                                       const TensorLayout& diff,
                                       const TensorLayout& grad,
                                       size_t workspace_limit_in_bytes,
                                       bool reproducible);

    static AlgoPack sm_algo_pack;
};
135

136 137 138 139 140 141 142 143
class ConvolutionBackwardFilterImpl : public ConvolutionBackwardFilter {
public:
    using ConvolutionBackwardFilter::ConvolutionBackwardFilter;
    void exec(_megdnn_tensor_in src, _megdnn_tensor_in diff,
              _megdnn_tensor_out grad, _megdnn_workspace workspace) override;
    size_t get_workspace_in_bytes(const TensorLayout& src,
                                  const TensorLayout& diff,
                                  const TensorLayout& grad) override;
144 145 146 147
    AlgorithmInfo get_algorithm_info_heuristic(
            const TensorLayout& src, const TensorLayout& diff,
            const TensorLayout& grad, const CanonizedFilterMeta& grad_meta,
            size_t workspace_limit_in_bytes, bool reproducible) {
148 149 150 151 152
        return get_algorithm_heuristic(src, diff, grad, grad_meta,
                                       workspace_limit_in_bytes, reproducible)
                ->info();
    }

153 154 155 156 157 158 159 160 161 162
    AlgorithmInfo get_algorithm_info_heuristic(const TensorLayout& filter,
                                               const TensorLayout& diff,
                                               const TensorLayout& grad,
                                               size_t workspace_limit_in_bytes,
                                               bool reproducible) {
        return get_algorithm_heuristic(filter, diff, grad,
                                       workspace_limit_in_bytes, reproducible)
                ->info();
    }

163 164 165 166 167 168 169 170 171 172 173 174 175
    const char* get_algorithm_set_name() const override;

    class AlgoBase;
    class AlgoCUDNN;
    class AlgoMatmul;
    class AlgoChanwise;
    class AlgoGroupConvGeneral;
    class AlgoBFloat16;

    class AlgoPack;

    static const AlgoPack& algo_pack() { return sm_algo_pack; }

176
    Algorithm* get_algorithm_from_desc(const AlgorithmDesc& desc) override;
177 178 179 180 181 182 183 184 185 186

protected:
    std::vector<Algorithm*> get_all_algorithms(
            const TensorLayout& src, const TensorLayout& diff,
            const TensorLayout& grad) override;
    Algorithm* get_algorithm_heuristic(const TensorLayout& src,
                                       const TensorLayout& diff,
                                       const TensorLayout& grad,
                                       size_t workspace_limit_in_bytes,
                                       bool reproducible) override;
187

188 189 190 191 192 193 194 195 196
private:
    Algorithm* get_algorithm_heuristic(const TensorLayout& src,
                                       const TensorLayout& diff,
                                       const TensorLayout& grad,
                                       const CanonizedFilterMeta& grad_meta,
                                       size_t workspace_limit_in_bytes,
                                       bool reproducible);

    static AlgoPack sm_algo_pack;
197 198
};

199 200
}  // namespace cuda
}  // namespace megdnn
201 202

// vim: syntax=cpp.doxygen