// opr_impl.h
#pragma once
#include "megdnn/oprs.h"
#include "src/cuda/matrix_mul/cublasLt_wrapper.h"
namespace megdnn {
namespace cuda {

class BatchedMatrixMulForwardImpl : public BatchedMatrixMulForward {
public:
    using BatchedMatrixMulForward::BatchedMatrixMulForward;
    BatchedMatrixMulForwardImpl(Handle* handle) : BatchedMatrixMul(handle) {}

    class AlgoBase;
M
Megvii Engine Team 已提交
13
    class AlgoNaive;
14 15 16 17 18 19 20 21
    class AlgoBruteForce;
    class AlgoCublas;
#if CUDA_VERSION >= 10010
    class AlgoCublasLt;
#endif
    class AlgoInt8x8x32;
    class AlgoPack;

M
Megvii Engine Team 已提交
22 23 24 25 26 27
    void exec(
            _megdnn_tensor_in A, _megdnn_tensor_in B, _megdnn_tensor_out C,
            _megdnn_workspace workspace) override;
    size_t get_workspace_in_bytes(
            const TensorLayout& A, const TensorLayout& B,
            const TensorLayout& C) override;
28

M
Megvii Engine Team 已提交
29
    const char* get_algorithm_set_name() const override { return "BATCHED_MATMUL"; }
30 31 32

    bool is_thread_safe() const override { return true; }
    static const AlgoPack& algo_pack() { return sm_algo_pack; }
33
    Algorithm* get_algorithm_from_desc(const AlgorithmDesc& desc) override;
34 35

protected:
M
Megvii Engine Team 已提交
36 37 38 39 40 41
    std::vector<Algorithm*> get_all_algorithms(
            const TensorLayout& A, const TensorLayout& B,
            const TensorLayout& C) override;
    std::vector<Algorithm*> get_all_algorithms_safe(
            const TensorLayout& A, const TensorLayout& B,
            const TensorLayout& C) override;
42 43 44 45
    Algorithm* get_algorithm_heuristic(
            const TensorLayout& A, const TensorLayout& B, const TensorLayout& C,
            size_t workspace_limit_in_bytes, const AlgoAttribute& positive_attr,
            const AlgoAttribute& negative_attr) override;
46 47 48 49 50 51 52 53 54

private:
    static AlgoPack sm_algo_pack;
};

}  // namespace cuda
}  // namespace megdnn

// vim: syntax=cpp.doxygen