#pragma once #include "../elemwise/opr_impl.h" #include "megdnn/oprs.h" #include "src/cuda/cudnn_with_check.h" namespace megdnn { namespace cuda { class ConvBiasForwardImpl : public ConvBiasForward { public: using ConvBiasForward::ConvBiasForward; void exec( _megdnn_tensor_in src, _megdnn_tensor_in filter, _megdnn_tensor_in bias, _megdnn_tensor_in z, _megdnn_tensor_out dst, const PreprocessedFilter* preprocessed_filter, _megdnn_workspace workspace) override; size_t get_workspace_in_bytes( const TensorLayout&, const TensorLayout&, const TensorLayout&, const TensorLayout&, const TensorLayout&, const PreprocessedFilter*) override; size_t get_preprocess_workspace_in_bytes( const TensorLayout&, const TensorLayout&, const TensorLayout&, const TensorLayout&, const TensorLayout&) override; SmallVector deduce_preprocessed_filter_layout( const TensorLayout&, const TensorLayout&, const TensorLayout&, const TensorLayout&, const TensorLayout&) override; void exec_preprocess( const TensorLayout&, _megdnn_tensor_in, _megdnn_tensor_in, const TensorLayout&, const TensorLayout&, PreprocessedFilter*, _megdnn_workspace) override; const char* get_algorithm_set_name() const override; class AlgoBase; class AlgoChanwise; class AlgoChanwiseSmall; class AlgoDepthwiseLargeFilter; class AlgoChanwise8x8x32; class AlgoFallbackNCHWQS8; class AlgoInplaceMatmul; class AlgoMatmul; class AlgoMatmul8x8x32; class Algo1x1; class AlgoBatchedMatmul; class AlgoGroupConvGeneral; class AlgoQUInt4x4x32WMMA; class AlgoCutlassConvolutionBase; class AlgoInt8CHWN4DotProdImplicitGemm; class AlgoInt8NCHW4DotProdImplicitGemm; class AlgoInt8CHWN4IMMAImplicitGemm; class AlgoInt8NCHW4IMMAImplicitGemm; class AlgoInt8CHWN4IMMAImplicitGemmReorderFilter; class AlgoInt8CHWN4IMMAImplicitGemmUnrollWidth; class AlgoInt8NCHW32IMMAImplicitGemm; class AlgoInt8NHWCIMMAImplicitGemm; class AlgoInt4NCHW64IMMAImplicitGemmBase; class AlgoInt4Int4NCHW64IMMAImplicitGemm; class AlgoUInt4Int4NCHW64IMMAImplicitGemm; class AlgoInt4NHWCIMMAImplicitGemmBase; class AlgoInt4Int4NHWCIMMAImplicitGemm; class AlgoUInt4Int4NHWCIMMAImplicitGemm; class AlgoBFloat16; class AlgoSimpleInt1; // The following algorithms are suitable for channel wise convolution class AlgoFloat32NCHWFMAImplicitBatchedGemm; class AlgoFloat16NCHWHMMAImplicitBatchedGemm; class AlgoCUDNNConvBase; class AlgoCUDNNConv; class AlgoCUDNNConvBiasActivationBase; class AlgoCUDNNConvBiasActivation; #if CUDNN_VERSION >= 8020 class AlgoCUDNNConvV8; class AlgoCUDNNConvBiasActivationV8; #endif class AlgoPack; static const AlgoPack& algo_pack() { return sm_algo_pack; } Algorithm* get_algorithm_from_desc(const AlgorithmDesc& desc) override; std::vector get_all_algorithms( const TensorLayout& src, const TensorLayout& filter, const TensorLayout& bias, const TensorLayout& z, const TensorLayout& dst) override; std::vector get_all_algorithms_safe( const TensorLayout& src, const TensorLayout& filter, const TensorLayout& bias, const TensorLayout& z, const TensorLayout& dst) override; Algorithm* get_algorithm_heuristic( const TensorLayout& src, const TensorLayout& filter, const TensorLayout& bias, const TensorLayout& z, const TensorLayout& dst, size_t workspace_limit_in_bytes, const AlgoAttribute& positive_attr, const AlgoAttribute& negative_attr) override; private: static AlgoPack sm_algo_pack; }; } // namespace cuda } // namespace megdnn // vim: syntax=cpp.doxygen