From 7b17c1180eeab72528e5e09877e2b1d801d038e7 Mon Sep 17 00:00:00 2001
From: Megvii Engine Team <megengine@megvii.com>
Date: Tue, 28 Jun 2022 15:37:49 +0800
Subject: [PATCH] refactor(dnn): make cudnn_frontend work

GitOrigin-RevId: f089f934945790f1e01659b0a25a4615b87b7db2
---
 dnn/CMakeLists.txt                         |   5 +-
 dnn/include/megdnn/algorithm_cache.h       |   2 +-
 dnn/src/cuda/conv_bias/algo.cpp            |   2 +-
 dnn/src/cuda/conv_bias/algo.h              |  14 +-
 dnn/src/cuda/conv_bias/cudnn_conv_base.cpp |  11 -
 .../cudnn_conv_bias_activation_base.cpp    | 252 +++++++++---------
 .../cudnn_conv_bias_activation_v8.cpp      |  13 +-
 dnn/src/cuda/conv_bias/cudnn_conv_v8.cpp   |  13 +-
 dnn/src/cuda/conv_bias/helper.cpp          |   5 +-
 dnn/src/cuda/conv_bias/helper.h            |   5 +-
 dnn/src/cuda/conv_bias/opr_impl.cpp        |   2 +-
 dnn/src/cuda/conv_bias/opr_impl.h          |   6 +-
 dnn/src/cuda/cudnn_wrapper_v8.cpp          |  20 +-
 dnn/src/cuda/cudnn_wrapper_v8.h            |  12 +-
 dnn/src/cuda/handle.cpp                    |  15 --
 dnn/test/cuda/conv_v8.cpp                  |  12 +-
 16 files changed, 163 insertions(+), 226 deletions(-)

diff --git a/dnn/CMakeLists.txt b/dnn/CMakeLists.txt
index d0f652414..2704508d3 100644
--- a/dnn/CMakeLists.txt
+++ b/dnn/CMakeLists.txt
@@ -54,7 +54,10 @@ if(MGE_WITH_CUDA)
   add_library(cutlass INTERFACE)
   target_include_directories(
     cutlass
-    INTERFACE $)
+    INTERFACE
+      $
+      $)
+
   add_library(cudnn-frontend INTERFACE)
   target_include_directories(
     cudnn-frontend
diff --git a/dnn/include/megdnn/algorithm_cache.h b/dnn/include/megdnn/algorithm_cache.h
index cb3008439..08744452b 100644
--- a/dnn/include/megdnn/algorithm_cache.h
+++ b/dnn/include/megdnn/algorithm_cache.h
@@ -31,7 +31,7 @@ public:
         }
     };

-    class Key {
+    struct Key {
         Handle* m_handle;
         uint32_t m_opr_type;
         const TensorLayout* m_inp_layouts_ptr;
diff --git a/dnn/src/cuda/conv_bias/algo.cpp b/dnn/src/cuda/conv_bias/algo.cpp
index df1485e74..117a87506 100644
--- a/dnn/src/cuda/conv_bias/algo.cpp
+++ b/dnn/src/cuda/conv_bias/algo.cpp
@@ -15,7 +15,7 @@ ConvBiasForwardImpl::AlgoPack::AlgoPack() {
     non_cudnn_algos.push_back(&batched_matmul);
     non_cudnn_algos.push_back(&int1_simple);

-#if CUDNN_VERSION > 8004
+#if CUDNN_VERSION >= 8020
     all_algos.push_back(&cudnn_conv_v8);
     all_algos.push_back(&cudnn_conv_bias_activation_v8);
 #endif
diff --git a/dnn/src/cuda/conv_bias/algo.h b/dnn/src/cuda/conv_bias/algo.h
index c9b237c3b..f57ed2196 100644
--- a/dnn/src/cuda/conv_bias/algo.h
+++ b/dnn/src/cuda/conv_bias/algo.h
@@ -173,10 +173,10 @@ public:

     bool is_cudnn() const override { return true; }

-    size_t get_preprocess_workspace_in_bytes(const SizeArgs& args) const override;
-    SmallVector<TensorLayout> deduce_preprocessed_filter_layout(
-            const SizeArgs& args) const override;
-    void exec_preprocess(const ExecArgs& args) const override;
+    // size_t get_preprocess_workspace_in_bytes(const SizeArgs& args) const override;
+    // SmallVector<TensorLayout> deduce_preprocessed_filter_layout(
+    //         const SizeArgs& args) const override;
+    // void exec_preprocess(const ExecArgs& args) const override;

 protected:
     virtual size_t cudnn_get_workspace_in_bytes(const SizeArgs& args) const = 0;
@@ -237,7 +237,7 @@ private:
     CudnnAlgoPack::Attr m_attr;
 };

-#if CUDNN_VERSION > 8004
+#if CUDNN_VERSION >= 8020
 class ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationV8 final
         : public AlgoCUDNNConvBiasActivationBase {
 public:
@@ -414,7 +414,7 @@ private:
     CudnnAlgoPack::Attr m_attr;
 };

-#if CUDNN_VERSION > 8004
+#if CUDNN_VERSION >= 8020
 class ConvBiasForwardImpl::AlgoCUDNNConvV8 final : public AlgoCUDNNConvBase {
 public:
     AlgoCUDNNConvV8() : AlgoCUDNNConvBase() {
@@ -1247,7 +1247,7 @@ public:
     AlgoGroupConvGeneral group;
     AlgoBFloat16 bfloat16;
     AlgoSimpleInt1 int1_simple;
-#if CUDNN_VERSION > 8004
+#if CUDNN_VERSION >= 8020
     AlgoCUDNNConvV8 cudnn_conv_v8;
     AlgoCUDNNConvBiasActivationV8 cudnn_conv_bias_activation_v8;
 #endif
diff --git a/dnn/src/cuda/conv_bias/cudnn_conv_base.cpp b/dnn/src/cuda/conv_bias/cudnn_conv_base.cpp
index e1129347f..c16786f1a 100644
--- a/dnn/src/cuda/conv_bias/cudnn_conv_base.cpp
+++ b/dnn/src/cuda/conv_bias/cudnn_conv_base.cpp
@@ -1,14 +1,3 @@
-/**
- * \file dnn/src/cuda/conv_bias/cudnn_conv_base.cpp
- * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
- *
- * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- */
-
 #include "src/common/conv_bias.h"
 #include "src/cuda/conv_bias/algo.h"
 #include "src/cuda/utils.h"
diff --git a/dnn/src/cuda/conv_bias/cudnn_conv_bias_activation_base.cpp b/dnn/src/cuda/conv_bias/cudnn_conv_bias_activation_base.cpp
index e3de63608..d317c9957 100644
--- a/dnn/src/cuda/conv_bias/cudnn_conv_bias_activation_base.cpp
+++ b/dnn/src/cuda/conv_bias/cudnn_conv_bias_activation_base.cpp
@@ -1,14 +1,3 @@
-/**
- * \file dnn/src/cuda/conv_bias/cudnn_conv_bias_activation_base.cpp
- * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
- *
- * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- */
-
 #include "megdnn/oprs/general.h"

 #include "./algo.h"
@@ -26,19 +15,21 @@ size_t ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationBase::get_workspace_in_by
         const SizeArgs& args) const {
     auto workspace_size = cudnn_get_workspace_in_bytes(args);

-    auto&& param = args.opr->param();
-    if (args.preprocessed_filter == nullptr) {
-        if (args.bias_layout && args.bias_layout->dtype != dtype::Float32() &&
-            args.src_layout->dtype.category() != DTypeCategory::FLOAT) {
-            // cudnn require bias to be float when executing CONFIG_INT
-            // convert bias to float if bias is not float at first
-            workspace_size += sizeof(float) * args.bias_layout->span().dist_elem();
-        }
-        if (param.format == param::ConvBias::Format::NCHW32) {
-            workspace_size += args.filter_layout->span().dist_byte() +
-                              args.bias_layout->span().dist_byte();
-        }
+    // if (args.preprocessed_filter == nullptr) {
+    if (args.bias_layout && args.bias_layout->dtype != dtype::Float32() &&
+        args.src_layout->dtype.category() != DTypeCategory::FLOAT) {
+        // cudnn require bias to be float when executing CONFIG_INT
+        // convert bias to float if bias is not float at first
+        workspace_size += sizeof(float) * args.bias_layout->span().dist_elem();
     }
+    // #if CUDNN_VERSION >= 7500
+    //     auto&& param = args.opr->param();
+    //     if (param.format == param::ConvBias::Format::NCHW32) {
+    //         workspace_size += args.filter_layout->span().dist_byte() +
+    //                           args.bias_layout->span().dist_byte();
+    //     }
+    // #endif
+    // }
     return workspace_size;
 }
@@ -56,55 +47,62 @@ void ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationBase::exec(
     TensorND filter_tensor;
     TensorND bias_tensor;

-    auto&& param = args.opr->param();
-    if (args.preprocessed_filter != nullptr) {
-        bias_tensor = TensorND{
-                args.bias_tensor->layout,
-                args.preprocessed_filter->tensors[0].raw_ptr()};
-        if (param.format == Param::Format::NCHW32) {
-            megdnn_assert(args.preprocessed_filter->tensors.size() == 2);
-            filter_tensor = TensorND{
-                    args.filter_tensor->layout,
-                    args.preprocessed_filter->tensors[1].raw_ptr()};
-        } else {
-            filter_tensor = *args.filter_tensor;
-        }
-    } else {
-        if (args.bias_layout && args.bias_layout->dtype != dtype::Float32() &&
-            args.src_layout->dtype.category() != DTypeCategory::FLOAT) {
-            auto cvt = args.handle->create_operator<TypeCvt>();
-            auto float_bias_layout = *args.bias_layout;
-            auto converted_bias_layout = *args.bias_layout;
-            converted_bias_layout.dtype = dtype::QuantizedS32(alpha);
-            float_bias_layout.dtype = dtype::Float32();
-            auto bias_size_in_bytes = float_bias_layout.span().dist_byte();
-            megdnn_assert(args.workspace.size >= bias_size_in_bytes);
-            cvt->exec(
-                    {args.bias_tensor->raw_ptr(), converted_bias_layout},
-                    TensorND{workspace_ptr, float_bias_layout});
-
-            bias_ptr = workspace_ptr;
-            workspace_ptr += bias_size_in_bytes;
-            workspace_size -= bias_size_in_bytes;
-        }
-        if (param.format == Param::Format::NCHW32) {
-            size_t reorder_workspace_size =
-                    args.filter_tensor->layout.span().dist_byte() +
-                    args.bias_tensor->layout.span().dist_byte();
-            auto reorder_filter_ptr = workspace_ptr;
-            auto reorder_bias_ptr =
-                    workspace_ptr + args.filter_tensor->layout.span().dist_byte();
-            cudnn_reorder_filer_and_bias_nchw32(
-                    cudnn_handle(args.opr->handle()), args.filter_tensor->raw_ptr(),
-                    args.filter_meta, bias_ptr, reorder_filter_ptr, reorder_bias_ptr);
-            filter_tensor = TensorND(args.filter_tensor->layout, reorder_filter_ptr);
-            bias_ptr = reorder_bias_ptr;
-            workspace_ptr += reorder_workspace_size;
-            workspace_size -= reorder_workspace_size;
-        } else {
-            filter_tensor = *args.filter_tensor;
-        }
+    // if (args.preprocessed_filter != nullptr) {
+    //     bias_tensor = TensorND{
+    //             args.bias_tensor->layout,
+    //             args.preprocessed_filter->tensors[0].raw_ptr()};
+    //     // #if CUDNN_VERSION >= 7500
+    //     // auto&& param = args.opr->param();
+    //     // if (param.format == Param::Format::NCHW32) {
+    //     //     megdnn_assert(args.preprocessed_filter->tensors.size() == 2);
+    //     //     filter_tensor = TensorND{
+    //     //             args.filter_tensor->layout,
+    //     //             args.preprocessed_filter->tensors[1].raw_ptr()};
+    //     // }
+    //     // #else
+    //     filter_tensor = *args.filter_tensor;
+    //     // #endif
+    // } else {
+    if (args.bias_layout && args.bias_layout->dtype != dtype::Float32() &&
+        args.src_layout->dtype.category() != DTypeCategory::FLOAT) {
+        auto cvt = args.handle->create_operator<TypeCvt>();
+        auto float_bias_layout = *args.bias_layout;
+        auto converted_bias_layout = *args.bias_layout;
+        converted_bias_layout.dtype = dtype::QuantizedS32(alpha);
+        float_bias_layout.dtype = dtype::Float32();
+        auto bias_size_in_bytes = float_bias_layout.span().dist_byte();
+        megdnn_assert(args.workspace.size >= bias_size_in_bytes);
+        cvt->exec(
+                {args.bias_tensor->raw_ptr(), converted_bias_layout},
+                TensorND{workspace_ptr, float_bias_layout});
+
+        bias_ptr = workspace_ptr;
+        workspace_ptr += bias_size_in_bytes;
+        workspace_size -= bias_size_in_bytes;
     }
+    // #if CUDNN_VERSION >= 7500
+    // auto&& param = args.opr->param();
+    // if (param.format == Param::Format::NCHW32) {
+    //     size_t reorder_workspace_size =
+    //             args.filter_tensor->layout.span().dist_byte() +
+    //             args.bias_tensor->layout.span().dist_byte();
+    //     auto reorder_filter_ptr = workspace_ptr;
+    //     auto reorder_bias_ptr =
+    //             workspace_ptr +
+    //             args.filter_tensor->layout.span().dist_byte();
+    //     cudnn_reorder_filter_and_bias_nchw32(
+    //             cudnn_handle(args.opr->handle()), args.filter_tensor->raw_ptr(), args.filter_meta,
+    //             bias_ptr, reorder_filter_ptr, reorder_bias_ptr);
+    //     filter_tensor = TensorND(args.filter_tensor->layout,
+    //     reorder_filter_ptr); bias_ptr = reorder_bias_ptr; workspace_ptr
+    //     += reorder_workspace_size; workspace_size -=
+    //     reorder_workspace_size;
+    // }
+    // #else
+    filter_tensor = *args.filter_tensor;
+    // #endif
+    // }

     bias_tensor = TensorND{args.bias_tensor->layout, bias_ptr};
     ExecArgs exec_args{
@@ -153,58 +151,64 @@ void ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationBase::exec(
     }
 }

-size_t ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationBase::
-        get_preprocess_workspace_in_bytes(const SizeArgs& args) const {
-    auto&& param = args.opr->param();
-    if (param.format == Param::Format::NCHW32) {
-        return args.bias_layout->span().dist_byte();
-    }
-    return 0_z;
-}
-
-SmallVector<TensorLayout> ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationBase::
-        deduce_preprocessed_filter_layout(const SizeArgs& args) const {
-    auto&& param = args.opr->param();
-    if (param.format == Param::Format::NCHW32) {
-        return {args.bias_layout->collapse_contiguous(),
-                args.filter_layout->collapse_contiguous()};
-    } else {
-        return {args.bias_layout->collapse_contiguous()};
-    }
-}
-
-void ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationBase::exec_preprocess(
-        const ExecArgs& args) const {
-    float alpha, beta;
-    std::tie(alpha, beta) = cudnn_get_conv_bias_act_scale_param(
-            args.src_tensor->layout, args.dst_tensor->layout,
-            args.filter_tensor->layout, args.bias_tensor->layout,
-            args.z_tensor->layout);
-    MEGDNN_MARK_USED_VAR(beta);
-
-    auto workspace_ptr = args.workspace.raw_ptr;
-    auto workspace_size = args.workspace.size;
-    auto bias_ptr = workspace_size > 0 ? workspace_ptr
-                                       : args.preprocessed_filter->tensors[0].raw_ptr();
-    if (args.bias_layout && args.bias_layout->dtype != dtype::Float32() &&
-        args.src_layout->dtype.category() != DTypeCategory::FLOAT) {
-        auto cvt = args.handle->create_operator<TypeCvt>();
-        auto float_bias_layout = *args.bias_layout;
-        auto converted_bias_layout = *args.bias_layout;
-        converted_bias_layout.dtype = dtype::QuantizedS32(alpha);
-        float_bias_layout.dtype = dtype::Float32();
-
-        cvt->exec(
-                {args.bias_tensor->raw_ptr(), converted_bias_layout},
-                TensorND{bias_ptr, float_bias_layout});
-    }
-    if (args.opr->param().format == Param::Format::NCHW32) {
-        auto reorder_filter_ptr = args.preprocessed_filter->tensors[1].raw_ptr();
-        auto reorder_bias_ptr = args.preprocessed_filter->tensors[0].raw_ptr();
-        cudnn_reorder_filer_and_bias_nchw32(
-                cudnn_handle(args.opr->handle()), args.filter_tensor->raw_ptr(),
-                args.filter_meta, bias_ptr, reorder_filter_ptr, reorder_bias_ptr);
-    }
-}
+// size_t ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationBase::
+//         get_preprocess_workspace_in_bytes(const SizeArgs&) const {
+// #if CUDNN_VERSION >= 7500
+//     auto&& param = args.opr->param();
+//     if (param.format == Param::Format::NCHW32) {
+//         return args.bias_layout->span().dist_byte();
+//     }
+// #endif
+//     return 0_z;
+// }
+
+// SmallVector<TensorLayout> ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationBase::
+//         deduce_preprocessed_filter_layout(const SizeArgs& args) const {
+// #if CUDNN_VERSION >= 7500
+//     auto&& param = args.opr->param();
+//     if (param.format == Param::Format::NCHW32) {
+//         return {args.bias_layout->collapse_contiguous(),
+//                 args.filter_layout->collapse_contiguous()};
+//     }
+// #endif
+//     return {args.bias_layout->collapse_contiguous()};
+// }

+// void ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationBase::exec_preprocess(
+//         const ExecArgs& args) const {
+//     float alpha, beta;
+//     std::tie(alpha, beta) = cudnn_get_conv_bias_act_scale_param(
+//             args.src_tensor->layout, args.dst_tensor->layout,
+//             args.filter_tensor->layout, args.bias_tensor->layout,
+//             args.z_tensor->layout);
+//     MEGDNN_MARK_USED_VAR(beta);
+
+//     auto workspace_ptr = args.workspace.raw_ptr;
+//     auto workspace_size = args.workspace.size;
+//     auto bias_ptr = workspace_size > 0 ? workspace_ptr
+//                                        :
+//                                        args.preprocessed_filter->tensors[0].raw_ptr();
+//     if (args.bias_layout && args.bias_layout->dtype != dtype::Float32() &&
+//         args.src_layout->dtype.category() != DTypeCategory::FLOAT) {
+//         auto cvt = args.handle->create_operator<TypeCvt>();
+//         auto float_bias_layout = *args.bias_layout;
+//         auto converted_bias_layout = *args.bias_layout;
+//         converted_bias_layout.dtype = dtype::QuantizedS32(alpha);
+//         float_bias_layout.dtype = dtype::Float32();
+
+//         cvt->exec(
+//                 {args.bias_tensor->raw_ptr(), converted_bias_layout},
+//                 TensorND{bias_ptr, float_bias_layout});
+//     }
+// #if CUDNN_VERSION >= 7500
+//     if (args.opr->param().format == Param::Format::NCHW32) {
+//         auto reorder_filter_ptr = args.preprocessed_filter->tensors[1].raw_ptr();
+//         auto reorder_bias_ptr = args.preprocessed_filter->tensors[0].raw_ptr();
+//         cudnn_reorder_filter_and_bias_nchw32(
+//                 cudnn_handle(args.opr->handle()), args.filter_tensor->raw_ptr(),
+//                 args.filter_meta, bias_ptr, reorder_filter_ptr, reorder_bias_ptr);
+//     }
+// #endif
+// }

 // vim: syntax=cpp.doxygen
diff --git a/dnn/src/cuda/conv_bias/cudnn_conv_bias_activation_v8.cpp b/dnn/src/cuda/conv_bias/cudnn_conv_bias_activation_v8.cpp
index b4ba892a5..2e1f8a4d4 100644
--- a/dnn/src/cuda/conv_bias/cudnn_conv_bias_activation_v8.cpp
+++ b/dnn/src/cuda/conv_bias/cudnn_conv_bias_activation_v8.cpp
@@ -1,14 +1,3 @@
-/**
- * \file dnn/src/cuda/conv_bias/cudnn_conv_bias_activation_v8.cpp
- * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
- *
- * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- */
-
 #include "megdnn/oprs/general.h"

 #include "./algo.h"
@@ -17,7 +6,7 @@
 #include "src/cuda/cudnn_wrapper_v8.h"
 #include "src/cuda/utils.h"

-#if CUDNN_VERSION >= 8004
+#if CUDNN_VERSION >= 8020
 using namespace megdnn;
 using namespace cuda;
 using namespace conv_bias;
diff --git a/dnn/src/cuda/conv_bias/cudnn_conv_v8.cpp b/dnn/src/cuda/conv_bias/cudnn_conv_v8.cpp
index df41c7d82..5615cf968 100644
--- a/dnn/src/cuda/conv_bias/cudnn_conv_v8.cpp
+++ b/dnn/src/cuda/conv_bias/cudnn_conv_v8.cpp
@@ -1,20 +1,9 @@
-/**
- * \file dnn/src/cuda/conv_bias/cudnn_conv_v8.cpp
- * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
- *
- * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- */
-
 #include "src/common/conv_bias.h"
 #include "src/cuda/conv_bias/algo.h"
 #include "src/cuda/cudnn_wrapper_v8.h"
 #include "src/cuda/utils.h"

-#if CUDNN_VERSION >= 8004
+#if CUDNN_VERSION >= 8020
 using namespace megdnn;
 using namespace cuda;
 using namespace conv_bias;
diff --git a/dnn/src/cuda/conv_bias/helper.cpp b/dnn/src/cuda/conv_bias/helper.cpp
index 1a9d529ef..057bbfbf3 100644
--- a/dnn/src/cuda/conv_bias/helper.cpp
+++ b/dnn/src/cuda/conv_bias/helper.cpp
@@ -239,7 +239,8 @@ std::pair<float, float> cudnn_get_conv_bias_act_scale_param(
     return {alpha, beta};
 }

-void cudnn_reorder_filer_and_bias_nchw32(
+#if CUDNN_VERSION >= 7500
+void cudnn_reorder_filter_and_bias_nchw32(
         const cudnnHandle_t& handle, const void* filter_ptr,
         const CanonizedFilterMeta& fm, const void* bias_ptr,
         void* reordered_filter_ptr, void* reordered_bias_ptr) {
@@ -250,6 +251,8 @@ void cudnn_reorder_filer_and_bias_nchw32(
             handle, filter_desc.desc, CUDNN_DEFAULT_REORDER, filter_ptr,
             reordered_filter_ptr, reorder_bias, bias_ptr, reordered_bias_ptr));
 }
+#endif
+
 }  // namespace conv_bias
 }  // namespace cuda
 }  // namespace megdnn
diff --git a/dnn/src/cuda/conv_bias/helper.h b/dnn/src/cuda/conv_bias/helper.h
index 5a4b18b23..79464a272 100644
--- a/dnn/src/cuda/conv_bias/helper.h
+++ b/dnn/src/cuda/conv_bias/helper.h
@@ -117,11 +117,12 @@ std::pair<float, float> cudnn_get_conv_bias_act_scale_param(
         const TensorLayout& x, const TensorLayout& y, const TensorLayout& w,
         const TensorLayout& b, const TensorLayout& z);

-void cudnn_reorder_filer_and_bias_nchw32(
+#if CUDNN_VERSION >= 7500
+void cudnn_reorder_filter_and_bias_nchw32(
         const cudnnHandle_t& handle, const void* filter_ptr,
         const CanonizedFilterMeta& fm, const void* bias_ptr,
         void* reordered_filter_ptr, void* reordered_bias_ptr);
-
+#endif
 }  // namespace conv_bias
 }  // namespace cuda
 }  // namespace megdnn
diff --git a/dnn/src/cuda/conv_bias/opr_impl.cpp b/dnn/src/cuda/conv_bias/opr_impl.cpp
index 8f67ffeb8..00c9382db 100644
--- a/dnn/src/cuda/conv_bias/opr_impl.cpp
+++ b/dnn/src/cuda/conv_bias/opr_impl.cpp
@@ -47,7 +47,7 @@ ConvBiasForward::Algorithm* ConvBiasForwardImpl::get_algorithm_heuristic(
         const AlgoAttribute& positive_attr, const AlgoAttribute& negative_attr) {
     using namespace conv_bias;
     AlgoBase::SizeArgs args{this, src, filter, bias, z, dst};
-#if CUDNN_VERSION >= 8004
+#if CUDNN_VERSION >= 8020
     if (sm_algo_pack.cudnn_conv_v8.is_available_attribute(
                 args, positive_attr, negative_attr, workspace_limit_in_bytes)) {
         return &sm_algo_pack.cudnn_conv_v8;
diff --git a/dnn/src/cuda/conv_bias/opr_impl.h b/dnn/src/cuda/conv_bias/opr_impl.h
index 2bb5eaebd..7a7268a97 100644
--- a/dnn/src/cuda/conv_bias/opr_impl.h
+++ b/dnn/src/cuda/conv_bias/opr_impl.h
@@ -32,12 +32,10 @@ public:
     const char* get_algorithm_set_name() const override;

     class AlgoBase;
-    class AlgoCUDNNConvBiasActivation;
     class AlgoChanwise;
     class AlgoChanwiseSmall;
     class AlgoDepthwiseLargeFilter;
     class AlgoChanwise8x8x32;
-    class AlgoCUDNNConv;
     class AlgoFallbackNCHWQS8;
     class AlgoInplaceMatmul;
     class AlgoMatmul;
@@ -67,8 +65,10 @@ public:
     class AlgoFloat32NCHWFMAImplicitBatchedGemm;
     class AlgoFloat16NCHWHMMAImplicitBatchedGemm;
     class AlgoCUDNNConvBase;
+    class AlgoCUDNNConv;
     class AlgoCUDNNConvBiasActivationBase;
-#if CUDNN_VERSION > 8004
+    class AlgoCUDNNConvBiasActivation;
+#if CUDNN_VERSION >= 8020
     class AlgoCUDNNConvV8;
     class AlgoCUDNNConvBiasActivationV8;
 #endif
diff --git a/dnn/src/cuda/cudnn_wrapper_v8.cpp b/dnn/src/cuda/cudnn_wrapper_v8.cpp
index 744eb8f79..671cf140f 100644
--- a/dnn/src/cuda/cudnn_wrapper_v8.cpp
+++ b/dnn/src/cuda/cudnn_wrapper_v8.cpp
@@ -1,13 +1,4 @@
-/**
- * \file dnn/src/cuda/cudnn_wrapper_v8.cpp
- * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
- *
- * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- */
+#if CUDNN_VERSION >= 8020
 #include "src/cuda/cudnn_wrapper_v8.h"
 #include "src/cuda/cudnn_wrapper.h"
@@ -19,7 +10,7 @@

 #include "cudnn_frontend_EngineConfigGenerator.h"

-#include "megdnn/heuristic_cache.h"
+#include "megdnn/algorithm_cache.h"

 using namespace megdnn;
 using namespace cuda;
@@ -240,9 +231,9 @@ auto make_activation_descriptor(

 // high-level api for convolution execution
 struct StaticData {
-    using Key = megdnn::HeuristicCache::Key;
-    using KeyStorage = megdnn::HeuristicCache::KeyStorage;
-    using KeyHash = megdnn::HeuristicCache::Hash;
+    using Key = megdnn::AlgorithmCache::Key;
+    using KeyStorage = megdnn::AlgorithmCache::KeyStorage;
+    using KeyHash = megdnn::AlgorithmCache::Hash;
     using Result = cudnn_frontend::ExecutionPlan;
     using CudnnFrontendExecutionPlanCache =
             std::unordered_map<KeyStorage, Result, KeyHash>;
@@ -682,4 +673,5 @@ void megdnn::cuda::run_conv_bias_act_with_plan(
             handle, plan.get_raw_desc(), variant_pack.get_raw_desc()));
 }

+#endif
 // vim: syntax=cpp.doxygen
diff --git a/dnn/src/cuda/cudnn_wrapper_v8.h b/dnn/src/cuda/cudnn_wrapper_v8.h
index f601ec073..b5de6da91 100644
--- a/dnn/src/cuda/cudnn_wrapper_v8.h
+++ b/dnn/src/cuda/cudnn_wrapper_v8.h
@@ -1,15 +1,6 @@
-/**
- * \file dnn/src/cuda/cudnn_wrapper_v8.h
- * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
- *
- * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- */
 #pragma once

+#if CUDNN_VERSION >= 8020
 #include "megdnn/basic_types.h"
 #include "megdnn/oprs/nn.h"
 #include "src/common/utils.h"
@@ -67,4 +58,5 @@ void run_conv_bias_act_with_plan(
 }  // namespace cuda
 }  // namespace megdnn

+#endif
 // vim: syntax=cpp.doxygen
diff --git a/dnn/src/cuda/handle.cpp b/dnn/src/cuda/handle.cpp
index f0e314eff..5cf5f71b6 100644
--- a/dnn/src/cuda/handle.cpp
+++ b/dnn/src/cuda/handle.cpp
@@ -58,11 +58,6 @@ HandleImpl::HandleImpl(megcoreComputingHandle_t comp_handle)
 For example `export CUDA_CACHE_MAXSIZE=2147483647` and `export CUDA_CACHE_PATH=/data/.cuda_cache`)");
     }
 #endif
-    size_t free, tot;
-    cudaMemGetInfo(&free, &tot);
-    printf("before cudnn create, free: %.2f MB, tot: %.2f MB, allocated: %.2f MB\n",
-           free / 1024.0 / 1024.0, tot / 1024.0 / 1024.0,
-           (tot - free) / 1024.0 / 1024.0);
     cudnn_check(cudnnCreate(&m_cudnn_handle));
     cublas_check(cublasCreate(&m_cublas_handle));
 #if CUDA_VERSION >= 10010
@@ -74,11 +69,6 @@ HandleImpl::HandleImpl(megcoreComputingHandle_t comp_handle)
     cudnn_check(cudnnSetStream(m_cudnn_handle, stream()));
     cublas_check(cublasSetStream(m_cublas_handle, stream()));

-#if CUDNN_VERSION >= 8004
-// cudnn_check(cudnnOpsInferVersionCheck());
-// cudnn_check(cudnnCnnInferVersionCheck());
-#endif
-
     // Note that all cublas scalars (alpha, beta) and scalar results such as dot
     // output resides at device side.
     cublas_check(cublasSetPointerMode(m_cublas_handle, CUBLAS_POINTER_MODE_DEVICE));
@@ -92,11 +82,6 @@ HandleImpl::HandleImpl(megcoreComputingHandle_t comp_handle)
                     cudaMemcpyHostToDevice, stream()));
     cuda_check(cudaStreamSynchronize(stream()));

-    cudaMemGetInfo(&free, &tot);
-    printf("after cudnn create, free: %.2f MB, tot: %.2f MB, allocated: %.2f MB\n",
-           free / 1024.0 / 1024.0, tot / 1024.0 / 1024.0,
-           (tot - free) / 1024.0 / 1024.0);
-
     // check tk1
     m_is_tegra_k1 = (strcmp(m_device_prop->name, "GK20A") == 0);
     m_cusolver_handle = nullptr;
diff --git a/dnn/test/cuda/conv_v8.cpp b/dnn/test/cuda/conv_v8.cpp
index 407ec5468..dcec9d159 100644
--- a/dnn/test/cuda/conv_v8.cpp
+++ b/dnn/test/cuda/conv_v8.cpp
@@ -1,13 +1,3 @@
-/**
- * \file dnn/test/cuda/conv_bias.cpp
- * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
- *
- * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- */
 #include "megdnn/dtype.h"
 #include "test/cuda/fixture.h"
@@ -26,7 +16,7 @@ using namespace megdnn;
 using namespace test;
 using namespace conv_bias;

-#if CUDNN_VERSION >= 8004
+#if CUDNN_VERSION >= 8020
 TEST_F(CUDA, CONV_V8_FLOAT) {
     Checker<ConvBiasForward> checker(handle_cuda());
     checker.set_before_exec_callback(
-- 
GitLab
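
Editor's note on the caching pattern this patch adopts: `cudnn_wrapper_v8.cpp` now keys its cache of built `cudnn_frontend::ExecutionPlan` objects with the renamed `megdnn::AlgorithmCache` key types (formerly `HeuristicCache`), so the costly plan construction happens once per unique problem description and later `run_*_with_plan` calls only look the plan up. The sketch below is a minimal, generic illustration of that build-once-then-reuse cache; `PlanKey`, `Plan`, and `PlanCache` are hypothetical stand-ins, not MegDNN or cuDNN types, since the real code depends on MegDNN and cudnn_frontend headers.

```cpp
// Illustrative sketch of an execution-plan cache (assumed names, not MegDNN API).
#include <mutex>
#include <string>
#include <unordered_map>

// Stand-in for cudnn_frontend::ExecutionPlan.
struct Plan {
    std::string serialized_desc;
};

// Stand-in for AlgorithmCache::KeyStorage; a real key would hash
// layouts, dtypes, and operator parameters.
using PlanKey = std::string;

class PlanCache {
    std::unordered_map<PlanKey, Plan> m_map;
    std::mutex m_mtx;  // plan lookup may race across handles/threads

public:
    // Return the cached plan for `key`, invoking `build` only on a miss.
    template <typename Builder>
    const Plan& get_or_build(const PlanKey& key, Builder&& build) {
        std::lock_guard<std::mutex> lock{m_mtx};
        auto it = m_map.find(key);
        if (it == m_map.end()) {
            // First time this problem shape is seen: pay the build cost once.
            it = m_map.emplace(key, build()).first;
        }
        return it->second;
    }
};
```

Usage mirrors the patched wrapper: the caller derives a key from the conv problem, then does `cache.get_or_build(key, [&] { return Plan{"conv_fwd_nchw32"}; })` and executes with the returned plan. Guarding the whole translation unit with `#if CUDNN_VERSION >= 8020` (as the patch does) keeps the frontend path out of builds against older cuDNN releases, where the required v8 graph API is incomplete.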