Commit 7b17c118 authored by: Megvii Engine Team

refactor(dnn): make cudnn_frontend work

GitOrigin-RevId: f089f934945790f1e01659b0a25a4615b87b7db2
Parent 35e9cc98
@@ -54,7 +54,10 @@ if(MGE_WITH_CUDA)
   add_library(cutlass INTERFACE)
   target_include_directories(
     cutlass
-    INTERFACE $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/third_party/cutlass/include>)
+    INTERFACE
+      $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/third_party/cutlass/include>
+      $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/third_party/cutlass/tools/util/include>)
   add_library(cudnn-frontend INTERFACE)
   target_include_directories(
     cudnn-frontend
......
@@ -31,7 +31,7 @@ public:
         }
     };
-    class Key {
+    struct Key {
         Handle* m_handle;
         uint32_t m_opr_type;
         const TensorLayout* m_inp_layouts_ptr;
......
...@@ -15,7 +15,7 @@ ConvBiasForwardImpl::AlgoPack::AlgoPack() { ...@@ -15,7 +15,7 @@ ConvBiasForwardImpl::AlgoPack::AlgoPack() {
non_cudnn_algos.push_back(&batched_matmul); non_cudnn_algos.push_back(&batched_matmul);
non_cudnn_algos.push_back(&int1_simple); non_cudnn_algos.push_back(&int1_simple);
#if CUDNN_VERSION > 8004 #if CUDNN_VERSION >= 8020
all_algos.push_back(&cudnn_conv_v8); all_algos.push_back(&cudnn_conv_v8);
all_algos.push_back(&cudnn_conv_bias_activation_v8); all_algos.push_back(&cudnn_conv_bias_activation_v8);
#endif #endif
......
@@ -173,10 +173,10 @@ public:
     bool is_cudnn() const override { return true; }

-    size_t get_preprocess_workspace_in_bytes(const SizeArgs& args) const override;
-    SmallVector<TensorLayout> deduce_preprocessed_filter_layout(
-            const SizeArgs& args) const override;
-    void exec_preprocess(const ExecArgs& args) const override;
+    // size_t get_preprocess_workspace_in_bytes(const SizeArgs& args) const override;
+    // SmallVector<TensorLayout> deduce_preprocessed_filter_layout(
+    //         const SizeArgs& args) const override;
+    // void exec_preprocess(const ExecArgs& args) const override;

 protected:
     virtual size_t cudnn_get_workspace_in_bytes(const SizeArgs& args) const = 0;
@@ -237,7 +237,7 @@ private:
     CudnnAlgoPack::Attr m_attr;
 };

-#if CUDNN_VERSION > 8004
+#if CUDNN_VERSION >= 8020
 class ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationV8 final
         : public AlgoCUDNNConvBiasActivationBase {
 public:
@@ -414,7 +414,7 @@ private:
     CudnnAlgoPack::Attr m_attr;
 };

-#if CUDNN_VERSION > 8004
+#if CUDNN_VERSION >= 8020
 class ConvBiasForwardImpl::AlgoCUDNNConvV8 final : public AlgoCUDNNConvBase {
 public:
     AlgoCUDNNConvV8() : AlgoCUDNNConvBase() {
@@ -1247,7 +1247,7 @@ public:
     AlgoGroupConvGeneral group;
     AlgoBFloat16 bfloat16;
     AlgoSimpleInt1 int1_simple;
-#if CUDNN_VERSION > 8004
+#if CUDNN_VERSION >= 8020
     AlgoCUDNNConvV8 cudnn_conv_v8;
     AlgoCUDNNConvBiasActivationV8 cudnn_conv_bias_activation_v8;
 #endif
......
-/**
- * \file dnn/src/cuda/conv_bias/cudnn_conv_base.cpp
- * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
- *
- * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- */
 #include "src/common/conv_bias.h"
 #include "src/cuda/conv_bias/algo.h"
 #include "src/cuda/utils.h"
......
-/**
- * \file dnn/src/cuda/conv_bias/cudnn_conv_bias_activation_base.cpp
- * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
- *
- * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- */
 #include "megdnn/oprs/general.h"
 #include "./algo.h"
@@ -26,19 +15,21 @@ size_t ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationBase::get_workspace_in_bytes(
         const SizeArgs& args) const {
     auto workspace_size = cudnn_get_workspace_in_bytes(args);
-    auto&& param = args.opr->param();
-    if (args.preprocessed_filter == nullptr) {
-        if (args.bias_layout && args.bias_layout->dtype != dtype::Float32() &&
-            args.src_layout->dtype.category() != DTypeCategory::FLOAT) {
-            // cudnn require bias to be float when executing CONFIG_INT
-            // convert bias to float if bias is not float at first
-            workspace_size += sizeof(float) * args.bias_layout->span().dist_elem();
-        }
-        if (param.format == param::ConvBias::Format::NCHW32) {
-            workspace_size += args.filter_layout->span().dist_byte() +
-                              args.bias_layout->span().dist_byte();
-        }
-    }
+    // if (args.preprocessed_filter == nullptr) {
+    if (args.bias_layout && args.bias_layout->dtype != dtype::Float32() &&
+        args.src_layout->dtype.category() != DTypeCategory::FLOAT) {
+        // cudnn require bias to be float when executing CONFIG_INT
+        // convert bias to float if bias is not float at first
+        workspace_size += sizeof(float) * args.bias_layout->span().dist_elem();
+    }
+    // #if CUDNN_VERSION >= 7500
+    //     auto&& param = args.opr->param();
+    //     if (param.format == param::ConvBias::Format::NCHW32) {
+    //         workspace_size += args.filter_layout->span().dist_byte() +
+    //                           args.bias_layout->span().dist_byte();
+    //     }
+    // #endif
+    // }
     return workspace_size;
 }
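A note on the hunk above: when the bias tensor is quantized (non-float) and the source dtype is not float, the fused cuDNN conv-bias-activation configs expect a float32 bias, so the implementation reserves extra workspace to hold a converted copy of the bias. A minimal sketch of that size computation, illustrative only; `bias_elems` stands in for `args.bias_layout->span().dist_elem()` and is not MegDNN API:

    #include <cstddef>

    // Extra cuDNN workspace needed for a float32 copy of a quantized bias.
    // `bias_elems`: number of bias elements; returns 0 when the bias is already float.
    inline std::size_t extra_bias_workspace_bytes(std::size_t bias_elems, bool bias_is_float) {
        return bias_is_float ? 0 : bias_elems * sizeof(float);
    }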
@@ -56,55 +47,62 @@ void ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationBase::exec(
     TensorND filter_tensor;
     TensorND bias_tensor;
-    auto&& param = args.opr->param();
-    if (args.preprocessed_filter != nullptr) {
-        bias_tensor = TensorND{
-                args.bias_tensor->layout,
-                args.preprocessed_filter->tensors[0].raw_ptr()};
-        if (param.format == Param::Format::NCHW32) {
-            megdnn_assert(args.preprocessed_filter->tensors.size() == 2);
-            filter_tensor = TensorND{
-                    args.filter_tensor->layout,
-                    args.preprocessed_filter->tensors[1].raw_ptr()};
-        } else {
-            filter_tensor = *args.filter_tensor;
-        }
-    } else {
-        if (args.bias_layout && args.bias_layout->dtype != dtype::Float32() &&
-            args.src_layout->dtype.category() != DTypeCategory::FLOAT) {
-            auto cvt = args.handle->create_operator<TypeCvt>();
-            auto float_bias_layout = *args.bias_layout;
-            auto converted_bias_layout = *args.bias_layout;
-            converted_bias_layout.dtype = dtype::QuantizedS32(alpha);
-            float_bias_layout.dtype = dtype::Float32();
-            auto bias_size_in_bytes = float_bias_layout.span().dist_byte();
-            megdnn_assert(args.workspace.size >= bias_size_in_bytes);
-            cvt->exec(
-                    {args.bias_tensor->raw_ptr(), converted_bias_layout},
-                    TensorND{workspace_ptr, float_bias_layout});
-            bias_ptr = workspace_ptr;
-            workspace_ptr += bias_size_in_bytes;
-            workspace_size -= bias_size_in_bytes;
-        }
-        if (param.format == Param::Format::NCHW32) {
-            size_t reorder_workspace_size =
-                    args.filter_tensor->layout.span().dist_byte() +
-                    args.bias_tensor->layout.span().dist_byte();
-            auto reorder_filter_ptr = workspace_ptr;
-            auto reorder_bias_ptr =
-                    workspace_ptr + args.filter_tensor->layout.span().dist_byte();
-            cudnn_reorder_filer_and_bias_nchw32(
-                    cudnn_handle(args.opr->handle()), args.filter_tensor->raw_ptr(),
-                    args.filter_meta, bias_ptr, reorder_filter_ptr, reorder_bias_ptr);
-            filter_tensor = TensorND(args.filter_tensor->layout, reorder_filter_ptr);
-            bias_ptr = reorder_bias_ptr;
-            workspace_ptr += reorder_workspace_size;
-            workspace_size -= reorder_workspace_size;
-        } else {
-            filter_tensor = *args.filter_tensor;
-        }
-    }
+    // if (args.preprocessed_filter != nullptr) {
+    //     bias_tensor = TensorND{
+    //             args.bias_tensor->layout,
+    //             args.preprocessed_filter->tensors[0].raw_ptr()};
+    //     // #if CUDNN_VERSION >= 7500
+    //     // auto&& param = args.opr->param();
+    //     // if (param.format == Param::Format::NCHW32) {
+    //     //     megdnn_assert(args.preprocessed_filter->tensors.size() == 2);
+    //     //     filter_tensor = TensorND{
+    //     //             args.filter_tensor->layout,
+    //     //             args.preprocessed_filter->tensors[1].raw_ptr()};
+    //     // }
+    //     // #else
+    //     filter_tensor = *args.filter_tensor;
+    //     // #endif
+    // } else {
+    if (args.bias_layout && args.bias_layout->dtype != dtype::Float32() &&
+        args.src_layout->dtype.category() != DTypeCategory::FLOAT) {
+        auto cvt = args.handle->create_operator<TypeCvt>();
+        auto float_bias_layout = *args.bias_layout;
+        auto converted_bias_layout = *args.bias_layout;
+        converted_bias_layout.dtype = dtype::QuantizedS32(alpha);
+        float_bias_layout.dtype = dtype::Float32();
+        auto bias_size_in_bytes = float_bias_layout.span().dist_byte();
+        megdnn_assert(args.workspace.size >= bias_size_in_bytes);
+        cvt->exec(
+                {args.bias_tensor->raw_ptr(), converted_bias_layout},
+                TensorND{workspace_ptr, float_bias_layout});
+        bias_ptr = workspace_ptr;
+        workspace_ptr += bias_size_in_bytes;
+        workspace_size -= bias_size_in_bytes;
+    }
+    // #if CUDNN_VERSION >= 7500
+    //     auto&& param = args.opr->param();
+    //     if (param.format == Param::Format::NCHW32) {
+    //         size_t reorder_workspace_size =
+    //                 args.filter_tensor->layout.span().dist_byte() +
+    //                 args.bias_tensor->layout.span().dist_byte();
+    //         auto reorder_filter_ptr = workspace_ptr;
+    //         auto reorder_bias_ptr =
+    //                 workspace_ptr +
+    //                 args.filter_tensor->layout.span().dist_byte();
+    //         cudnn_reorder_filter_and_bias_nchw32(
+    //                 cudnn_handle(args.opr->handle()),
+    //                 args.filter_tensor->raw_ptr(), args.filter_meta,
+    //                 bias_ptr, reorder_filter_ptr, reorder_bias_ptr);
+    //         filter_tensor = TensorND(args.filter_tensor->layout,
+    //         reorder_filter_ptr); bias_ptr = reorder_bias_ptr; workspace_ptr
+    //         += reorder_workspace_size; workspace_size -=
+    //         reorder_workspace_size;
+    //     }
+    // #else
+    filter_tensor = *args.filter_tensor;
+    // #endif
+    // }

     bias_tensor = TensorND{args.bias_tensor->layout, bias_ptr};
     ExecArgs exec_args{
@@ -153,58 +151,64 @@ void ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationBase::exec(
     }
 }

-size_t ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationBase::
-        get_preprocess_workspace_in_bytes(const SizeArgs& args) const {
-    auto&& param = args.opr->param();
-    if (param.format == Param::Format::NCHW32) {
-        return args.bias_layout->span().dist_byte();
-    }
-    return 0_z;
-}
-
-SmallVector<TensorLayout> ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationBase::
-        deduce_preprocessed_filter_layout(const SizeArgs& args) const {
-    auto&& param = args.opr->param();
-    if (param.format == Param::Format::NCHW32) {
-        return {args.bias_layout->collapse_contiguous(),
-                args.filter_layout->collapse_contiguous()};
-    } else {
-        return {args.bias_layout->collapse_contiguous()};
-    }
-}
-
-void ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationBase::exec_preprocess(
-        const ExecArgs& args) const {
-    float alpha, beta;
-    std::tie(alpha, beta) = cudnn_get_conv_bias_act_scale_param(
-            args.src_tensor->layout, args.dst_tensor->layout,
-            args.filter_tensor->layout, args.bias_tensor->layout,
-            args.z_tensor->layout);
-    MEGDNN_MARK_USED_VAR(beta);
-
-    auto workspace_ptr = args.workspace.raw_ptr;
-    auto workspace_size = args.workspace.size;
-    auto bias_ptr = workspace_size > 0 ? workspace_ptr
-                                       : args.preprocessed_filter->tensors[0].raw_ptr();
-    if (args.bias_layout && args.bias_layout->dtype != dtype::Float32() &&
-        args.src_layout->dtype.category() != DTypeCategory::FLOAT) {
-        auto cvt = args.handle->create_operator<TypeCvt>();
-        auto float_bias_layout = *args.bias_layout;
-        auto converted_bias_layout = *args.bias_layout;
-        converted_bias_layout.dtype = dtype::QuantizedS32(alpha);
-        float_bias_layout.dtype = dtype::Float32();
-
-        cvt->exec(
-                {args.bias_tensor->raw_ptr(), converted_bias_layout},
-                TensorND{bias_ptr, float_bias_layout});
-    }
-    if (args.opr->param().format == Param::Format::NCHW32) {
-        auto reorder_filter_ptr = args.preprocessed_filter->tensors[1].raw_ptr();
-        auto reorder_bias_ptr = args.preprocessed_filter->tensors[0].raw_ptr();
-        cudnn_reorder_filer_and_bias_nchw32(
-                cudnn_handle(args.opr->handle()), args.filter_tensor->raw_ptr(),
-                args.filter_meta, bias_ptr, reorder_filter_ptr, reorder_bias_ptr);
-    }
-}
+// size_t ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationBase::
+//         get_preprocess_workspace_in_bytes(const SizeArgs&) const {
+// #if CUDNN_VERSION >= 7500
+//     auto&& param = args.opr->param();
+//     if (param.format == Param::Format::NCHW32) {
+//         return args.bias_layout->span().dist_byte();
+//     }
+// #endif
+//     return 0_z;
+// }

+// SmallVector<TensorLayout> ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationBase::
+//         deduce_preprocessed_filter_layout(const SizeArgs& args) const {
+// #if CUDNN_VERSION >= 7500
+//     auto&& param = args.opr->param();
+//     if (param.format == Param::Format::NCHW32) {
+//         return {args.bias_layout->collapse_contiguous(),
+//                 args.filter_layout->collapse_contiguous()};
+//     }
+// #endif
+//     return {args.bias_layout->collapse_contiguous()};
+// }

+// void ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationBase::exec_preprocess(
+//         const ExecArgs& args) const {
+//     float alpha, beta;
+//     std::tie(alpha, beta) = cudnn_get_conv_bias_act_scale_param(
+//             args.src_tensor->layout, args.dst_tensor->layout,
+//             args.filter_tensor->layout, args.bias_tensor->layout,
+//             args.z_tensor->layout);
+//     MEGDNN_MARK_USED_VAR(beta);

+//     auto workspace_ptr = args.workspace.raw_ptr;
+//     auto workspace_size = args.workspace.size;
+//     auto bias_ptr = workspace_size > 0 ? workspace_ptr
+//                                        :
+//                                        args.preprocessed_filter->tensors[0].raw_ptr();
+//     if (args.bias_layout && args.bias_layout->dtype != dtype::Float32() &&
+//         args.src_layout->dtype.category() != DTypeCategory::FLOAT) {
+//         auto cvt = args.handle->create_operator<TypeCvt>();
+//         auto float_bias_layout = *args.bias_layout;
+//         auto converted_bias_layout = *args.bias_layout;
+//         converted_bias_layout.dtype = dtype::QuantizedS32(alpha);
+//         float_bias_layout.dtype = dtype::Float32();

+//         cvt->exec(
+//                 {args.bias_tensor->raw_ptr(), converted_bias_layout},
+//                 TensorND{bias_ptr, float_bias_layout});
+//     }
+// #if CUDNN_VERSION >= 7500
+//     if (args.opr->param().format == Param::Format::NCHW32) {
+//         auto reorder_filter_ptr = args.preprocessed_filter->tensors[1].raw_ptr();
+//         auto reorder_bias_ptr = args.preprocessed_filter->tensors[0].raw_ptr();
+//         cudnn_reorder_filter_and_bias_nchw32(
+//                 cudnn_handle(args.opr->handle()), args.filter_tensor->raw_ptr(),
+//                 args.filter_meta, bias_ptr, reorder_filter_ptr, reorder_bias_ptr);
+//     }
+// #endif
+// }

 // vim: syntax=cpp.doxygen
-/**
- * \file dnn/src/cuda/conv_bias/cudnn_conv_bias_activation_v8.cpp
- * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
- *
- * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- */
 #include "megdnn/oprs/general.h"
 #include "./algo.h"
@@ -17,7 +6,7 @@
 #include "src/cuda/cudnn_wrapper_v8.h"
 #include "src/cuda/utils.h"

-#if CUDNN_VERSION >= 8004
+#if CUDNN_VERSION >= 8020
 using namespace megdnn;
 using namespace cuda;
 using namespace conv_bias;
......
-/**
- * \file dnn/src/cuda/conv_bias/cudnn_conv_v8.cpp
- * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
- *
- * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- */
 #include "src/common/conv_bias.h"
 #include "src/cuda/conv_bias/algo.h"
 #include "src/cuda/cudnn_wrapper_v8.h"
 #include "src/cuda/utils.h"

-#if CUDNN_VERSION >= 8004
+#if CUDNN_VERSION >= 8020
 using namespace megdnn;
 using namespace cuda;
 using namespace conv_bias;
......
@@ -239,7 +239,8 @@ std::pair<float, float> cudnn_get_conv_bias_act_scale_param(
     return {alpha, beta};
 }

-void cudnn_reorder_filer_and_bias_nchw32(
+#if CUDNN_VERSION >= 7500
+void cudnn_reorder_filter_and_bias_nchw32(
         const cudnnHandle_t& handle, const void* filter_ptr,
         const CanonizedFilterMeta& fm, const void* bias_ptr, void* reordered_filter_ptr,
         void* reordered_bias_ptr) {
@@ -250,6 +251,8 @@ void cudnn_reorder_filer_and_bias_nchw32(
             handle, filter_desc.desc, CUDNN_DEFAULT_REORDER, filter_ptr,
             reordered_filter_ptr, reorder_bias, bias_ptr, reordered_bias_ptr));
 }
+#endif

 } // namespace conv_bias
 } // namespace cuda
 } // namespace megdnn
......
@@ -117,11 +117,12 @@ std::pair<float, float> cudnn_get_conv_bias_act_scale_param(
         const TensorLayout& x, const TensorLayout& y, const TensorLayout& w,
         const TensorLayout& b, const TensorLayout& z);

-void cudnn_reorder_filer_and_bias_nchw32(
+#if CUDNN_VERSION >= 7500
+void cudnn_reorder_filter_and_bias_nchw32(
         const cudnnHandle_t& handle, const void* filter_ptr,
         const CanonizedFilterMeta& fm, const void* bias_ptr, void* reordered_filter_ptr,
         void* reordered_bias_ptr);
+#endif

 } // namespace conv_bias
 } // namespace cuda
 } // namespace megdnn
......
@@ -47,7 +47,7 @@ ConvBiasForward::Algorithm* ConvBiasForwardImpl::get_algorithm_heuristic(
         const AlgoAttribute& positive_attr, const AlgoAttribute& negative_attr) {
     using namespace conv_bias;
     AlgoBase::SizeArgs args{this, src, filter, bias, z, dst};
-#if CUDNN_VERSION >= 8004
+#if CUDNN_VERSION >= 8020
     if (sm_algo_pack.cudnn_conv_v8.is_available_attribute(
                 args, positive_attr, negative_attr, workspace_limit_in_bytes)) {
         return &sm_algo_pack.cudnn_conv_v8;
......
@@ -32,12 +32,10 @@ public:
     const char* get_algorithm_set_name() const override;

     class AlgoBase;
-    class AlgoCUDNNConvBiasActivation;
     class AlgoChanwise;
     class AlgoChanwiseSmall;
     class AlgoDepthwiseLargeFilter;
     class AlgoChanwise8x8x32;
-    class AlgoCUDNNConv;
     class AlgoFallbackNCHWQS8;
     class AlgoInplaceMatmul;
     class AlgoMatmul;
@@ -67,8 +65,10 @@ public:
     class AlgoFloat32NCHWFMAImplicitBatchedGemm;
     class AlgoFloat16NCHWHMMAImplicitBatchedGemm;
     class AlgoCUDNNConvBase;
+    class AlgoCUDNNConv;
     class AlgoCUDNNConvBiasActivationBase;
-#if CUDNN_VERSION > 8004
+    class AlgoCUDNNConvBiasActivation;
+#if CUDNN_VERSION >= 8020
     class AlgoCUDNNConvV8;
     class AlgoCUDNNConvBiasActivationV8;
 #endif
......
-/**
- * \file dnn/src/cuda/cudnn_wrapper_v8.cpp
- * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
- *
- * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- */
+#if CUDNN_VERSION >= 8020
 #include "src/cuda/cudnn_wrapper_v8.h"
 #include "src/cuda/cudnn_wrapper.h"
@@ -19,7 +10,7 @@
 #include "cudnn_frontend_EngineConfigGenerator.h"

-#include "megdnn/heuristic_cache.h"
+#include "megdnn/algorithm_cache.h"

 using namespace megdnn;
 using namespace cuda;
@@ -240,9 +231,9 @@ auto make_activation_descriptor(
 // high-level api for convolution execution
 struct StaticData {
-    using Key = megdnn::HeuristicCache::Key;
-    using KeyStorage = megdnn::HeuristicCache::KeyStorage;
-    using KeyHash = megdnn::HeuristicCache::Hash;
+    using Key = megdnn::AlgorithmCache::Key;
+    using KeyStorage = megdnn::AlgorithmCache::KeyStorage;
+    using KeyHash = megdnn::AlgorithmCache::Hash;
     using Result = cudnn_frontend::ExecutionPlan;
     using CudnnFrontendExecutionPlanCache =
             std::unordered_map<KeyStorage, Result, KeyHash>;
@@ -682,4 +673,5 @@ void megdnn::cuda::run_conv_bias_act_with_plan(
             handle, plan.get_raw_desc(), variant_pack.get_raw_desc()));
 }
+#endif

 // vim: syntax=cpp.doxygen
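The StaticData change above swaps the key types from megdnn::HeuristicCache to megdnn::AlgorithmCache but keeps the same caching pattern: a process-wide unordered_map from a hashable key (handle, operator type, layouts) to a previously built cudnn_frontend::ExecutionPlan, so the expensive graph building and engine selection run only once per unique problem. A self-contained sketch of that pattern, using hypothetical names (PlanKey, PlanKeyHash, PlanCache, FakePlan) rather than the real MegDNN/cudnn_frontend types:

    #include <cstdint>
    #include <functional>
    #include <string>
    #include <unordered_map>
    #include <utility>

    // Stand-in for cudnn_frontend::ExecutionPlan in this sketch.
    struct FakePlan {};

    // Hashable key identifying a conv problem: handle identity, op type, serialized layouts.
    struct PlanKey {
        std::uintptr_t handle;
        std::uint32_t opr_type;
        std::string layouts;
        bool operator==(const PlanKey& rhs) const {
            return handle == rhs.handle && opr_type == rhs.opr_type && layouts == rhs.layouts;
        }
    };

    struct PlanKeyHash {
        std::size_t operator()(const PlanKey& k) const {
            std::size_t h = std::hash<std::uintptr_t>{}(k.handle);
            h = h * 31 + std::hash<std::uint32_t>{}(k.opr_type);
            h = h * 31 + std::hash<std::string>{}(k.layouts);
            return h;
        }
    };

    // Lookup-or-build cache: get() returns nullptr on a miss; the caller builds the
    // plan once and put()s it back so later executions reuse it.
    class PlanCache {
        std::unordered_map<PlanKey, FakePlan, PlanKeyHash> m_cache;

    public:
        const FakePlan* get(const PlanKey& key) const {
            auto it = m_cache.find(key);
            return it == m_cache.end() ? nullptr : &it->second;
        }
        void put(PlanKey key, FakePlan plan) {
            m_cache.emplace(std::move(key), std::move(plan));
        }
    };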
-/**
- * \file dnn/src/cuda/cudnn_wrapper_v8.h
- * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
- *
- * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- */
 #pragma once
+#if CUDNN_VERSION >= 8020
 #include "megdnn/basic_types.h"
 #include "megdnn/oprs/nn.h"
 #include "src/common/utils.h"
@@ -67,4 +58,5 @@ void run_conv_bias_act_with_plan(
 } // namespace cuda
 } // namespace megdnn
+#endif

 // vim: syntax=cpp.doxygen
@@ -58,11 +58,6 @@ HandleImpl::HandleImpl(megcoreComputingHandle_t comp_handle)
 For example `export CUDA_CACHE_MAXSIZE=2147483647` and `export CUDA_CACHE_PATH=/data/.cuda_cache`)");
     }
 #endif
-    size_t free, tot;
-    cudaMemGetInfo(&free, &tot);
-    printf("before cudnn create, free: %.2f MB, tot: %.2f MB, allocated: %.2f MB\n",
-           free / 1024.0 / 1024.0, tot / 1024.0 / 1024.0,
-           (tot - free) / 1024.0 / 1024.0);
     cudnn_check(cudnnCreate(&m_cudnn_handle));
     cublas_check(cublasCreate(&m_cublas_handle));
 #if CUDA_VERSION >= 10010
@@ -74,11 +69,6 @@ HandleImpl::HandleImpl(megcoreComputingHandle_t comp_handle)
     cudnn_check(cudnnSetStream(m_cudnn_handle, stream()));
     cublas_check(cublasSetStream(m_cublas_handle, stream()));

-#if CUDNN_VERSION >= 8004
-    // cudnn_check(cudnnOpsInferVersionCheck());
-    // cudnn_check(cudnnCnnInferVersionCheck());
-#endif
     // Note that all cublas scalars (alpha, beta) and scalar results such as dot
     // output resides at device side.
     cublas_check(cublasSetPointerMode(m_cublas_handle, CUBLAS_POINTER_MODE_DEVICE));
@@ -92,11 +82,6 @@ HandleImpl::HandleImpl(megcoreComputingHandle_t comp_handle)
                     cudaMemcpyHostToDevice, stream()));
     cuda_check(cudaStreamSynchronize(stream()));

-    cudaMemGetInfo(&free, &tot);
-    printf("after cudnn create, free: %.2f MB, tot: %.2f MB, allocated: %.2f MB\n",
-           free / 1024.0 / 1024.0, tot / 1024.0 / 1024.0,
-           (tot - free) / 1024.0 / 1024.0);
     // check tk1
     m_is_tegra_k1 = (strcmp(m_device_prop->name, "GK20A") == 0);
     m_cusolver_handle = nullptr;
......
-/**
- * \file dnn/test/cuda/conv_bias.cpp
- * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
- *
- * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- */
 #include "megdnn/dtype.h"
 #include "test/cuda/fixture.h"
@@ -26,7 +16,7 @@ using namespace megdnn;
 using namespace test;
 using namespace conv_bias;

-#if CUDNN_VERSION >= 8004
+#if CUDNN_VERSION >= 8020
 TEST_F(CUDA, CONV_V8_FLOAT) {
     Checker<ConvBiasForward> checker(handle_cuda());
     checker.set_before_exec_callback(
......