From 039727f84858f9d451664db69f8ee313334fdae7 Mon Sep 17 00:00:00 2001
From: Megvii Engine Team
Date: Mon, 8 Mar 2021 20:58:54 +0800
Subject: [PATCH] fix(mgb/dnn): fix cudnn8.0.4 convbias with z

GitOrigin-RevId: 09453d8a12f8773e1422f9f9b36bd2068b621ee8
---
 src/gopt/impl/inference.cpp | 11 ++++++++++
 src/gopt/test/inference.cpp | 43 +++++++++++++++++++++----------------
 2 files changed, 35 insertions(+), 19 deletions(-)

diff --git a/src/gopt/impl/inference.cpp b/src/gopt/impl/inference.cpp
index f9d14bc0..5a638efa 100644
--- a/src/gopt/impl/inference.cpp
+++ b/src/gopt/impl/inference.cpp
@@ -36,6 +36,9 @@
 #if MGB_ENABLE_TENSOR_RT
 #include "megbrain/tensorrt/tensorrt_opr.h"
 #endif
+#if MGB_CUDA
+#include <cudnn.h>
+#endif
 
 #include "megbrain/gopt/misc.h"
 
@@ -1999,6 +2002,11 @@ void FuseConvBiasZPass::apply(OptState& state) const {
     auto check_fuse_dtype = [&](opr::ConvBias* conv_bias, VarNode* z) -> bool {
         return conv_bias->output(0)->dtype().enumv() == z->dtype().enumv();
     };
+#if MGB_CUDA && (CUDNN_MAJOR == 8)
+    auto check_fuse_param = [&](opr::ConvBias* conv_bias, VarNode* z) -> bool {
+        return conv_bias->input(0) != z;
+    };
+#endif
     auto get_convbias_nonline_mode = [&](OperatorNodeBase* opr) -> NonlineMode {
         if (opr->same_type<opr::ElemwiseMultiType>()) {
             auto elem = try_cast_as_op<opr::ElemwiseMultiType>(opr);
@@ -2037,6 +2045,9 @@
 
         if (conv_bias && check_conv_bias(conv_bias) &&
             check_fuse_shape(conv_bias, z_inp) &&
+#if MGB_CUDA && (CUDNN_MAJOR == 8)
+            check_fuse_param(conv_bias, z_inp) &&
+#endif
             check_fuse_dtype(conv_bias, z_inp)) {
             auto param = conv_bias->param();
             param.nonlineMode = get_convbias_nonline_mode(opr);
diff --git a/src/gopt/test/inference.cpp b/src/gopt/test/inference.cpp
index 73dc241e..70a6ea89 100644
--- a/src/gopt/test/inference.cpp
+++ b/src/gopt/test/inference.cpp
@@ -36,6 +36,10 @@
 
 #include
 
+#if MGB_CUDA
+#include <cudnn.h>
+#endif
+
 using namespace mgb;
 
 namespace {
@@ -2211,8 +2215,6 @@ TEST(TestGoptInference, EnableTensorCore) {
     MGB_ASSERT_TENSOR_EQ(host_y, host_y_opt);
 }
 
-//! close for cu111 ci, reopen it when bug fixed
-#if CUDA_VERSION < 11000
 TEST(FuseConvBiasZPass, BlockFuse) {
     REQUIRE_GPU(1);
     auto cn = CompNode::load("gpu0");
@@ -2284,6 +2286,25 @@ TEST(FuseConvBiasZPass, BlockFuse) {
                 OperatorNodeConfig{dtype::QuantizedS8(2.5f)});
         z = opr::TypeCvt::make(z, dtype::Float32());
 
+        SymbolVar z_fuse;
+        {
+            auto options = gopt::OptimizeForInferenceOptions{};
+            options.enable_fuse_conv_bias_nonlinearity()
+                    .enable_fuse_conv_bias_with_z();
+            unpack_vector(gopt::optimize_for_inference({z}, options), z_fuse);
+        }
+        graph->compile({{z_fuse, {}}})
+                ->to_json()
+                ->writeto_fpath(
+                        output_file("FuseConvBiasZPass.BlockFuse_fuse.json"));
+
+        auto nr_elem_multi_type =
+                find_opr_num<opr::ElemwiseMultiType>(z_fuse);
+        MGB_MARK_USED_VAR(nr_elem_multi_type);
+#if MGB_CUDA && (CUDNN_MAJOR == 8)
+        ASSERT_EQ(2u, nr_elem_multi_type);
+#else
+        ASSERT_EQ(1u, nr_elem_multi_type);
         //! fuse z mannually
         auto z0 = opr::ConvBias::make(
                 x, w1, b1, param, {},
@@ -2299,42 +2320,26 @@ TEST(FuseConvBiasZPass, BlockFuse) {
                 OperatorNodeConfig{dtype::QuantizedS8(2.5f)});
         z4 = opr::TypeCvt::make(z4, dtype::Float32());
 
-        SymbolVar z_fuse;
         SymbolVar z_nonfuse;
-        {
-            auto options = gopt::OptimizeForInferenceOptions{};
-            options.enable_fuse_conv_bias_nonlinearity()
-                    .enable_fuse_conv_bias_with_z();
-            unpack_vector(gopt::optimize_for_inference({z}, options), z_fuse);
-        }
         {
             auto options = gopt::OptimizeForInferenceOptions{};
             options.enable_fuse_conv_bias_nonlinearity();
             unpack_vector(gopt::optimize_for_inference({z4}, options),
                           z_nonfuse);
         }
-        auto nr_elem_multi_type =
-                find_opr_num<opr::ElemwiseMultiType>(z_fuse);
-        MGB_MARK_USED_VAR(nr_elem_multi_type);
-        ASSERT_EQ(1u, nr_elem_multi_type);
-        graph->compile({{z_fuse, {}}})
-                ->to_json()
-                ->writeto_fpath(
-                        output_file("FuseConvBiasZPass.BlockFuse_fuse.json"));
         graph->compile({{z_nonfuse, {}}})
                 ->to_json()
                 ->writeto_fpath(output_file(
                         "FuseConvBiasZPass.BlockFuse_nonfuse.json"));
-
         HostTensorND host_z_fuse, host_z_nonfuse;
         auto func =
                 graph->compile({make_callback_copy(z_nonfuse, host_z_nonfuse),
                                 make_callback_copy(z_fuse, host_z_fuse)});
        func->execute();
         MGB_ASSERT_TENSOR_EQ(host_z_fuse, host_z_nonfuse);
+#endif
     }
 }
-#endif
 
 TEST(TestEnableTensorCore, ShuffleMerge) {
     REQUIRE_GPU(1);
-- 
GitLab