Commit 36b1ba05 authored by Megvii Engine Team

fix(mgb/dnn): fix cudnn8.0.4 convbias with z

GitOrigin-RevId: 09453d8a12f8773e1422f9f9b36bd2068b621ee8
Parent dadd5086
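
In brief: on cuDNN 8 builds (the title cites 8.0.4), the fused ConvBias-with-z path misbehaves when the z operand aliases the convolution's own input, which is exactly what the new guard below encodes, so FuseConvBiasZPass gains an extra predicate that declines such fusions, and the BlockFuse test is updated to expect the unfused graph in that configuration. A minimal sketch of the decision this diff implements, reusing the helper names from the hunks below; illustrative only, not a verbatim excerpt:

// Sketch: fuse z into ConvBias only when every check passes; on cuDNN 8
// additionally require that z is not the convolution's own input.
bool can_fuse_z(opr::ConvBias* conv_bias, VarNode* z) {
    bool ok = check_conv_bias(conv_bias) && check_fuse_shape(conv_bias, z) &&
              check_fuse_dtype(conv_bias, z);
#if MGB_CUDA && (CUDNN_MAJOR == 8)
    ok = ok && conv_bias->input(0) != z;  // the new cuDNN-8 guard
#endif
    return ok;
}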
@@ -36,6 +36,9 @@
#if MGB_ENABLE_TENSOR_RT
#include "megbrain/tensorrt/tensorrt_opr.h"
#endif
#if MGB_CUDA
#include <cudnn.h>
#endif
#include "megbrain/gopt/misc.h"
@@ -1999,6 +2002,11 @@ void FuseConvBiasZPass::apply(OptState& state) const {
auto check_fuse_dtype = [&](opr::ConvBias* conv_bias, VarNode* z) -> bool {
return conv_bias->output(0)->dtype().enumv() == z->dtype().enumv();
};
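// Guard added for cuDNN 8: the fused conv-bias-with-z path breaks when the
// z operand aliases the convolution's own input (cf. the commit title,
// cudnn 8.0.4), so fusion must be declined in that case.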
#if MGB_CUDA && (CUDNN_MAJOR == 8)
auto check_fuse_param = [&](opr::ConvBias* conv_bias, VarNode* z) -> bool {
return conv_bias->input(0) != z;
};
#endif
auto get_convbias_nonline_mode = [&](OperatorNodeBase* opr) -> NonlineMode {
if (opr->same_type<opr::Elemwise>()) {
auto elem = try_cast_as_op<opr::Elemwise>(opr);
@@ -2037,6 +2045,9 @@ void FuseConvBiasZPass::apply(OptState& state) const {
if (conv_bias && check_conv_bias(conv_bias) &&
check_fuse_shape(conv_bias, z_inp) &&
#if MGB_CUDA && (CUDNN_MAJOR == 8)
check_fuse_param(conv_bias, z_inp) &&
#endif
check_fuse_dtype(conv_bias, z_inp)) {
auto param = conv_bias->param();
param.nonlineMode = get_convbias_nonline_mode(opr);
......
@@ -36,6 +36,10 @@
#include <random>
#if MGB_CUDA
#include <cudnn.h>
#endif
using namespace mgb;
namespace {
@@ -2211,8 +2215,6 @@ TEST(TestGoptInference, EnableTensorCore) {
MGB_ASSERT_TENSOR_EQ(host_y, host_y_opt);
}
//! closed for cu111 CI, reopen it when the bug is fixed
#if CUDA_VERSION < 11000
TEST(FuseConvBiasZPass, BlockFuse) {
REQUIRE_GPU(1);
auto cn = CompNode::load("gpu0");
@@ -2284,6 +2286,25 @@ TEST(FuseConvBiasZPass, BlockFuse) {
OperatorNodeConfig{dtype::QuantizedS8(2.5f)});
z = opr::TypeCvt::make(z, dtype::Float32());
SymbolVar z_fuse;
{
auto options = gopt::OptimizeForInferenceOptions{};
options.enable_fuse_conv_bias_nonlinearity()
.enable_fuse_conv_bias_with_z();
unpack_vector(gopt::optimize_for_inference({z}, options), z_fuse);
}
graph->compile({{z_fuse, {}}})
->to_json()
->writeto_fpath(
output_file("FuseConvBiasZPass.BlockFuse_fuse.json"));
auto nr_elem_multi_type =
find_opr_num<mgb::opr::ElemwiseMultiType>(z_fuse);
MGB_MARK_USED_VAR(nr_elem_multi_type);
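//! with cuDNN 8 the new check_fuse_param guard declines the z-fusion in
//! this graph (the z operand here is also the convolution's input), so one
//! extra ElemwiseMultiType survives; elsewhere everything fuses into one.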
#if MGB_CUDA && (CUDNN_MAJOR == 8)
ASSERT_EQ(2u, nr_elem_multi_type);
#else
ASSERT_EQ(1u, nr_elem_multi_type);
//! fuse z manually
auto z0 = opr::ConvBias::make(
x, w1, b1, param, {},
@@ -2299,42 +2320,26 @@
OperatorNodeConfig{dtype::QuantizedS8(2.5f)});
z4 = opr::TypeCvt::make(z4, dtype::Float32());
SymbolVar z_fuse;
SymbolVar z_nonfuse;
{
auto options = gopt::OptimizeForInferenceOptions{};
options.enable_fuse_conv_bias_nonlinearity()
.enable_fuse_conv_bias_with_z();
unpack_vector(gopt::optimize_for_inference({z}, options), z_fuse);
}
{
auto options = gopt::OptimizeForInferenceOptions{};
options.enable_fuse_conv_bias_nonlinearity();
unpack_vector(gopt::optimize_for_inference({z4}, options),
z_nonfuse);
}
auto nr_elem_multi_type =
find_opr_num<mgb::opr::ElemwiseMultiType>(z_fuse);
MGB_MARK_USED_VAR(nr_elem_multi_type);
ASSERT_EQ(1u, nr_elem_multi_type);
graph->compile({{z_fuse, {}}})
->to_json()
->writeto_fpath(
output_file("FuseConvBiasZPass.BlockFuse_fuse.json"));
graph->compile({{z_nonfuse, {}}})
->to_json()
->writeto_fpath(output_file(
"FuseConvBiasZPass.BlockFuse_nonfuse.json"));
HostTensorND host_z_fuse, host_z_nonfuse;
auto func =
graph->compile({make_callback_copy(z_nonfuse, host_z_nonfuse),
make_callback_copy(z_fuse, host_z_fuse)});
func->execute();
MGB_ASSERT_TENSOR_EQ(host_z_fuse, host_z_nonfuse);
#endif
}
}
#endif
TEST(TestEnableTensorCore, ShuffleMerge) {
REQUIRE_GPU(1);
......