提交 10af44ab 编写于 作者: M Megvii Engine Team

fix(dnn/cuda): fix cudnn conv impl for nchw4_nchw hybrid layout

the conv_bias algorithm *_IMPLICIT_GEMM is disabled for cuDNN versions below 8.0.0 because it produces incorrect results for int8x4->f32 configs

GitOrigin-RevId: 7cc52d0a85c5ba345af52c61534e7e82f42cc088
上级 feb813bc
...@@ -73,10 +73,12 @@ bool ConvBiasForwardImpl::AlgoCUDNNConvBiasActivation::is_available( ...@@ -73,10 +73,12 @@ bool ConvBiasForwardImpl::AlgoCUDNNConvBiasActivation::is_available(
return false; return false;
} }
//! FIXME: conv kernel of cudnn for NCHW4_NCHW tensor format causes illegal #if CUDNN_MAJOR < 8
//! memory access errors, so we have to disable this kernel here. if (m_cudnn_enum == CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM &&
if (param.format == param::ConvBias::Format::NCHW4_NCHW || param.format == param::ConvBias::Format::NCHW4_NCHW)
param.format == param::ConvBias::Format::NCHW4_NCHW32 || return false;
#endif
if (param.format == param::ConvBias::Format::NCHW4_NCHW32 ||
param.format == param::ConvBias::Format::NCHW32_NCHW4) param.format == param::ConvBias::Format::NCHW32_NCHW4)
return false; return false;
if (param.format == param::ConvBias::Format::NCHW && if (param.format == param::ConvBias::Format::NCHW &&
......
...@@ -571,9 +571,6 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_NCHW4) { ...@@ -571,9 +571,6 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_NCHW4) {
checker.exec({{1, 4, 2, 2, 4}, {16, 4, 3, 3, 4}, {1, 4, 1, 1, 4}, {}, {}}); checker.exec({{1, 4, 2, 2, 4}, {16, 4, 3, 3, 4}, {1, 4, 1, 1, 4}, {}, {}});
} }
//! FIXME: conv kernel of cudnn for NCHW4_NCHW tensor format causes illegal
//! memory access errors, so we have to disable this test here.
#if 0
TEST_F(CUDA, CONV_BIAS_FORWARD_NCHW4_NCHW) { TEST_F(CUDA, CONV_BIAS_FORWARD_NCHW4_NCHW) {
require_compute_capability(6, 1); require_compute_capability(6, 1);
using namespace conv_bias; using namespace conv_bias;
...@@ -600,8 +597,9 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_NCHW4_NCHW) { ...@@ -600,8 +597,9 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_NCHW4_NCHW) {
auto run = [&](const TensorShapeArray& shapes) { auto run = [&](const TensorShapeArray& shapes) {
opr->param() = param; opr->param() = param;
TensorLayout dst_layout; TensorLayout dst_layout;
opr->deduce_layout({shapes[0], dtype::Float32()}, opr->deduce_layout(
{shapes[1], dtype::Float32()}, {}, {}, dst_layout); {shapes[0], dtype::Float32()}, {shapes[1], dtype::Float32()}, {}, {},
dst_layout);
checker.execs({shapes[0], shapes[1], shapes[2], dst_layout, {}}); checker.execs({shapes[0], shapes[1], shapes[2], dst_layout, {}});
}; };
...@@ -631,8 +629,6 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_NCHW4_NCHW) { ...@@ -631,8 +629,6 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_NCHW4_NCHW) {
} }
#endif #endif
#endif
TEST_F(CUDA, CONV_BIAS_FORWARD_CHANWISE) { TEST_F(CUDA, CONV_BIAS_FORWARD_CHANWISE) {
Checker<ConvBiasForward> checker(handle_cuda()); Checker<ConvBiasForward> checker(handle_cuda());
std::vector<TestArg> args = get_chanwise_args(); std::vector<TestArg> args = get_chanwise_args();
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册