From df4dfda0db7ae2541c71cc796ab35113cdb72b1c Mon Sep 17 00:00:00 2001
From: Tian Zheng
Date: Tue, 22 Nov 2022 15:28:36 +0800
Subject: [PATCH] CudnnNormConvolution is no longer supported on NVIDIA Hopper
 GPUs (#48203)

* Skip tests that use fused_ops on H100

* Add error message to FusedOps on H100
---
 paddle/fluid/operators/fused/cudnn_norm_conv.cu.h      | 8 ++++++++
 paddle/fluid/operators/fused/cudnn_norm_conv_test.cc   | 8 ++++----
 .../fluid/tests/unittests/ir/test_fuse_resnet_unit.py  | 5 +++--
 3 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/paddle/fluid/operators/fused/cudnn_norm_conv.cu.h b/paddle/fluid/operators/fused/cudnn_norm_conv.cu.h
index 01e5e24e0a0..a5e210dc7fe 100644
--- a/paddle/fluid/operators/fused/cudnn_norm_conv.cu.h
+++ b/paddle/fluid/operators/fused/cudnn_norm_conv.cu.h
@@ -45,6 +45,14 @@ struct NormConvolutionArgs {
            int stride,
            int dilation,
            int group) {
+    PADDLE_ENFORCE_LT(
+        ctx.GetComputeCapability(),
+        90,
+        phi::errors::PreconditionNotMet(
+            "Expect compute capability to be less than 90, but got %d. "
+            "CUDNN FusedOps is no longer available on H100 and later "
+            "devices.",
+            ctx.GetComputeCapability()));
     PADDLE_ENFORCE_EQ(
         input_shape.size(),
         4U,
diff --git a/paddle/fluid/operators/fused/cudnn_norm_conv_test.cc b/paddle/fluid/operators/fused/cudnn_norm_conv_test.cc
index 81e298ff9c2..3369a8ca4a9 100644
--- a/paddle/fluid/operators/fused/cudnn_norm_conv_test.cc
+++ b/paddle/fluid/operators/fused/cudnn_norm_conv_test.cc
@@ -439,7 +439,7 @@ TEST(CudnnNormConvFp16, K1S1) {
   phi::GPUContext *ctx = static_cast<phi::GPUContext *>(
       platform::DeviceContextPool::Instance().Get(platform::CUDAPlace(0)));
 
-  if (ctx->GetComputeCapability() < 70) {
+  if (ctx->GetComputeCapability() < 70 || ctx->GetComputeCapability() >= 90) {
     ASSERT_THROW(test.CheckForward(1e-3, true),
                  paddle::platform::EnforceNotMet);
     ASSERT_THROW(test.CheckBackward(1e-3, true),
@@ -469,7 +469,7 @@ TEST(CudnnNormConvFp16, K3S1) {
   phi::GPUContext *ctx = static_cast<phi::GPUContext *>(
       platform::DeviceContextPool::Instance().Get(platform::CUDAPlace(0)));
 
-  if (ctx->GetComputeCapability() < 70) {
+  if (ctx->GetComputeCapability() < 70 || ctx->GetComputeCapability() >= 90) {
     ASSERT_THROW(test.CheckForward(1e-3, true),
                  paddle::platform::EnforceNotMet);
     ASSERT_THROW(test.CheckBackward(1e-3, true),
@@ -499,7 +499,7 @@ TEST(CudnnNormConvFp16, K1S1O4) {
   phi::GPUContext *ctx = static_cast<phi::GPUContext *>(
       platform::DeviceContextPool::Instance().Get(platform::CUDAPlace(0)));
 
-  if (ctx->GetComputeCapability() < 70) {
+  if (ctx->GetComputeCapability() < 70 || ctx->GetComputeCapability() >= 90) {
     ASSERT_THROW(test.CheckForward(1e-3, true),
                  paddle::platform::EnforceNotMet);
     ASSERT_THROW(test.CheckBackward(1e-3, true),
@@ -529,7 +529,7 @@ TEST(CudnnNormConvFp16, K1S2O4) {
   phi::GPUContext *ctx = static_cast<phi::GPUContext *>(
       platform::DeviceContextPool::Instance().Get(platform::CUDAPlace(0)));
 
-  if (ctx->GetComputeCapability() <= 70) {
+  if (ctx->GetComputeCapability() <= 70 || ctx->GetComputeCapability() >= 90) {
     ASSERT_THROW(test.CheckForward(1e-3, true),
                  paddle::platform::EnforceNotMet);
     ASSERT_THROW(test.CheckBackward(1e-3), paddle::platform::EnforceNotMet);
diff --git a/python/paddle/fluid/tests/unittests/ir/test_fuse_resnet_unit.py b/python/paddle/fluid/tests/unittests/ir/test_fuse_resnet_unit.py
index 4dabcdbcf35..28d49fbac7b 100644
--- a/python/paddle/fluid/tests/unittests/ir/test_fuse_resnet_unit.py
+++ b/python/paddle/fluid/tests/unittests/ir/test_fuse_resnet_unit.py
@@ -25,9 +25,10 @@ np.random.seed(0)
 @unittest.skipIf(
     not paddle.is_compiled_with_cuda()
     or paddle.get_cudnn_version() < 8000
-    or paddle.device.cuda.get_device_capability()[0] < 7,
+    or paddle.device.cuda.get_device_capability()[0] < 7
+    or paddle.device.cuda.get_device_capability()[0] >= 9,
     "only support with cuda and cudnn version is at least 8.0 "
-    "and device's compute capability is at least 7.0",
+    "and device's compute capability is at least 7.0 and less than 9.0",
 )
 class TestFuseResNetUnit(unittest.TestCase):
     def test_fuse_resenet_unit(self):
--
GitLab
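
Note (not part of the patch): a minimal sketch of the capability gate this change enforces, a CUDA build, cuDNN >= 8.0, and a compute capability of at least 7.0 but below 9.0, reusing only the Paddle APIs already visible in the updated skipIf condition; the helper name is hypothetical:

    import paddle

    def _cudnn_fused_ops_supported():
        # Same gate the patch encodes: CUDA build, cuDNN >= 8.0, and a compute
        # capability major version in [7, 9); Hopper (SM 9.x) is excluded.
        if not paddle.is_compiled_with_cuda():
            return False
        if paddle.get_cudnn_version() < 8000:
            return False
        major = paddle.device.cuda.get_device_capability()[0]
        return 7 <= major < 9

A test could then use unittest.skipIf(not _cudnn_fused_ops_supported(), ...) instead of repeating the three-clause condition.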