CudnnNormConvolution is no longer supported on NVIDIA Hopper GPUs (#48203)

* Skip tests that use fused_ops on H100
* Add error message to FusedOps on H100

CudnnNormConvolution is no longer supported on NVIDIA Hopper GPUs (#48203)
* Skip tests that use fused_ops on H100
* Add error message to FusedOps on H100
df4dfda0 · Tian Zheng · GitHub · 2ab60c30 · df4dfda0 · df4dfda0
3 changed files
--- a/paddle/fluid/operators/fused/cudnn_norm_conv.cu.h
+++ b/paddle/fluid/operators/fused/cudnn_norm_conv.cu.h
@@ -45,6 +45,14 @@ struct NormConvolutionArgs {
           int stride,
           int dilation,
           int group) {
+    PADDLE_ENFORCE_LT(
+        ctx.GetComputeCapability(),
+        90,
+        phi::errors::PreconditionNotMet(
+            "Expect compute compatiblity to be less than 90, but got %d. "
+            "CUDNN FusedOps is no longer available on H100 and later "
+            "devices.",
+            ctx.GetComputeCapability()));
    PADDLE_ENFORCE_EQ(
        input_shape.size(),
        4U,

--- a/paddle/fluid/operators/fused/cudnn_norm_conv_test.cc
+++ b/paddle/fluid/operators/fused/cudnn_norm_conv_test.cc
@@ -439,7 +439,7 @@ TEST(CudnnNormConvFp16, K1S1) {
  phi::GPUContext *ctx = static_cast<phi::GPUContext *>(
      platform::DeviceContextPool::Instance().Get(platform::CUDAPlace(0)));
-  if (ctx->GetComputeCapability() < 70) {
+  if (ctx->GetComputeCapability() < 70 || ctx->GetComputeCapability() >= 90) {
    ASSERT_THROW(test.CheckForward(1e-3, true),
                 paddle::platform::EnforceNotMet);
    ASSERT_THROW(test.CheckBackward(1e-3, true),
@@ -469,7 +469,7 @@ TEST(CudnnNormConvFp16, K3S1) {
  phi::GPUContext *ctx = static_cast<phi::GPUContext *>(
      platform::DeviceContextPool::Instance().Get(platform::CUDAPlace(0)));
-  if (ctx->GetComputeCapability() < 70) {
+  if (ctx->GetComputeCapability() < 70 || ctx->GetComputeCapability() >= 90) {
    ASSERT_THROW(test.CheckForward(1e-3, true),
                 paddle::platform::EnforceNotMet);
    ASSERT_THROW(test.CheckBackward(1e-3, true),
@@ -499,7 +499,7 @@ TEST(CudnnNormConvFp16, K1S1O4) {
  phi::GPUContext *ctx = static_cast<phi::GPUContext *>(
      platform::DeviceContextPool::Instance().Get(platform::CUDAPlace(0)));
-  if (ctx->GetComputeCapability() < 70) {
+  if (ctx->GetComputeCapability() < 70 || ctx->GetComputeCapability() >= 90) {
    ASSERT_THROW(test.CheckForward(1e-3, true),
                 paddle::platform::EnforceNotMet);
    ASSERT_THROW(test.CheckBackward(1e-3, true),
@@ -529,7 +529,7 @@ TEST(CudnnNormConvFp16, K1S2O4) {
  phi::GPUContext *ctx = static_cast<phi::GPUContext *>(
      platform::DeviceContextPool::Instance().Get(platform::CUDAPlace(0)));
-  if (ctx->GetComputeCapability() <= 70) {
+  if (ctx->GetComputeCapability() <= 70 || ctx->GetComputeCapability() >= 90) {
    ASSERT_THROW(test.CheckForward(1e-3, true),
                 paddle::platform::EnforceNotMet);
    ASSERT_THROW(test.CheckBackward(1e-3), paddle::platform::EnforceNotMet);

--- a/python/paddle/fluid/tests/unittests/ir/test_fuse_resnet_unit.py
+++ b/python/paddle/fluid/tests/unittests/ir/test_fuse_resnet_unit.py
@@ -25,9 +25,10 @@ np.random.seed(0)
 @unittest.skipIf(
    not paddle.is_compiled_with_cuda()
    or paddle.get_cudnn_version() < 8000
-    or paddle.device.cuda.get_device_capability()[0] < 7,
+    or paddle.device.cuda.get_device_capability()[0] < 7
+    or paddle.device.cuda.get_device_capability()[0] >= 9,
    "only support with cuda and cudnn version is at least 8.0 "
-    "and device's compute capability is at least 7.0",
+    "and device's compute capability is at least 7.0 and less than 9.0",
 )
 class TestFuseResNetUnit(unittest.TestCase):
    def test_fuse_resenet_unit(self):