未验证 提交 df4dfda0 编写于 作者: Tian Zheng 提交者: GitHub

CudnnNormConvolution is no longer supported on NVIDIA Hopper GPUs (#48203)

* Skip tests that use fused_ops on H100

* Add error message to FusedOps on H100
上级 2ab60c30
...@@ -45,6 +45,14 @@ struct NormConvolutionArgs { ...@@ -45,6 +45,14 @@ struct NormConvolutionArgs {
int stride, int stride,
int dilation, int dilation,
int group) { int group) {
PADDLE_ENFORCE_LT(
ctx.GetComputeCapability(),
90,
phi::errors::PreconditionNotMet(
"Expect compute compatiblity to be less than 90, but got %d. "
"CUDNN FusedOps is no longer available on H100 and later "
"devices.",
ctx.GetComputeCapability()));
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
input_shape.size(), input_shape.size(),
4U, 4U,
......
...@@ -439,7 +439,7 @@ TEST(CudnnNormConvFp16, K1S1) { ...@@ -439,7 +439,7 @@ TEST(CudnnNormConvFp16, K1S1) {
phi::GPUContext *ctx = static_cast<phi::GPUContext *>( phi::GPUContext *ctx = static_cast<phi::GPUContext *>(
platform::DeviceContextPool::Instance().Get(platform::CUDAPlace(0))); platform::DeviceContextPool::Instance().Get(platform::CUDAPlace(0)));
if (ctx->GetComputeCapability() < 70) { if (ctx->GetComputeCapability() < 70 || ctx->GetComputeCapability() >= 90) {
ASSERT_THROW(test.CheckForward(1e-3, true), ASSERT_THROW(test.CheckForward(1e-3, true),
paddle::platform::EnforceNotMet); paddle::platform::EnforceNotMet);
ASSERT_THROW(test.CheckBackward(1e-3, true), ASSERT_THROW(test.CheckBackward(1e-3, true),
...@@ -469,7 +469,7 @@ TEST(CudnnNormConvFp16, K3S1) { ...@@ -469,7 +469,7 @@ TEST(CudnnNormConvFp16, K3S1) {
phi::GPUContext *ctx = static_cast<phi::GPUContext *>( phi::GPUContext *ctx = static_cast<phi::GPUContext *>(
platform::DeviceContextPool::Instance().Get(platform::CUDAPlace(0))); platform::DeviceContextPool::Instance().Get(platform::CUDAPlace(0)));
if (ctx->GetComputeCapability() < 70) { if (ctx->GetComputeCapability() < 70 || ctx->GetComputeCapability() >= 90) {
ASSERT_THROW(test.CheckForward(1e-3, true), ASSERT_THROW(test.CheckForward(1e-3, true),
paddle::platform::EnforceNotMet); paddle::platform::EnforceNotMet);
ASSERT_THROW(test.CheckBackward(1e-3, true), ASSERT_THROW(test.CheckBackward(1e-3, true),
...@@ -499,7 +499,7 @@ TEST(CudnnNormConvFp16, K1S1O4) { ...@@ -499,7 +499,7 @@ TEST(CudnnNormConvFp16, K1S1O4) {
phi::GPUContext *ctx = static_cast<phi::GPUContext *>( phi::GPUContext *ctx = static_cast<phi::GPUContext *>(
platform::DeviceContextPool::Instance().Get(platform::CUDAPlace(0))); platform::DeviceContextPool::Instance().Get(platform::CUDAPlace(0)));
if (ctx->GetComputeCapability() < 70) { if (ctx->GetComputeCapability() < 70 || ctx->GetComputeCapability() >= 90) {
ASSERT_THROW(test.CheckForward(1e-3, true), ASSERT_THROW(test.CheckForward(1e-3, true),
paddle::platform::EnforceNotMet); paddle::platform::EnforceNotMet);
ASSERT_THROW(test.CheckBackward(1e-3, true), ASSERT_THROW(test.CheckBackward(1e-3, true),
...@@ -529,7 +529,7 @@ TEST(CudnnNormConvFp16, K1S2O4) { ...@@ -529,7 +529,7 @@ TEST(CudnnNormConvFp16, K1S2O4) {
phi::GPUContext *ctx = static_cast<phi::GPUContext *>( phi::GPUContext *ctx = static_cast<phi::GPUContext *>(
platform::DeviceContextPool::Instance().Get(platform::CUDAPlace(0))); platform::DeviceContextPool::Instance().Get(platform::CUDAPlace(0)));
if (ctx->GetComputeCapability() <= 70) { if (ctx->GetComputeCapability() <= 70 || ctx->GetComputeCapability() >= 90) {
ASSERT_THROW(test.CheckForward(1e-3, true), ASSERT_THROW(test.CheckForward(1e-3, true),
paddle::platform::EnforceNotMet); paddle::platform::EnforceNotMet);
ASSERT_THROW(test.CheckBackward(1e-3), paddle::platform::EnforceNotMet); ASSERT_THROW(test.CheckBackward(1e-3), paddle::platform::EnforceNotMet);
......
...@@ -25,9 +25,10 @@ np.random.seed(0) ...@@ -25,9 +25,10 @@ np.random.seed(0)
@unittest.skipIf( @unittest.skipIf(
not paddle.is_compiled_with_cuda() not paddle.is_compiled_with_cuda()
or paddle.get_cudnn_version() < 8000 or paddle.get_cudnn_version() < 8000
or paddle.device.cuda.get_device_capability()[0] < 7, or paddle.device.cuda.get_device_capability()[0] < 7
or paddle.device.cuda.get_device_capability()[0] >= 9,
"only support with cuda and cudnn version is at least 8.0 " "only support with cuda and cudnn version is at least 8.0 "
"and device's compute capability is at least 7.0", "and device's compute capability is at least 7.0 and less than 9.0",
) )
class TestFuseResNetUnit(unittest.TestCase): class TestFuseResNetUnit(unittest.TestCase):
def test_fuse_resenet_unit(self): def test_fuse_resenet_unit(self):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册