From 3f366fee3312a7fc011f1110c117a718e6b47ca2 Mon Sep 17 00:00:00 2001 From: Qi Li Date: Wed, 2 Jun 2021 19:21:00 +0800 Subject: [PATCH] [ROCM] fix fused_fc_elementwise_layernorm, test=develop (#33281) --- paddle/fluid/platform/cuda_device_function.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/paddle/fluid/platform/cuda_device_function.h b/paddle/fluid/platform/cuda_device_function.h index 4095720f71e..35214330238 100644 --- a/paddle/fluid/platform/cuda_device_function.h +++ b/paddle/fluid/platform/cuda_device_function.h @@ -31,6 +31,7 @@ namespace platform { #endif inline static int RoundToPowerOfTwo(int dim) { +#ifdef PADDLE_WITH_CUDA if (dim > 512) { return 1024; } else if (dim > 256) { @@ -44,6 +45,17 @@ inline static int RoundToPowerOfTwo(int dim) { } else { return 32; } +#else // HIP results in error or nan if > 256 + if (dim > 128) { + return 256; + } else if (dim > 64) { + return 128; + } else if (dim > 32) { + return 64; + } else { + return 32; + } +#endif } #define CUDA_LAUNCH_KERNEL_BASE(dim, ...) \ -- GitLab