From ef6120f32f41250984545c74b0417209aebcf349 Mon Sep 17 00:00:00 2001
From: Qi Li
Date: Thu, 3 Jun 2021 13:58:49 +0800
Subject: [PATCH] [ROCM] fix fused_fc_elementwise_layernorm, test=develop (#33281) (#33299)

---
 paddle/fluid/platform/cuda_device_function.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/paddle/fluid/platform/cuda_device_function.h b/paddle/fluid/platform/cuda_device_function.h
index dde9531e591..5a86bb46e6a 100644
--- a/paddle/fluid/platform/cuda_device_function.h
+++ b/paddle/fluid/platform/cuda_device_function.h
@@ -32,6 +32,7 @@ namespace platform {
 #endif
 
 inline static int RoundToPowerOfTwo(int dim) {
+#ifdef PADDLE_WITH_CUDA
   if (dim > 512) {
     return 1024;
   } else if (dim > 256) {
@@ -45,6 +46,17 @@ inline static int RoundToPowerOfTwo(int dim) {
   } else {
     return 32;
   }
+#else // HIP results in error or nan if > 256
+  if (dim > 128) {
+    return 256;
+  } else if (dim > 64) {
+    return 128;
+  } else if (dim > 32) {
+    return 64;
+  } else {
+    return 32;
+  }
+#endif
 }
 
 #define CUDA_LAUNCH_KERNEL_BASE(dim, ...) \
-- 
GitLab