diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op.h b/paddle/fluid/operators/elementwise/elementwise_add_op.h
index 0ef79667b8d66df8beaf512a95820e119816cbff..e78b0c03fcc756335a5adadaa08aa541a0b04942 100644
--- a/paddle/fluid/operators/elementwise/elementwise_add_op.h
+++ b/paddle/fluid/operators/elementwise/elementwise_add_op.h
@@ -22,6 +22,8 @@ limitations under the License. */
 #include "paddle/fluid/operators/math/math_function.h"
 #ifdef PADDLE_WITH_CUDA
 #ifdef __NVCC__
+#include <cuda.h>
+#include <cuda_fp16.h>
 #include "cub/cub.cuh"
 #endif
 #endif
@@ -361,6 +363,7 @@ class ElementwiseAddGradKernel : public ElemwiseGradKernel<T> {
       int max_blocks = std::max(max_physical_threads / (block_x * block_y), 1);
       int theory_block = (width + blocks.x - 1) / blocks.x;
       dim3 grids(std::min(theory_block, max_blocks));
+#if CUDA_ARCH_FP16_SUPPORTED(__CUDA_ARCH__)
       if (std::is_same<T, paddle::platform::float16>::value && width < 2048 &&
           width % 2 == 0 && height % 64 == 0) {
         auto &dev_ctx =
@@ -378,6 +381,7 @@ class ElementwiseAddGradKernel : public ElemwiseGradKernel<T> {
                                                                  width, height);
         return;
       }
+#endif
 
       if (width / height < 32) {
         MatrixColReduce<T, block_x, block_y><<<grids, blocks, 0, stream>>>(