diff --git a/paddle/cuda/include/hl_matrix_type.cuh b/paddle/cuda/include/hl_matrix_type.cuh index 77f73167fe6ea2e301b3f68e51d32f3e1578ec93..12c717b612da907ac7aedf3e7787bdbb96948be2 100644 --- a/paddle/cuda/include/hl_matrix_type.cuh +++ b/paddle/cuda/include/hl_matrix_type.cuh @@ -39,6 +39,8 @@ typedef double2 vecType; #elif defined(__SSE3__) #include "hl_cpu_simd_sse.cuh" #elif (defined(__ARM_NEON) || defined(__ARM_NEON__)) && !defined(__NVCC__) +// Currently nvcc does not support NEON intrinsics. +// TODO: Extract the SIMD intrinsic implementation from .cu files. #include "hl_cpu_simd_neon.cuh" #else #include "hl_cpu_scalar.cuh"