From 08cada982c567d452ed71f4a228e2f5785e5f601 Mon Sep 17 00:00:00 2001
From: RichardWooSJTU <37864677+RichardWooSJTU@users.noreply.github.com>
Date: Mon, 18 Jul 2022 21:40:45 +0800
Subject: [PATCH] Fix build error on CUDA architectures without FP16 support (#44391)

---
 .../inference/tensorrt/plugin/fused_token_prune_op_plugin.cu  | 4 ++++
 1 file changed, 4 insertions(+)
diff --git a/paddle/fluid/inference/tensorrt/plugin/fused_token_prune_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/fused_token_prune_op_plugin.cu
index 627ef44e6fd..c10ab7277e7 100644
--- a/paddle/fluid/inference/tensorrt/plugin/fused_token_prune_op_plugin.cu
+++ b/paddle/fluid/inference/tensorrt/plugin/fused_token_prune_op_plugin.cu
@@ -38,10 +38,12 @@ __global__ void ElementwiseMask(const T* a,
                                 const T* b,
                                 T* res,
                                 int num_elements) {
+#if CUDA_ARCH_FP16_SUPPORTED(__CUDA_ARCH__)
   auto tid = threadIdx.x + blockIdx.x * blockDim.x;
   if (tid >= num_elements) return;
   const T zero = 0;
   res[tid] = b[tid] >= zero ? a[tid] : zero;
+#endif
 }
 
 template <typename T>
@@ -121,6 +123,7 @@ __global__ void ReduceSum2(
 template <>
 __global__ void ReduceSum2<half>(
     const half* src, half* dst, int bsz, int nb_head, int max_seq_len) {
+#if CUDA_ARCH_FP16_SUPPORTED(__CUDA_ARCH__)
   int tid = threadIdx.x;
   int bid = blockIdx.x;
   int num_blocks_per_head = ((max_seq_len / blockDim.x) * max_seq_len);
@@ -152,6 +155,7 @@ __global__ void ReduceSum2<half>(
         static_cast<size_t>(bsz * max_seq_len),
         static_cast<platform::float16>(res_half[0]));
   }
+#endif
 }
 
 template <typename T>
-- 
GitLab