diff --git a/paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.cu
index 6bae3606afe0efcda771e858b7c40be88c5b704c..8e59fc1355a7579273fd6abac66f5ea70f86c6b1 100644
--- a/paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.cu
+++ b/paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.cu
@@ -229,7 +229,9 @@
 template <typename T>
 __global__ void apply_scale(T *data, T scale, int n) {
 #if CUDA_ARCH_FP16_SUPPORTED(__CUDA_ARCH__)
   int tid = blockIdx.x * blockDim.x + threadIdx.x;
-  data[tid] = data[tid] * scale;
+  if (tid < n) {
+    data[tid] = data[tid] * scale;
+  }
 #endif
 }