diff --git a/paddle/fluid/operators/fused/fused_multi_transformer_op.cu b/paddle/fluid/operators/fused/fused_multi_transformer_op.cu index f4a5319a68caac23882b1b0902a07ffa20000248..e38ac9a0ad2da52c62b4f64f4ea50eaaa90faec9 100644 --- a/paddle/fluid/operators/fused/fused_multi_transformer_op.cu +++ b/paddle/fluid/operators/fused/fused_multi_transformer_op.cu @@ -534,6 +534,8 @@ template __global__ void masked_multihead_attention_kernel( Masked_multihead_attention_params params) { +#if CUDA_ARCH_FP16_SUPPORTED(__CUDA_ARCH__) + static_assert(Dh % THREADS_PER_KEY == 0, ""); static_assert(Dh % THREADS_PER_VALUE == 0, ""); @@ -821,6 +823,9 @@ __global__ void masked_multihead_attention_kernel( printf("\n"); } #endif +#else + assert(false); +#endif } template