diff --git a/paddle/fluid/operators/fused/fused_attention_op.cc b/paddle/fluid/operators/fused/fused_attention_op.cc index e473f8ff0662cfc3fd7bdc5010bfa1dc08fba85f..1f377810a22878112291e489c9abb60ee35c9b3d 100644 --- a/paddle/fluid/operators/fused/fused_attention_op.cc +++ b/paddle/fluid/operators/fused/fused_attention_op.cc @@ -163,11 +163,15 @@ class FusedAttentionOp : public framework::OperatorWithKernel { "The third dim of CacheKV must be equal with num " "head %d, but got %d", y_dim[1], c_dim[2])); // num_head - PADDLE_ENFORCE_GE( - c_dim[3], 0, - paddle::platform::errors::InvalidArgument( - "The forth dim of CacheKV must be greater than 0, but got %d", - c_dim[3])); // cache_seq_len + // In compile stage, input seq_len can be -1, in that case + // c_dim[3] may < 0 in while + if (ctx->IsRuntime()) { + PADDLE_ENFORCE_GE( + c_dim[3], 0, + paddle::platform::errors::InvalidArgument( + "The forth dim of CacheKV must be greater than 0, but got %d", + c_dim[3])); // cache_seq_len + } PADDLE_ENFORCE_EQ(c_dim[4], y_dim[2], paddle::platform::errors::InvalidArgument( "The fifth dim of CacheKV must be equal with head "