diff --git a/paddle/fluid/operators/fused/fused_attention_op.cc b/paddle/fluid/operators/fused/fused_attention_op.cc
index e473f8ff0662cfc3fd7bdc5010bfa1dc08fba85f..1f377810a22878112291e489c9abb60ee35c9b3d 100644
--- a/paddle/fluid/operators/fused/fused_attention_op.cc
+++ b/paddle/fluid/operators/fused/fused_attention_op.cc
@@ -163,11 +163,15 @@ class FusedAttentionOp : public framework::OperatorWithKernel {
                             "The third dim of CacheKV must be equal with num "
                             "head %d, but got %d",
                             y_dim[1], c_dim[2]));  // num_head
-      PADDLE_ENFORCE_GE(
-          c_dim[3], 0,
-          paddle::platform::errors::InvalidArgument(
-              "The forth dim of CacheKV must be greater than 0, but got %d",
-              c_dim[3]));  // cache_seq_len
+      // At compile time the input seq_len can be -1, in which case c_dim[3]
+      // may be negative inside a while op, so only check it at runtime.
+      if (ctx->IsRuntime()) {
+        PADDLE_ENFORCE_GE(
+            c_dim[3], 0,
+            paddle::platform::errors::InvalidArgument(
+                "The forth dim of CacheKV must be greater than 0, but got %d",
+                c_dim[3]));  // cache_seq_len
+      }
       PADDLE_ENFORCE_EQ(c_dim[4], y_dim[2],
                         paddle::platform::errors::InvalidArgument(
                             "The fifth dim of CacheKV must be equal with head "