diff --git a/paddle/fluid/inference/api/details/zero_copy_tensor.cc b/paddle/fluid/inference/api/details/zero_copy_tensor.cc
index 52204ff3658f450a17233d5e93b9e1a689f699e2..2edc3cb4d3eb2a2fa527187ba9e331e4318c3613 100644
--- a/paddle/fluid/inference/api/details/zero_copy_tensor.cc
+++ b/paddle/fluid/inference/api/details/zero_copy_tensor.cc
@@ -457,13 +457,13 @@ void Tensor::CopyToCpuImpl(T *data,
     auto custom_place = t_place;
     auto *dev_ctx = static_cast<const paddle::platform::CustomDeviceContext *>(
         pool.Get(custom_place));
+    dev_ctx->Wait();
     paddle::memory::Copy(paddle::platform::CPUPlace(),
                          static_cast<void *>(data),
                          custom_place,
                          t_data,
                          ele_num * sizeof(T),
                          dev_ctx->stream());
-// TODO(wangran16): sync_stream
 #else
     PADDLE_THROW(paddle::platform::errors::InvalidArgument(
         "The analysis predictor supports CPU, GPU, NPU and XPU now."));