diff --git a/paddle/fluid/framework/details/fetch_op_handle.cc b/paddle/fluid/framework/details/fetch_op_handle.cc
index c697a1c93786d43d540ba7c40f6fd78ffcdcb0e0..03323e3da7bd2634be1d9f07d9e8fcf21cfdf437 100644
--- a/paddle/fluid/framework/details/fetch_op_handle.cc
+++ b/paddle/fluid/framework/details/fetch_op_handle.cc
@@ -47,9 +47,11 @@ void FetchOpHandle::WaitAndMergeCPUTensors() const {
 }
 
 void FetchOpHandle::RunImpl() {
+  auto cpu_ctx =
+      platform::DeviceContextPool::Instance().Get(platform::CPUPlace());
   for (auto *input : inputs_) {
     auto *var = static_cast<VarHandle *>(input);
-    var->generated_op_->Wait(this->dev_ctx_[var->place_]);
+    var->generated_op_->Wait(cpu_ctx);
   }
 
   tensors_.resize(inputs_.size());
diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc
index 501e1dfad7644de274ff540458bb9417760dedaf..7d1f7e46b8435ec0ef1913ea70d9a8f7a6734aac 100644
--- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc
+++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc
@@ -209,10 +209,6 @@ void ThreadedSSAGraphExecutor::RunOp(
       VLOG(10) << op->DebugString();
       op->Run(use_event_);
 
-      for (auto &dev_ctx : op->dev_ctx_) {
-        dev_ctx.second->Wait();  // Sync error
-      }
-
       for (auto *ready : *ready_buffer) {
         ready->store(true, std::memory_order_release);
       }