Summary (reviewer note, placed before the first "diff --git" line):
  * FetchOpHandle::RunImpl: fetch the CPU device context from
    platform::DeviceContextPool once and pass it to generated_op_->Wait()
    for every input, instead of passing this->dev_ctx_[var->place_].
  * ThreadedSSAGraphExecutor::RunOp: remove the loop that called Wait() on
    every entry of op->dev_ctx_ right after op->Run(use_event_).

NOTE(review): this patch was recovered from a copy in which all line breaks
and indentation had been collapsed. The layout below is reconstructed so that
each hunk matches its @@ line counts; verify the whitespace of context lines
against the target files before applying. The static_cast target type was
missing in that copy; it is presumed to be `VarHandle *` - TODO confirm.

diff --git a/paddle/fluid/framework/details/fetch_op_handle.cc b/paddle/fluid/framework/details/fetch_op_handle.cc
index c697a1c93786d43d540ba7c40f6fd78ffcdcb0e0..03323e3da7bd2634be1d9f07d9e8fcf21cfdf437 100644
--- a/paddle/fluid/framework/details/fetch_op_handle.cc
+++ b/paddle/fluid/framework/details/fetch_op_handle.cc
@@ -47,9 +47,11 @@ void FetchOpHandle::WaitAndMergeCPUTensors() const {
 }
 
 void FetchOpHandle::RunImpl() {
+  auto cpu_ctx =
+      platform::DeviceContextPool::Instance().Get(platform::CPUPlace());
   for (auto *input : inputs_) {
     auto *var = static_cast<VarHandle *>(input);
-    var->generated_op_->Wait(this->dev_ctx_[var->place_]);
+    var->generated_op_->Wait(cpu_ctx);
   }
 
   tensors_.resize(inputs_.size());
diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc
index 501e1dfad7644de274ff540458bb9417760dedaf..7d1f7e46b8435ec0ef1913ea70d9a8f7a6734aac 100644
--- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc
+++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc
@@ -209,10 +209,6 @@ void ThreadedSSAGraphExecutor::RunOp(
       VLOG(10) << op->DebugString();
       op->Run(use_event_);
 
-      for (auto &dev_ctx : op->dev_ctx_) {
-        dev_ctx.second->Wait();  // Sync error
-      }
-
       for (auto *ready : *ready_buffer) {
         ready->store(true, std::memory_order_release);
       }