diff --git a/paddle/fluid/framework/details/fetch_op_handle.cc b/paddle/fluid/framework/details/fetch_op_handle.cc index 03323e3da7bd2634be1d9f07d9e8fcf21cfdf437..26c09eb8eb9db6f30e5d2e3ce332a8a62975fa40 100644 --- a/paddle/fluid/framework/details/fetch_op_handle.cc +++ b/paddle/fluid/framework/details/fetch_op_handle.cc @@ -66,6 +66,7 @@ void FetchOpHandle::RunImpl() { if (platform::is_gpu_place(var->place_)) { #ifdef PADDLE_WITH_CUDA TensorCopy(t, cpu, *dev_ctx_[t.place()], &tensors_[i]); + dev_ctx_[t.place()]->Wait(); #endif } else { tensors_[i].ShareDataWith(t); diff --git a/paddle/fluid/framework/details/op_handle_base.cc b/paddle/fluid/framework/details/op_handle_base.cc index 07a4b8921753a0b3cb8e62397e0a0ab2385a094a..63affb705424f847d531fc8c2f20d132c92784d8 100644 --- a/paddle/fluid/framework/details/op_handle_base.cc +++ b/paddle/fluid/framework/details/op_handle_base.cc @@ -33,9 +33,6 @@ std::string OpHandleBase::DebugString() const { OpHandleBase::~OpHandleBase() { #ifdef PADDLE_WITH_CUDA - for (auto &ctx : dev_ctx_) { - ctx.second->Wait(); - } for (auto &ev : events_) { PADDLE_ENFORCE(cudaEventDestroy(ev.second)); }