diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc
index 1d9584939fc77f7aa9e8abd249edc34144adffa0..2e13b3c8c1cf9c6fd75bb0b34aa5b81858049e41 100644
--- a/paddle/fluid/framework/parallel_executor.cc
+++ b/paddle/fluid/framework/parallel_executor.cc
@@ -320,14 +320,14 @@ struct NCCLAllReduceOpHandle : public OpHandle {
   explicit NCCLAllReduceOpHandle(ParallelExecutorPrivate *member)
       : member_(member) {
     for (auto &nccl : member_->communication_streams_) {
-      cudaEventCreate(&events_[nccl.second.device_id()],
-                      cudaEventDisableTiming);
+      PADDLE_ENFORCE(cudaEventCreate(&events_[nccl.second.device_id()],
+                                     cudaEventDisableTiming));
     }
   }

   ~NCCLAllReduceOpHandle() {
     for (auto &ev : events_) {
-      cudaEventDestroy(ev.second);
+      PADDLE_ENFORCE(cudaEventDestroy(ev.second));
     }
   }

@@ -362,7 +362,7 @@ struct NCCLAllReduceOpHandle : public OpHandle {
         platform::dynload::ncclAllReduce(
             buffer, buffer, numel, static_cast<ncclDataType_t>(dtype), ncclSum,
             nccl_ctx.comm, nccl_ctx.stream());
-        cudaEventRecord(events_[dev_id], nccl_ctx.stream());
+        PADDLE_ENFORCE(cudaEventRecord(events_[dev_id], nccl_ctx.stream()));
       }

       platform::dynload::ncclGroupEnd();
@@ -381,7 +381,7 @@ struct NCCLAllReduceOpHandle : public OpHandle {
             boost::get<platform::CUDAPlace>(waited_dev->GetPlace()).device;
         auto stream =
             static_cast<platform::CUDADeviceContext *>(waited_dev)->stream();
-        cudaStreamWaitEvent(stream, events_[dev_id], 0);
+        PADDLE_ENFORCE(cudaStreamWaitEvent(stream, events_[dev_id], 0));
       }
     }
   }