diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index ec5eb579105a46c0c30ce72e3fecd77f711b9f69..5870eac8115a6882bd6bce269377f8bf64849df4 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -250,6 +250,8 @@ struct NCCLAllReduceOpHandle : public OpHandle { int dtype = -1; size_t numel = 0; + platform::dynload::ncclGroupStart(); + for (auto &p : member_->places_) { int dev_id = boost::get<platform::CUDAPlace>(p).device; @@ -266,11 +268,12 @@ struct NCCLAllReduceOpHandle : public OpHandle { auto &nccl_ctx = member_->communication_streams_.at(dev_id); - ncclAllReduce(buffer, buffer, numel, static_cast<ncclDataType_t>(dtype), - ncclSum, nccl_ctx.comm, nccl_ctx.stream()); + platform::dynload::ncclAllReduce( + buffer, buffer, numel, static_cast<ncclDataType_t>(dtype), ncclSum, + nccl_ctx.comm, nccl_ctx.stream()); } - ncclGroupEnd(); + platform::dynload::ncclGroupEnd(); } } };