diff --git a/paddle/platform/nccl_test.cu b/paddle/platform/nccl_test.cu index ef6d845874745af1150e4425f8d6be416cc44ece..5a75ff3382b8f7727b6c80cba8470ef7faf1142e 100644 --- a/paddle/platform/nccl_test.cu +++ b/paddle/platform/nccl_test.cu @@ -89,6 +89,7 @@ TEST(NCCL, all_reduce) { VLOG(1) << "Invoking ncclAllReduce"; + dynload::ncclGroupStart(); for (int i = 0; i < dev_count; ++i) { VLOG(1) << "Invoking ncclAllReduce with device " << i; SetDeviceId(i); @@ -97,6 +98,7 @@ TEST(NCCL, all_reduce) { ncclSum, comms[i], data[i]->dev_ctx.stream())); VLOG(1) << "Invoked ncclAllReduce for device " << i; } + dynload::ncclGroupEnd(); VLOG(1) << "Invoked ncclAllReduce";