diff --git a/paddle/fluid/framework/details/multi_devices_graph_pass.cc b/paddle/fluid/framework/details/multi_devices_graph_pass.cc index c0fb3ee83331dc946eea9a4ae05ef324a657238a..23b9890e9bf310c141bfc96555d7f725770a0472 100644 --- a/paddle/fluid/framework/details/multi_devices_graph_pass.cc +++ b/paddle/fluid/framework/details/multi_devices_graph_pass.cc @@ -927,7 +927,16 @@ void DistSSAGraphBuilder::InsertCollectiveOp(ir::Graph *result, void DistSSAGraphBuilder::InsertPostprocessOps(ir::Graph *result) const { // broad cast received parameters when training in parameter server mode. if (need_broadcast_var_) { - // cpu reduce mode did not need to broadcast received parameters. + // There are 4 conditions: + // 1. GPU && Reduce: Reduce gradient then broadcast gradient to other GPUS. + // Need to broadcast received parameters to other GPU. + // 2. GPU && AllReduce: AllReduce all graident to each GPU. Need to + // broadcast received parameters to other GPU. + // 3. CPU && AllReduce: AllReduce all gradient to each thread. Need to + // broadcast received parameters to other scope. + // 4. CPU && Reduce: because all parameters share the same memory, did not + // broadcast + // received parameters. if (!UseGPU() && strategy_.reduce_ == BuildStrategy::ReduceStrategy::kReduce) { return;