diff --git a/paddle/fluid/framework/details/multi_devices_graph_pass.cc b/paddle/fluid/framework/details/multi_devices_graph_pass.cc
index c0fb3ee83331dc946eea9a4ae05ef324a657238a..23b9890e9bf310c141bfc96555d7f725770a0472 100644
--- a/paddle/fluid/framework/details/multi_devices_graph_pass.cc
+++ b/paddle/fluid/framework/details/multi_devices_graph_pass.cc
@@ -927,7 +927,16 @@ void DistSSAGraphBuilder::InsertCollectiveOp(ir::Graph *result,
 void DistSSAGraphBuilder::InsertPostprocessOps(ir::Graph *result) const {
   // broad cast received parameters when training in parameter server mode.
   if (need_broadcast_var_) {
-    // cpu reduce mode did not need to broadcast received parameters.
+    // There are 4 cases:
+    // 1. GPU && Reduce: reduce the gradient, then broadcast it to the other
+    // GPUs. Need to broadcast received parameters to the other GPUs.
+    // 2. GPU && AllReduce: all-reduce all gradients to each GPU. Need to
+    // broadcast received parameters to the other GPUs.
+    // 3. CPU && AllReduce: all-reduce all gradients to each thread. Need to
+    // broadcast received parameters to the other scopes.
+    // 4. CPU && Reduce: because all parameters share the same memory, there
+    // is no need to broadcast received parameters, so this case returns
+    // early.
     if (!UseGPU() &&
         strategy_.reduce_ == BuildStrategy::ReduceStrategy::kReduce) {
       return;