diff --git a/python/paddle/fluid/parallel_executor.py b/python/paddle/fluid/parallel_executor.py
index d57341cfa9b04cca8f8be2fc1fa6ccece430fcbd..f4128dcbe935ea5caf63b8c0b377f9bed36d6705 100644
--- a/python/paddle/fluid/parallel_executor.py
+++ b/python/paddle/fluid/parallel_executor.py
@@ -48,8 +48,9 @@ class ParallelExecutor(object):
                 it will share variables from the specified ParallelExecutor.
             use_default_grad_scale(bool, default True): If set True, a default
                 scale value equal to `1./device_count` would be multiplied to
-                the gradients. Otherwise, a customized scale value should be
-                feeded to the network.
+                the gradients of each device, and the scaled gradients would
+                then be aggregated. Otherwise, a customized scale value should
+                be fed to the network.
 
         Returns:
             A ParallelExecutor object.