diff --git a/python/paddle/fluid/parallel_executor.py b/python/paddle/fluid/parallel_executor.py index d57341cfa9b04cca8f8be2fc1fa6ccece430fcbd..f4128dcbe935ea5caf63b8c0b377f9bed36d6705 100644 --- a/python/paddle/fluid/parallel_executor.py +++ b/python/paddle/fluid/parallel_executor.py @@ -48,8 +48,9 @@ class ParallelExecutor(object): it will share variables from the specified ParallelExecutor. use_default_grad_scale(bool, default True): If set True, a default scale value equal to `1./device_count` would be multiplied to - the gradients. Otherwise, a customized scale value should be - feeded to the network. + the gradients of each device, and the scaled gradients would + then be aggregated. Otherwise, a customized scale value should be fed + to the network. Returns: A ParallelExecutor object.