diff --git a/python/paddle/distributed/fleet/utils/hybrid_parallel_util.py b/python/paddle/distributed/fleet/utils/hybrid_parallel_util.py index 7e95bfe7f331aadd3b5aeb6adc99a1a5706d7c6b..7e527eced3f0419f5601efa2138b0f51917194c4 100644 --- a/python/paddle/distributed/fleet/utils/hybrid_parallel_util.py +++ b/python/paddle/distributed/fleet/utils/hybrid_parallel_util.py @@ -197,7 +197,7 @@ def fused_allreduce_gradients_with_group( else _apply_collective_grads ) with framework.no_grad(): - apply_func(parameter_list, group, bucket_size) + apply_func(parameter_list, group, bucket_size, scale) def fused_allreduce_gradients(parameter_list, hcg):