diff --git a/python/paddle/distributed/fleet/base/distributed_strategy.py b/python/paddle/distributed/fleet/base/distributed_strategy.py
index bc6ce8c5e1c3f750318eb105729b88617af5d578..5e527ea03ab9c816948f343ac103672a751fdbc3 100755
--- a/python/paddle/distributed/fleet/base/distributed_strategy.py
+++ b/python/paddle/distributed/fleet/base/distributed_strategy.py
@@ -623,6 +623,20 @@ class DistributedStrategy(object):
 
     @property
     def localsgd(self):
+        """
+        Indicates whether we are using Local SGD training. For more details, please refer to
+        [Don't Use Large Mini-Batches, Use Local SGD](https://arxiv.org/pdf/1808.07217.pdf).
+
+        Default Value: False
+
+        Examples:
+          .. code-block:: python
+
+            import paddle.distributed.fleet as fleet
+            strategy = fleet.DistributedStrategy()
+            strategy.localsgd = True # by default this is False
+
+        """
         return self.strategy.localsgd
 
     @localsgd.setter
@@ -634,6 +648,28 @@ class DistributedStrategy(object):
 
     @property
     def localsgd_configs(self):
+        """
+        Set LocalSGD training configurations. LocalSGD has a configurable
+        setting that can be passed through a dict.
+
+        **Notes**:
+            **k_steps(int)**: The number of local steps to run before parameter
+               synchronization. Default 1. If strategy.auto is set True, the
+               local steps will be calculated automatically during training.
+               The algorithm is referenced in this paper:
+               [Adaptive Communication Strategies to Achieve the Best Error-Runtime Trade-off in Local-Update SGD](https://arxiv.org/pdf/1810.08313.pdf).
+               In this case, k_steps indicates the initial local steps, which
+               is suggested to be set to 1.
+
+        Examples:
+          .. code-block:: python
+
+            import paddle.distributed.fleet as fleet
+            strategy = fleet.DistributedStrategy()
+            strategy.localsgd = True
+            strategy.localsgd_configs = {"k_steps": 4}
+        """
+
         return get_msg_dict(self.strategy.localsgd_configs)
 
     @localsgd_configs.setter
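
Note for reviewers: the snippet below sketches how the two documented properties are typically wired into a fleet collective training job end to end. It is only an illustrative sketch and not part of this diff: the fleet.init / fleet.distributed_optimizer calls are the standard fleet collective API, while the model, optimizer, and learning rate are placeholder choices.

# Illustrative sketch (not part of this diff): wiring localsgd and
# localsgd_configs into a fleet collective job. The model and optimizer
# below are placeholders, not taken from this change.
import paddle
import paddle.distributed.fleet as fleet

fleet.init(is_collective=True)

strategy = fleet.DistributedStrategy()
strategy.localsgd = True                    # enable Local SGD
strategy.localsgd_configs = {"k_steps": 4}  # synchronize parameters every 4 local steps

model = paddle.nn.Linear(10, 1)
optimizer = paddle.optimizer.SGD(learning_rate=0.01,
                                 parameters=model.parameters())
# The strategy only takes effect once the optimizer is wrapped by fleet.
optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy)

Such a script would be started with the usual multi-process launcher, e.g. `python -m paddle.distributed.launch train.py`, so that Local SGD has more than one worker to synchronize.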