diff --git a/python/paddle/distributed/fleet/model.py b/python/paddle/distributed/fleet/model.py
index e7cd0b33e9d16d2bebd064323d81bc66ca751786..c849a94dcea5d308c0fbe10835d05f6b0a44d710 100755
--- a/python/paddle/distributed/fleet/model.py
+++ b/python/paddle/distributed/fleet/model.py
@@ -41,44 +41,40 @@ def distributed_model(model):

         .. code-block:: python

-            import paddle
-            import paddle.nn as nn
-            from paddle.distributed import fleet
-
-            class LinearNet(nn.Layer):
-                def __init__(self):
-                    super().__init__()
-                    self._linear1 = nn.Linear(10, 10)
-                    self._linear2 = nn.Linear(10, 1)
-
-                def forward(self, x):
-                    return self._linear2(self._linear1(x))
-
-            # 1. initialize fleet environment
-            fleet.init(is_collective=True)
-
-            # 2. create layer & optimizer
-            layer = LinearNet()
-            loss_fn = nn.MSELoss()
-            adam = paddle.optimizer.Adam(
-                learning_rate=0.001, parameters=layer.parameters())
-
-            # 3. get data_parallel model using fleet
-            adam = fleet.distributed_optimizer(adam)
-            dp_layer = fleet.distributed_model(layer)
-
-            # 4. run layer
-            inputs = paddle.randn([10, 10], 'float32')
-            outputs = dp_layer(inputs)
-            labels = paddle.randn([10, 1], 'float32')
-            loss = loss_fn(outputs, labels)
-
-            print("loss:", loss.numpy())
-
-            loss.backward()
-
-            adam.step()
-            adam.clear_grad()
+            >>> import paddle
+            >>> import paddle.nn as nn
+            >>> from paddle.distributed import fleet
+
+            >>> class LinearNet(nn.Layer):
+            ...     def __init__(self):
+            ...         super().__init__()
+            ...         self._linear1 = nn.Linear(10, 10)
+            ...         self._linear2 = nn.Linear(10, 1)
+            ...     def forward(self, x):
+            ...         return self._linear2(self._linear1(x))
+
+            >>> # 1. initialize fleet environment
+            >>> fleet.init(is_collective=True)
+
+            >>> # 2. create layer & optimizer
+            >>> layer = LinearNet()
+            >>> loss_fn = nn.MSELoss()
+            >>> adam = paddle.optimizer.Adam(
+            ...     learning_rate=0.001, parameters=layer.parameters())
+
+            >>> # 3. get data_parallel model using fleet
+            >>> adam = fleet.distributed_optimizer(adam)
+            >>> dp_layer = fleet.distributed_model(layer)
+
+            >>> # 4. run layer
+            >>> inputs = paddle.randn([10, 10], 'float32')
+            >>> outputs = dp_layer(inputs)
+            >>> labels = paddle.randn([10, 1], 'float32')
+            >>> loss = loss_fn(outputs, labels)
+            >>> print("loss:", loss.numpy())
+            >>> loss.backward()
+            >>> adam.step()
+            >>> adam.clear_grad()

     """

diff --git a/python/paddle/distributed/fleet/optimizer.py b/python/paddle/distributed/fleet/optimizer.py
index 9e693e670f3ff2945a9d846a75d99f89ddd9ff61..85c27fd138ef311164844dcf32aa51ddfe66d51c 100755
--- a/python/paddle/distributed/fleet/optimizer.py
+++ b/python/paddle/distributed/fleet/optimizer.py
@@ -37,12 +37,15 @@ def _dygraph_distributed_optimizer(optimizer, strategy=None):
         Fleet: instance of fleet.
     Examples:
         .. code-block:: python
-            import paddle
-            import paddle.distributed.fleet as fleet
-            fleet.init(is_collective=True)
-            strategy = fleet.DistributedStrategy()
-            optimizer = paddle.optimizer.SGD(learning_rate=0.001)
-            optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy)
+
+            >>> import paddle
+            >>> import paddle.distributed.fleet as fleet
+            >>> fleet.init(is_collective=True)
+            >>> strategy = fleet.DistributedStrategy()
+            >>> linear = paddle.nn.Linear(10, 10)
+            >>> optimizer = paddle.optimizer.SGD(learning_rate=0.001, parameters=linear.parameters())
+            >>> optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy)
+
     """
     fleet_env = fleet.fleet
     fleet_env.user_defined_optimizer = optimizer
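
Note: the doctest-style snippets above still describe collective (data-parallel) training, so they only run end to end under a distributed launch. The sketch below is a hypothetical standalone script mirroring the converted example (the `train.py` name and GPU list are placeholders, not part of this diff); it shows how `fleet.distributed_model` and `fleet.distributed_optimizer` would typically be exercised.

# Hypothetical train.py; launched with, for example:
#   python -m paddle.distributed.launch --gpus=0,1 train.py
import paddle
import paddle.nn as nn
from paddle.distributed import fleet

fleet.init(is_collective=True)  # collective mode: one process per device

layer = nn.Linear(10, 1)
loss_fn = nn.MSELoss()
adam = paddle.optimizer.Adam(learning_rate=0.001, parameters=layer.parameters())

# Wrap both optimizer and model so gradients are synchronized across ranks.
adam = fleet.distributed_optimizer(adam)
dp_layer = fleet.distributed_model(layer)

inputs = paddle.randn([10, 10], 'float32')
labels = paddle.randn([10, 1], 'float32')
loss = loss_fn(dp_layer(inputs), labels)
loss.backward()
adam.step()
adam.clear_grad()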