Unverified commit 17d6da6b, authored by PommesPeter, committed by GitHub

[xdoctest] reformat example code with google style No.186-190 (#56166)

* fix: updated code examples.

* fix: added paddle.seed

* fix: updated code style

* Apply suggestions from code review

* refactor: refine detail of code examples

* Update python/paddle/distributed/auto_parallel/static/process_mesh_v2.py

* fix: refine detail

* fix: refine detail

* Update python/paddle/distributed/auto_parallel/static/process_mesh_v2.py
Co-authored-by: Nyakku Shigure <sigure.qaq@gmail.com>

* refactor: refine detail

* refactor: refine detail

* fix: refine doc

---------
Co-authored-by: Nyakku Shigure <sigure.qaq@gmail.com>
Parent 14b81d5a
@@ -79,39 +79,39 @@ class Engine:
        .. code-block:: python

            >>> import paddle
            >>> import paddle.vision.transforms as T
            >>> from paddle.distributed.fleet import auto
            >>> from paddle.vision.datasets import MNIST
            >>> transform = T.Compose([
            ...     T.Transpose(),
            ...     T.Normalize([127.5], [127.5])
            >>> ])
            >>> train_dataset = MNIST(mode='train', transform=transform)
            >>> valid_dataset = MNIST(mode='test', transform=transform)
            >>> model = paddle.vision.models.LeNet()
            >>> loss = paddle.nn.CrossEntropyLoss()
            >>> optimizer = paddle.optimizer.Adam(
            ...     learning_rate=0.001, parameters=model.parameters())
            >>> metrics = paddle.metric.Accuracy(topk=(1, 2))
            >>> engine = auto.Engine(model, loss, optimizer, metrics)
            >>> # fit
            >>> engine.fit(train_dataset,
            ...     epochs=2,
            ...     batch_size=64)
            >>> # evaluate
            >>> engine.evaluate(valid_dataset,
            ...     batch_size=64)
            >>> # predict
            >>> engine.predict(valid_dataset,
            ...     batch_size=64)
            >>> # save
            >>> engine.save("./my_model")
            >>> # load
            >>> engine.load("./my_model")
    """
@@ -918,27 +918,27 @@ class Engine:
        .. code-block:: python

            >>> import paddle
            >>> import paddle.vision.transforms as T
            >>> from paddle.distributed.fleet import auto
            >>> from paddle.vision.datasets import MNIST
            >>> transform = T.Compose([
            ...     T.Transpose(),
            ...     T.Normalize([127.5], [127.5])
            >>> ])
            >>> train_dataset = MNIST(mode='train', transform=transform)
            >>> model = paddle.vision.models.LeNet()
            >>> loss = paddle.nn.CrossEntropyLoss()
            >>> optimizer = paddle.optimizer.Adam(
            ...     learning_rate=0.001, parameters=model.parameters())
            >>> metrics = paddle.metric.Accuracy(topk=(1, 2))
            >>> engine = auto.Engine(model, loss, optimizer, metrics)
            >>> engine.fit(train_dataset,
            ...     epochs=2,
            ...     batch_size=64)
    """
    self._mode = 'train'
    self._inputs_spec, self._labels_spec = self._prepare_data_spec(
@@ -1071,23 +1071,23 @@ class Engine:
        .. code-block:: python

            >>> import paddle
            >>> import paddle.vision.transforms as T
            >>> from paddle.distributed.fleet import auto
            >>> from paddle.vision.datasets import MNIST
            >>> transform = T.Compose([
            ...     T.Transpose(),
            ...     T.Normalize([127.5], [127.5])
            >>> ])
            >>> valid_dataset = MNIST(mode='test', transform=transform)
            >>> model = paddle.vision.models.LeNet()
            >>> loss = paddle.nn.CrossEntropyLoss()
            >>> metrics = paddle.metric.Accuracy(topk=(1, 2))
            >>> engine = auto.Engine(model, loss, metrics=metrics)
            >>> engine.evaluate(valid_dataset, batch_size=64)
    """
    self._mode = 'eval'
@@ -1181,21 +1181,21 @@ class Engine:
        .. code-block:: python

            >>> import paddle
            >>> import paddle.vision.transforms as T
            >>> from paddle.distributed.fleet import auto
            >>> from paddle.vision.datasets import MNIST
            >>> transform = T.Compose([
            ...     T.Transpose(),
            ...     T.Normalize([127.5], [127.5])
            >>> ])
            >>> valid_dataset = MNIST(mode='test', transform=transform)
            >>> model = paddle.vision.models.LeNet()
            >>> engine = auto.Engine(model)
            >>> engine.predict(valid_dataset, batch_size=64)
    """
    self._mode = 'predict'
    self._inputs_spec, self._labels_spec = self._prepare_data_spec(
@@ -1650,28 +1650,29 @@ class Engine:
    Examples:
        .. code-block:: python

            >>> import paddle
            >>> import paddle.vision.transforms as T
            >>> from paddle.distributed.fleet import auto
            >>> from paddle.vision.datasets import MNIST

            >>> transform = T.Compose([
            ...     T.Transpose(),
            ...     T.Normalize([127.5], [127.5])
            >>> ])
            >>> train_dataset = MNIST(mode='train', transform=transform)

            >>> model = paddle.vision.models.LeNet()
            >>> loss = paddle.nn.CrossEntropyLoss()
            >>> optimizer = paddle.optimizer.Adam(
            ...     learning_rate=0.001, parameters=model.parameters())
            >>> metrics = paddle.metric.Accuracy(topk=(1, 2))

            >>> engine = auto.Engine(model, loss, optimizer, metrics)
            >>> engine.fit(train_dataset,
            ...     epochs=1,
            ...     batch_size=64)
            >>> engine.save("./my_model")
    """
    if training:
@@ -1734,29 +1735,30 @@ class Engine:
    Examples:
        .. code-block:: python

            >>> import paddle
            >>> import paddle.vision.transforms as T
            >>> from paddle.distributed.fleet import auto
            >>> from paddle.vision.datasets import MNIST

            >>> transform = T.Compose([
            ...     T.Transpose(),
            ...     T.Normalize([127.5], [127.5])
            >>> ])
            >>> train_dataset = MNIST(mode='train', transform=transform)

            >>> model = paddle.vision.models.LeNet()
            >>> loss = paddle.nn.CrossEntropyLoss()
            >>> optimizer = paddle.optimizer.Adam(
            ...     learning_rate=0.001, parameters=model.parameters())
            >>> metrics = paddle.metric.Accuracy(topk=(1, 2))

            >>> engine = auto.Engine(model, loss, optimizer, metrics)
            >>> engine.fit(train_dataset,
            ...     epochs=1,
            ...     batch_size=64)
            >>> engine.save("./my_model")
            >>> engine.load("./my_model")
    """
    self._strict = strict
@@ -33,14 +33,13 @@ class ProcessMesh(core.ProcessMesh):
    Examples:
        .. code-block:: python

            >>> import paddle
            >>> import paddle.distributed as dist
            >>> paddle.enable_static()
            >>> mesh = dist.ProcessMesh([[2, 4, 5], [0, 1, 3]])
            >>> assert mesh.shape == [2, 3]
            >>> assert mesh.process_ids == [2, 4, 5, 0, 1, 3]
    """
@@ -611,10 +611,18 @@ def save_distributed_checkpoint(
    Examples:
        .. code-block:: python

            >>> import os
            >>> from paddle.distributed.auto_parallel.static.utils import save_distributed_checkpoint

            >>> step = 16000
            >>> global_batch_size = 32
            >>> path = os.path.join("./output", "step_%d" % step)
            >>> os.makedirs(path, exist_ok=True)
            >>> program = paddle.static.Program()
            >>> add_info = {'batch': step, "batch_size": global_batch_size}
            >>> save_distributed_checkpoint(program, path, path, add_info)
    """
    from .dist_context import get_default_distributed_context
@@ -653,11 +661,18 @@ def load_distributed_checkpoint(checkpoint_path, dist_attr_path):
    Examples:
        .. code-block:: python

            >>> # doctest: +SKIP('Depends on external files.')
            >>> from paddle.distributed.auto_parallel.static.utils import load_distributed_checkpoint

            >>> ckpt_path = [
            ...     './model_state_rank0.pdmodel',
            ...     './model_state_rank1.pdmodel',
            ... ]
            >>> dist_attr_path = [
            ...     './dist_attr_rank0.pdattr',
            ...     './dist_attr_rank1.pdattr',
            ... ]
            >>> param_dict, dist_attr, add_info = load_distributed_checkpoint(ckpt_path, dist_attr_path)
    """
    assert _check_valid_path(
        checkpoint_path
@@ -692,12 +707,19 @@ def load_checkpoint_into_program(
    Examples:
        .. code-block:: python

            >>> # doctest: +SKIP('Depends on external files.')
            >>> from paddle.distributed.auto_parallel.static.utils import load_checkpoint_into_program

            >>> exe.run(startup_program)
            >>> ckpt_path = [
            ...     './model_state_rank0.pdmodel',
            ...     './model_state_rank1.pdmodel',
            ... ]
            >>> dist_attr_path = [
            ...     './dist_attr_rank0.pdattr',
            ...     './dist_attr_rank1.pdattr',
            ... ]
            >>> load_checkpoint_into_program(ckpt_path, dist_attr_path, main_program)
    """
    from .dist_context import get_default_distributed_context
@@ -999,13 +1021,18 @@ def _merge_parameter(
    Examples:
        .. code-block:: python

            >>> import numpy as np
            >>> from paddle.distributed.auto_parallel.static.utils import _merge_parameter

            >>> partition_param_list = [(np.array([[[1.11, 1.12]]]), [[0, 1],[0, 1],[0, 2]])]
            >>> param = np.array([[[1.13, 1.14]]])
            >>> partition_index = [[0, 1],[0, 1],[2, 4]]
            >>> complete_shape = [2, 2, 4]
            >>> _merge_parameter(partition_param_list, param, partition_index, complete_shape)
            >>> print(partition_param_list)
            [(array([[[1.11, 1.12, 1.13, 1.14]]]), [[0, 1],[0, 1],[0, 4]])]
    """
    from .reshard import Resharder
@@ -1061,16 +1088,20 @@ def _slice_parameter(complete_param, partition_index_list, length):
    Examples:
        .. code-block:: python

            >>> import numpy as np
            >>> from paddle.distributed.auto_parallel.static.utils import _slice_parameter

            >>> complete_param = np.array([[[1.11, 1.12, 1.13, 1.14, 1.15, 1.16]]])
            >>> rank = 2
            >>> complete_shape = [1, 1, 6]
            >>> dims_mapping = [-1, -1, 0]
            >>> process_shape = [3]
            >>> process_group = [0, 1, 2]
            >>> sliced_param_list = _slice_parameter(complete_param, [[], [], [2, 4]], 3)
            >>> print(sliced_param_list)
            [array([[[1.11, 1.12]]]), array([[[1.13, 1.14]]]), array([[[1.15, 1.16]]])]
    """
    sliced_param_list = []
    axis = len(complete_param.shape) - length
@@ -1098,21 +1129,24 @@ def _get_sliced_param_index(
    Examples:
        .. code-block:: python

            >>> import numpy as np
            >>> from paddle.distributed.auto_parallel.static.utils import _get_sliced_param_index

            >>> complete_param = np.array([[[1.11, 1.12, 1.13, 1.14, 1.15, 1.16]]])
            >>> rank = 2
            >>> complete_shape = [1, 1, 6]
            >>> dims_mapping = [-1, -1, 0]
            >>> process_shape = [3]
            >>> process_group = [0, 1, 2]

            >>> slice_param = _slice_parameter(complete_param, [[], [], [2, 4]], 3)
            >>> print(slice_param)
            [array([[[1.11, 1.12]]]), array([[[1.13, 1.14]]]), array([[[1.15, 1.16]]])]

            >>> index = _get_sliced_param_index(rank, complete_shape, dims_mapping,
            ...     process_shape, process_group)
            >>> print(index)
            2
    """
    from .reshard import Resharder
@@ -1145,15 +1179,18 @@ def _get_split_indices(
    Examples:
        .. code-block:: python

            >>> import numpy as np
            >>> from paddle.distributed.auto_parallel.static.utils import _get_split_indices

            >>> complete_param = np.array([[[1.11, 1.12, 1.13, 1.14, 1.15, 1.16]]])
            >>> complete_shape = [1, 1, 6]
            >>> dims_mapping = [-1, -1, 0]
            >>> process_shape = [3]
            >>> process_group = [0, 1, 2]
            >>> index = _get_split_indices(complete_shape, dims_mapping, process_shape, process_group)
            >>> print(index)
            [[], [], [2, 4]]
    """
    from .reshard import Resharder
@@ -2410,19 +2447,26 @@ def wrap_data_for_completion(
        output_specs: list, DistTensorSpec for each output tensor of the dist_op
        attrs: dict, attribute map of the dist op

    Examples:
        .. code-block:: python

            >>> # doctest: +SKIP('Depends on other ops.')
            >>> from paddle.distributed.auto_parallel.static.utils import wrap_data_for_completion

            >>> op_desc = dist_op.serial_op.desc
            >>> input_name_list = []
            >>> output_name_list = []

            >>> input_name_list.append(op_desc.input('X')[0])  # 'X' is the arg name for op
            >>> input_name_list.append(op_desc.input('Y')[0])
            >>> output_name_list.append(op_desc.output('Out')[0])
            >>> attr_name_list = ['trans_x', 'trans_y']
            >>> input_specs, output_specs, attrs = wrap_data_for_completion(
            ...     dist_op,
            ...     input_name_list,
            ...     output_name_list,
            ...     attr_name_list)
    """
@@ -152,20 +152,21 @@ class Strategy(BaseConfig):
    Examples:
        .. code-block:: python

            >>> import paddle
            >>> from paddle.distributed.fleet import auto
            >>> strategy = auto.Strategy()
            >>> sharding = strategy.sharding
            >>> assert sharding.enable == False
            >>> assert sharding.stage == 1
            >>> assert sharding.degree == 8

            >>> sharding.enable = True
            >>> sharding.stage = 2
            >>> sharding.degree = 2
            >>> assert sharding.enable == True
            >>> assert sharding.stage == 2
            >>> assert sharding.degree == 2
    """
@@ -188,12 +188,13 @@ def new_group(ranks=None, backend=None, timeout=_default_timeout):
    Examples:
        .. code-block:: python

            >>> # doctest: +REQUIRES(env: DISTRIBUTED)
            >>> import paddle

            >>> paddle.distributed.init_parallel_env()
            >>> tindata = paddle.randn(shape=[2, 3])
            >>> gp = paddle.distributed.new_group([2, 4, 6])
            >>> paddle.distributed.all_reduce(tindata, group=gp, sync_op=False)
    """
    global _custom_gid
@@ -310,9 +311,8 @@ def is_available():
    Examples:
        .. code-block:: python

            >>> import paddle
            >>> print(paddle.distributed.is_available())
    """
    return core.is_compiled_with_dist()