Unverified commit f3fa2ed3, authored by 张春乔, committed by GitHub

[xdoctest] reformat example code with google style in No. 309 (#56596)

* input.py

* Update python/paddle/nn/functional/input.py

* Update input.py

* Update all_gather.py

* Update all_gather.py

* xdoc

* Apply suggestions from code review

* Update python/paddle/distributed/models/moe/utils.py

* Apply suggestions from code review
Co-authored-by: Nyakku Shigure <sigure.qaq@gmail.com>

* Apply suggestions from code review

* Apply suggestions from code review

* Apply suggestions from code review

---------
Co-authored-by: Nyakku Shigure <sigure.qaq@gmail.com>
Parent 487660a1
python/paddle/distributed/models/moe/utils.py
@@ -27,17 +27,17 @@ def _number_count(numbers, upper_range):
         out (Tensor): The output expert count.
     Examples:
         .. code-block:: python
-            # required: distributed
-            import paddle
 
-            numbers = [
-                [0, 2],
-                [0, 2]
-            ]
-            upper_range = 6
-            numbers = paddle.to_tensor(numbers, dtype="int32")
-            number_count = paddle.distributed.utils.number_count(numbers, upper_range)
-            print(number_count) # the result: [2, 0, 2, 0, 0, 0]
+            >>> # doctest: +REQUIRES(env: DISTRIBUTED)
+            >>> import paddle
+            >>> from paddle.distributed.models.moe import utils
+            >>> numbers = [[0, 2], [0, 2]]
+            >>> upper_range = 6
+            >>> numbers = paddle.to_tensor(numbers, dtype="int64")
+            >>> number_count = utils._number_count(numbers, upper_range)
+            >>> print(number_count)
+            Tensor(shape=[6], dtype=int64, place=Place(gpu:0), stop_gradient=True,
+            [2, 0, 2, 0, 0, 0])
     """
     if in_dynamic_mode():
         return _legacy_C_ops.number_count(numbers, 'upper_range', upper_range)
@@ -73,18 +73,18 @@ def _assign_pos(x, cum_count):
     Examples:
         .. code-block:: python
-            # required: distributed
-            import paddle
 
-            number_count = [2, 0, 2, 0]
-            numbers = [
-                [0, 2],
-                [0, 2]
-            ]
-            number_count = paddle.to_tensor(number_count)
-            numbers = paddle.to_tensor(numbers, dtype="int32")
-            num_cum = paddle.cumsum(number_count)
-            pos = paddle.distributed.utils.assign_pos(x=numbers, cum_count=num_cum)
-            print(pos) # the result: (2, 0, 3, 1)
+            >>> # doctest: +REQUIRES(env: DISTRIBUTED)
+            >>> import paddle
+            >>> from paddle.distributed.models.moe import utils
+            >>> number_count = [2, 0, 2, 0]
+            >>> numbers = [[0, 2], [0, 2]]
+            >>> number_count = paddle.to_tensor(number_count, dtype="int64")
+            >>> numbers = paddle.to_tensor(numbers, dtype="int64")
+            >>> num_cum = paddle.cumsum(number_count)
+            >>> pos = utils._assign_pos(x=numbers, cum_count=num_cum)
+            >>> print(pos)
+            Tensor(shape=[4], dtype=int64, place=Place(gpu:0), stop_gradient=True,
+            [2, 0, 3, 1])
     """
     if in_dynamic_mode():
         return _legacy_C_ops.assign_pos(x, cum_count, cum_count[-1])
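
Reviewer note: the expected output `[2, 0, 3, 1]` is what a counting-sort scatter produces. A minimal pure-Python sketch, assuming the kernel fills each expert's slot range from the back while walking tokens in order (`assign_pos_ref` is hypothetical; it only reproduces the documented output, not the CUDA kernel):

```python
def assign_pos_ref(x, cum_count):
    """Scatter token indices into expert-grouped slots.

    `cum_count` is the inclusive prefix sum of per-expert counts, so
    cum_count[-1] is the total number of routed tokens.
    """
    remaining = list(cum_count)
    flat = [expert_id for row in x for expert_id in row]
    pos = [0] * remaining[-1]
    for token_idx, expert_id in enumerate(flat):
        remaining[expert_id] -= 1        # claim the next free slot of this expert
        pos[remaining[expert_id]] = token_idx
    return pos

# cumsum([2, 0, 2, 0]) == [2, 2, 4, 4]; matches the doctest output above.
assert assign_pos_ref([[0, 2], [0, 2]], [2, 2, 4, 4]) == [2, 0, 3, 1]
```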
@@ -140,15 +140,19 @@ def _limit_by_capacity(expert_count, capacity, n_worker):
         out (Tensor): The output expert count limit by capacity.
     Examples:
         .. code-block:: python
-            # required: distributed
-            import paddle
-            expert_count = [1, 2, 2, 8, 3, 6]
-            capacity = [5, 5, 5]
-            n_work = 2
-            expert_count = paddle.to_tensor(expert_count, dtype="int32")
-            capacity = paddle.to_tensor(capacity, dtype="int32")
-            out = paddle.distributed.utils.limit_by_capacity(expert_count, capacity, n_work)
-            print(out) # the result: [1, 2, 2, 4, 3, 3]
+
+            >>> # doctest: +REQUIRES(env: DISTRIBUTED)
+            >>> import paddle
+            >>> from paddle.distributed.models.moe import utils
+            >>> expert_count = [1, 2, 2, 8, 3, 6]
+            >>> capacity = [5, 5, 5]
+            >>> n_work = 2
+            >>> expert_count = paddle.to_tensor(expert_count, dtype="int64")
+            >>> capacity = paddle.to_tensor(capacity, dtype="int64")
+            >>> out = utils._limit_by_capacity(expert_count, capacity, n_work)
+            >>> print(out)
+            Tensor(shape=[6], dtype=int64, place=Place(gpu:0), stop_gradient=True,
+            [1, 2, 2, 4, 3, 3])
     """
     if in_dynamic_mode():
         return _legacy_C_ops.limit_by_capacity(
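
Reviewer note: the example implies `expert_count` is laid out worker-major (`n_work = 2` workers × 3 experts) and that each expert's shared capacity is granted worker by worker. A minimal sketch under that assumption (`limit_by_capacity_ref` is hypothetical):

```python
def limit_by_capacity_ref(expert_count, capacity, n_worker):
    """Clip per-(worker, expert) counts so each expert's total stays within capacity."""
    n_expert = len(capacity)
    remaining = list(capacity)
    out = []
    for w in range(n_worker):
        for e in range(n_expert):
            granted = min(expert_count[w * n_expert + e], remaining[e])
            out.append(granted)
            remaining[e] -= granted
    return out

# Expert 0 grants 1 then min(8, 4); expert 2 grants 2 then min(6, 3); matches the doctest.
assert limit_by_capacity_ref([1, 2, 2, 8, 3, 6], [5, 5, 5], 2) == [1, 2, 2, 4, 3, 3]
```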
@@ -186,14 +190,19 @@ def _prune_gate_by_capacity(gate_idx, expert_count, n_expert, n_worker):
     Examples:
         .. code-block:: python
-            import paddle
-            gate_idx = paddle.to_tensor([1, 3, 3, 3, 3, 2, 1, 1], dtype='int32')
-            expert_count = paddle.to_tensor([0, 3, 1, 3, 0, 0, 0, 0], dtype='int32')
-            n_worker = 1
-            new_gate_id = paddle.distributed.utils.prune_gate_by_capacity(gate_idx, expert_count, n_expert, n_worker)
-            print(new_gate_id)
-            # Tensor(shape=[8], dtype=int32, place=CUDAPlace(0), stop_gradient=True,
-            #     [1, 3, 3, 3, -1, 2, 1, 1])
+
+            >>> # doctest: +REQUIRES(env: DISTRIBUTED)
+            >>> import paddle
+            >>> from paddle.distributed.models.moe import utils
+            >>> gate_idx = paddle.to_tensor([1, 3, 3, 3, 3, 2, 1, 1], dtype='int64')
+            >>> expert_count = paddle.to_tensor([0, 3, 1, 3, 0, 0, 0, 0], dtype='int64')
+            >>> n_worker = 1
+            >>> n_expert = 8
+            >>> new_gate_id = utils._prune_gate_by_capacity(
+            ...     gate_idx, expert_count, n_expert, n_worker
+            ... )
+            >>> print(new_gate_id)
+            Tensor(shape=[8], dtype=int64, place=Place(gpu:0), stop_gradient=True,
+            [1, 3, 3, 3, -1, 2, 1, 1])
     """
     if in_dynamic_mode():
         return _legacy_C_ops.prune_gate_by_capacity(
...
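
Reviewer note: in the last example, token 4 still targets expert 3 after that expert's three counted slots are used up, so its gate id becomes -1. A minimal single-worker sketch of that pruning rule (`prune_gate_by_capacity_ref` is hypothetical):

```python
def prune_gate_by_capacity_ref(gate_idx, expert_count, n_expert, n_worker):
    """Walk tokens in order; each consumes one slot of its expert, overflow becomes -1."""
    remaining = list(expert_count)
    new_gate = []
    for expert_id in gate_idx:
        if remaining[expert_id] > 0:
            remaining[expert_id] -= 1
            new_gate.append(expert_id)
        else:
            new_gate.append(-1)        # expert full: drop this token's assignment
    return new_gate

# Expert 3 has count 3, so the fourth token routed to it is pruned; matches the doctest.
assert prune_gate_by_capacity_ref(
    [1, 3, 3, 3, 3, 2, 1, 1], [0, 3, 1, 3, 0, 0, 0, 0], 8, 1
) == [1, 3, 3, 3, -1, 2, 1, 1]
```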