Unverified commit f3fa2ed3, authored by 张春乔, committed by GitHub

[xdoctest] reformat example code with google style in No. 309 (#56596)

* input.py

* Update python/paddle/nn/functional/input.py

* Update input.py

* Update all_gather.py

* Update all_gather.py

* xdoc

* Apply suggestions from code review

* Update python/paddle/distributed/models/moe/utils.py

* Apply suggestions from code review
Co-authored-by: Nyakku Shigure <sigure.qaq@gmail.com>

* Apply suggestions from code review

* Apply suggestions from code review

* Apply suggestions from code review

---------
Co-authored-by: Nyakku Shigure <sigure.qaq@gmail.com>
Parent 487660a1
python/paddle/distributed/models/moe/utils.py
@@ -27,17 +27,17 @@ def _number_count(numbers, upper_range):
         out (Tensor): The output expert count.
     Examples:
         .. code-block:: python
-            # required: distributed
-            import paddle
 
-            numbers = [
-                [0, 2],
-                [0, 2]
-            ]
-            upper_range = 6
-            numbers = paddle.to_tensor(numbers, dtype="int32")
-            number_count = paddle.distributed.utils.number_count(numbers, upper_range)
-            print(number_count) # the result: [2, 0, 2, 0, 0, 0]
+            >>> # doctest: +REQUIRES(env: DISTRIBUTED)
+            >>> import paddle
+            >>> from paddle.distributed.models.moe import utils
+            >>> numbers = [[0, 2], [0, 2]]
+            >>> upper_range = 6
+            >>> numbers = paddle.to_tensor(numbers, dtype="int64")
+            >>> number_count = utils._number_count(numbers, upper_range)
+            >>> print(number_count)
+            Tensor(shape=[6], dtype=int64, place=Place(gpu:0), stop_gradient=True,
+            [2, 0, 2, 0, 0, 0])
     """
     if in_dynamic_mode():
         return _legacy_C_ops.number_count(numbers, 'upper_range', upper_range)
@@ -73,18 +73,18 @@ def _assign_pos(x, cum_count):
     Examples:
         .. code-block:: python
-            # required: distributed
-            import paddle
 
-            number_count = [2, 0, 2, 0]
-            numbers = [
-                [0, 2],
-                [0, 2]
-            ]
-            number_count = paddle.to_tensor(number_count)
-            numbers = paddle.to_tensor(numbers, dtype="int32")
-            num_cum = paddle.cumsum(number_count)
-            pos = paddle.distributed.utils.assign_pos(x=numbers, cum_count=num_cum)
-            print(pos) # the result: (2, 0, 3, 1)
+            >>> # doctest: +REQUIRES(env: DISTRIBUTED)
+            >>> import paddle
+            >>> from paddle.distributed.models.moe import utils
+            >>> number_count = [2, 0, 2, 0]
+            >>> numbers = [[0, 2], [0, 2]]
+            >>> number_count = paddle.to_tensor(number_count, dtype="int64")
+            >>> numbers = paddle.to_tensor(numbers, dtype="int64")
+            >>> num_cum = paddle.cumsum(number_count)
+            >>> pos = utils._assign_pos(x=numbers, cum_count=num_cum)
+            >>> print(pos)
+            Tensor(shape=[4], dtype=int64, place=Place(gpu:0), stop_gradient=True,
+            [2, 0, 3, 1])
     """
     if in_dynamic_mode():
         return _legacy_C_ops.assign_pos(x, cum_count, cum_count[-1])
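
Reviewer note: the expected output `[2, 0, 3, 1]` is what a counting-sort scatter produces. A minimal pure-Python sketch, assuming the kernel fills each expert's slot range from the back while walking tokens in order (`assign_pos_ref` is hypothetical; it only reproduces the documented output, not the CUDA kernel):

```python
def assign_pos_ref(x, cum_count):
    """Scatter token indices into expert-grouped slots.

    `cum_count` is the inclusive prefix sum of per-expert counts, so
    cum_count[-1] is the total number of routed tokens.
    """
    remaining = list(cum_count)
    flat = [expert_id for row in x for expert_id in row]
    pos = [0] * remaining[-1]
    for token_idx, expert_id in enumerate(flat):
        remaining[expert_id] -= 1        # claim the next free slot of this expert
        pos[remaining[expert_id]] = token_idx
    return pos

# cumsum([2, 0, 2, 0]) == [2, 2, 4, 4]; matches the doctest output above.
assert assign_pos_ref([[0, 2], [0, 2]], [2, 2, 4, 4]) == [2, 0, 3, 1]
```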
@@ -140,15 +140,19 @@ def _limit_by_capacity(expert_count, capacity, n_worker):
         out (Tensor): The output expert count limit by capacity.
     Examples:
         .. code-block:: python
-            # required: distributed
-            import paddle
-            expert_count = [1, 2, 2, 8, 3, 6]
-            capacity = [5, 5, 5]
-            n_work = 2
-            expert_count = paddle.to_tensor(expert_count, dtype="int32")
-            capacity = paddle.to_tensor(capacity, dtype="int32")
-            out = paddle.distributed.utils.limit_by_capacity(expert_count, capacity, n_work)
-            print(out) # the result: [1, 2, 2, 4, 3, 3]
+
+            >>> # doctest: +REQUIRES(env: DISTRIBUTED)
+            >>> import paddle
+            >>> from paddle.distributed.models.moe import utils
+            >>> expert_count = [1, 2, 2, 8, 3, 6]
+            >>> capacity = [5, 5, 5]
+            >>> n_work = 2
+            >>> expert_count = paddle.to_tensor(expert_count, dtype="int64")
+            >>> capacity = paddle.to_tensor(capacity, dtype="int64")
+            >>> out = utils._limit_by_capacity(expert_count, capacity, n_work)
+            >>> print(out)
+            Tensor(shape=[6], dtype=int64, place=Place(gpu:0), stop_gradient=True,
+            [1, 2, 2, 4, 3, 3])
     """
     if in_dynamic_mode():
         return _legacy_C_ops.limit_by_capacity(
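
Reviewer note: the example implies `expert_count` is laid out worker-major (`n_work = 2` workers × 3 experts) and that each expert's shared capacity is granted worker by worker. A minimal sketch under that assumption (`limit_by_capacity_ref` is hypothetical):

```python
def limit_by_capacity_ref(expert_count, capacity, n_worker):
    """Clip per-(worker, expert) counts so each expert's total stays within capacity."""
    n_expert = len(capacity)
    remaining = list(capacity)
    out = []
    for w in range(n_worker):
        for e in range(n_expert):
            granted = min(expert_count[w * n_expert + e], remaining[e])
            out.append(granted)
            remaining[e] -= granted
    return out

# Expert 0 grants 1 then min(8, 4); expert 2 grants 2 then min(6, 3); matches the doctest.
assert limit_by_capacity_ref([1, 2, 2, 8, 3, 6], [5, 5, 5], 2) == [1, 2, 2, 4, 3, 3]
```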
@@ -186,14 +190,19 @@ def _prune_gate_by_capacity(gate_idx, expert_count, n_expert, n_worker):
     Examples:
         .. code-block:: python
-            import paddle
-            gate_idx = paddle.to_tensor([1, 3, 3, 3, 3, 2, 1, 1], dtype='int32')
-            expert_count = paddle.to_tensor([0, 3, 1, 3, 0, 0, 0, 0], dtype='int32')
-            n_worker = 1
-            new_gate_id = paddle.distributed.utils.prune_gate_by_capacity(gate_idx, expert_count, n_expert, n_worker)
-            print(new_gate_id)
-            # Tensor(shape=[8], dtype=int32, place=CUDAPlace(0), stop_gradient=True,
-            #     [1, 3, 3, 3, -1, 2, 1, 1])
+
+            >>> # doctest: +REQUIRES(env: DISTRIBUTED)
+            >>> import paddle
+            >>> from paddle.distributed.models.moe import utils
+            >>> gate_idx = paddle.to_tensor([1, 3, 3, 3, 3, 2, 1, 1], dtype='int64')
+            >>> expert_count = paddle.to_tensor([0, 3, 1, 3, 0, 0, 0, 0], dtype='int64')
+            >>> n_worker = 1
+            >>> n_expert = 8
+            >>> new_gate_id = utils._prune_gate_by_capacity(
+            ...     gate_idx, expert_count, n_expert, n_worker
+            ... )
+            >>> print(new_gate_id)
+            Tensor(shape=[8], dtype=int64, place=Place(gpu:0), stop_gradient=True,
+            [1, 3, 3, 3, -1, 2, 1, 1])
     """
     if in_dynamic_mode():
         return _legacy_C_ops.prune_gate_by_capacity(
...
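
Reviewer note: in the last example, token 4 still targets expert 3 after that expert's three counted slots are used up, so its gate id becomes -1. A minimal single-worker sketch of that pruning rule (`prune_gate_by_capacity_ref` is hypothetical):

```python
def prune_gate_by_capacity_ref(gate_idx, expert_count, n_expert, n_worker):
    """Walk tokens in order; each consumes one slot of its expert, overflow becomes -1."""
    remaining = list(expert_count)
    new_gate = []
    for expert_id in gate_idx:
        if remaining[expert_id] > 0:
            remaining[expert_id] -= 1
            new_gate.append(expert_id)
        else:
            new_gate.append(-1)        # expert full: drop this token's assignment
    return new_gate

# Expert 3 has count 3, so the fourth token routed to it is pruned; matches the doctest.
assert prune_gate_by_capacity_ref(
    [1, 3, 3, 3, 3, 2, 1, 1], [0, 3, 1, 3, 0, 0, 0, 0], 8, 1
) == [1, 3, 3, 3, -1, 2, 1, 1]
```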