fix api format and example (#14686)

* fix api format and examples test=develop * Update executor.py test=develop * Update nn.py * Update nn.py test=develop * Update nn.py test=develop

fix api format and example (#14686)
* fix api format and examples test=develop * Update executor.py test=develop * Update nn.py * Update nn.py test=develop * Update nn.py test=develop
6d04a9cf · Tink_Y · Cheerego · ff423730 · 6d04a9cf · 6d04a9cf
9 changed file
--- a/python/paddle/fluid/clip.py
+++ b/python/paddle/fluid/clip.py
@@ -134,12 +134,12 @@ class GradientClipByValue(BaseGradientClipAttr):
    Examples:
        .. code-block:: python

-            w_param_attrs = ParamAttr(name=None,
-              initializer=UniformInitializer(low=-1.0, high=1.0, seed=0),
+            w_param_attrs = fluid.ParamAttr(name=None,
+              initializer=fluid.initializer.UniformInitializer(low=-1.0, high=1.0, seed=0),
              learning_rate=1.0,
-              regularizer=L1Decay(1.0),
+              regularizer=fluid.regularizer.L1Decay(1.0),
              trainable=True,
-              clip=GradientClipByValue(-1.0, 1.0))
+              clip=fluid.clip.GradientClipByValue(-1.0, 1.0))
            y_predict = fluid.layers.fc(input=x, size=1, param_attr=w_param_attrs)
    """

@@ -185,12 +185,12 @@ class GradientClipByNorm(BaseGradientClipAttr):
    Examples:
        .. code-block:: python

-            w_param_attrs = ParamAttr(name=None,
-              initializer=UniformInitializer(low=-1.0, high=1.0, seed=0),
+            w_param_attrs = fluid.ParamAttr(name=None,
+              initializer=fluid.initializer.UniformInitializer(low=-1.0, high=1.0, seed=0),
              learning_rate=1.0,
-              regularizer=L1Decay(1.0),
+              regularizer=fluid.regularizer.L1Decay(1.0),
              trainable=True,
-              clip=GradientClipByNorm(clip_norm=2.0))
+              clip=fluid.clip.GradientClipByNorm(clip_norm=2.0))
            y_predict = fluid.layers.fc(input=x, size=1, param_attr=w_param_attrs)

    """

--- a/python/paddle/fluid/executor.py
+++ b/python/paddle/fluid/executor.py
@@ -20,7 +20,7 @@ import six
 from .framework import Program, default_main_program, Variable
 from . import core

-__all__ = ['Executor', 'global_scope', 'scope_guard', '_switch_scope']
+__all__ = ['Executor', 'global_scope', 'scope_guard']

 g_scope = core.Scope()

@@ -407,16 +407,17 @@ class Executor(object):

        Examples:

-            >>> data = layers.data(name='X', shape=[1], dtype='float32')
-            >>> hidden = layers.fc(input=data, size=10)
-            >>> layers.assign(hidden, out)
-            >>> loss = layers.mean(out)
+            >>> data = fluid.layers.data(name='X', shape=[1], dtype='float32')
+            >>> out = fluid.layers.create_tensor(dtype='float32')
+            >>> hidden = fluid.layers.fc(input=data, size=10)
+            >>> fluid.layers.assign(hidden, out)
+            >>> loss = fluid.layers.mean(out)
            >>> adam = fluid.optimizer.Adam()
-            >>> adam.minimize(loss)
+            >>> adam.minimize(loss)

            >>> cpu = core.CPUPlace()
-            >>> exe = Executor(cpu)
-            >>> exe.run(default_startup_program())
+            >>> exe = fluid.Executor(cpu)
+            >>> exe.run(fluid.default_startup_program())

            >>> x = numpy.random.random(size=(10, 1)).astype('float32')
            >>> outs = exe.run(

--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -89,12 +89,13 @@ def name_scope(prefix=None):

    Examples:
        .. code-block:: python
+
          with name_scope("encoder"):
             ...
          with name_scope("decoder"):
             ...
-             with name_scope("attention"):
-                ...
+          with name_scope("attention"):
+             ...
    """
    # TODO(panyx0718): Only [0-9a-z].
    assert prefix, "namescope prefix cannot be empty."

--- a/python/paddle/fluid/layers/io.py
+++ b/python/paddle/fluid/layers/io.py
@@ -943,7 +943,18 @@ def __create_unshared_decorated_reader__(op_type, reader, attrs, name=None):

 def shuffle(reader, buffer_size):
    """
-    Shuffle the reader.
+    Creates a data reader whose data output is shuffled.
+    Output from the iterator created by the original reader will be
+    buffered into a shuffle buffer, and then shuffled. The size of the shuffle
+    buffer is determined by the argument buffer_size.
+
+    Args:
+        reader (callable): the original reader whose output will be
+            shuffled.
+        buffer_size (int): shuffle buffer size.
+
+    Returns:
+        callable: the new reader whose output is shuffled.
    """
    return __create_unshared_decorated_reader__(
        'create_shuffle_reader', reader, {'buffer_size': int(buffer_size)})

--- a/python/paddle/fluid/layers/learning_rate_scheduler.py
+++ b/python/paddle/fluid/layers/learning_rate_scheduler.py
@@ -308,13 +308,9 @@ def piecewise_decay(boundaries, values):


 def append_LARS(params_grads, learning_rate, weight_decay):
-    """Applies LARS (LAYER-WISE ADAPTIVE RATE SCALING) to learning rate for
-       each layer.
-
-    ```python
-        learning_rate *= local_gw_ratio * sqrt(sumsq(param))
-                        / (sqrt(sumsq(gradient))+ weight_decay * sqrt(sumsq(param)))
-    ```
+    """
+    Applies LARS (LAYER-WISE ADAPTIVE RATE SCALING) to learning rate for
+    each layer.

    Args:
        learning_rate: A learning rate Variable. This
@@ -323,6 +319,11 @@ def append_LARS(params_grads, learning_rate, weight_decay):

    Returns:
        The decayed learning rate
+    Examples:
+        .. code-block:: python
+
+            learning_rate *= local_gw_ratio * sqrt(sumsq(param))
+                        / (sqrt(sumsq(gradient))+ weight_decay * sqrt(sumsq(param)))
    """

    def _balanced_weight(param_norm, grad_norm):

--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -928,7 +928,7 @@ def dynamic_gru(input,
            emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
            hidden_dim = 512
            x = fluid.layers.fc(input=emb, size=hidden_dim * 3)
-            hidden = fluid.layers.dynamic_gru(input=x, dim=hidden_dim)
+            hidden = fluid.layers.dynamic_gru(input=x, size=hidden_dim)
    """

    helper = LayerHelper('gru', **locals())
@@ -3586,6 +3586,7 @@ def beam_search_decode(ids, scores, beam_size, end_id, name=None):

    Examples:
        .. code-block:: python
+
            # Suppose `ids` and `scores` are LodTensorArray variables reserving
            # the selected ids and scores of all steps
            finished_ids, finished_scores = layers.beam_search_decode(
@@ -5083,7 +5084,7 @@ def im2sequence(input,

            output.lod = [[4, 4]]

-     Examples:
+    Examples:

        .. code-block:: python

@@ -5870,24 +5871,23 @@ def pad_constant_like(x, y, pad_value=0., name=None):
                  [[38, 39, 40]],
                  [[41, 42, 43]]]]
            Y.shape = (1, 3, 1, 3)
+        And
+            pad_value = -1,

-    And
-        pad_value = -1,
-
-    Return:
-        Out = [[[[35, 36, 37],
-                  [-1, -1, -1]],
-                [[38, 39, 40],
-                  [-1, -1, -1]],
-                 [[41, 42, 43],
-                  [-1, -1, -1]]],
-                [[[-1, -1, -1],
-                  [-1, -1, -1]],
-                 [[-1, -1, -1],
-                  [-1, -1, -1]],
-                 [[-1, -1, -1],
-                  [-1, -1, -1]]]]
-        Out.shape = (2, 3, 2, 3)
+        Return:
+            Out = [[[[35, 36, 37],
+                     [-1, -1, -1]],
+                    [[38, 39, 40],
+                     [-1, -1, -1]],
+                    [[41, 42, 43],
+                     [-1, -1, -1]]],
+                  [[[-1, -1, -1],
+                    [-1, -1, -1]],
+                   [[-1, -1, -1],
+                    [-1, -1, -1]],
+                   [[-1, -1, -1],
+                    [-1, -1, -1]]]]
+            Out.shape = (2, 3, 2, 3)

    Args:
        x (Variable): The input tensor variable.
@@ -6126,6 +6126,7 @@ def image_resize(input,
    Supporting resample methods:

        'BILINEAR' : Bilinear interpolation
+
        'NEAREST' : Nearest neighbor interpolation

    Args:
@@ -6781,7 +6782,7 @@ def crop(x, shape=None, offsets=None, name=None):

            # or
            z = fluid.layers.data(name="z", shape=[3, 5], dtype="float32")
-            crop = fluid.layers.crop(z, shape=[2, 3])
+            crop = fluid.layers.crop(z, shape=[-1, 2, 3])

    """
    helper = LayerHelper('crop', **locals())
@@ -7062,39 +7063,40 @@ def pad2d(input,
    than height-1. And the width dimension has the same condition.

    Example:
+        .. code-block:: text

-      Given that X is a channel of image from input:
+              Given that X is a channel of image from input:

-      X = [[1, 2, 3],
-           [4, 5, 6]]
+              X = [[1, 2, 3],
+                   [4, 5, 6]]

-      Case 0:
+              Case 0:

-        paddings = [0, 1, 2, 3],
-        mode = 'constant'
-        pad_value = 0
+                paddings = [0, 1, 2, 3],
+                mode = 'constant'
+                pad_value = 0

-        Out = [[0, 0, 1, 2, 3, 0, 0, 0]
-               [0, 0, 4, 5, 6, 0, 0, 0]
-               [0, 0, 0, 0, 0, 0, 0, 0]]
+                Out = [[0, 0, 1, 2, 3, 0, 0, 0]
+                       [0, 0, 4, 5, 6, 0, 0, 0]
+                       [0, 0, 0, 0, 0, 0, 0, 0]]

-      Case 1:
+              Case 1:

-        paddings = [0, 1, 2, 1],
-        mode = 'reflect'
+                paddings = [0, 1, 2, 1],
+                mode = 'reflect'

-        Out = [[3, 2, 1, 2, 3, 2]
-               [6, 5, 4, 5, 6, 5]
-               [3, 2, 1, 2, 3, 2]]
+                Out = [[3, 2, 1, 2, 3, 2]
+                       [6, 5, 4, 5, 6, 5]
+                       [3, 2, 1, 2, 3, 2]]

-      Case 2:
+              Case 2:

-        paddings = [0, 1, 2, 1],
-        mode = 'edge'
+                paddings = [0, 1, 2, 1],
+                mode = 'edge'

-        Out = [[1, 1, 1, 2, 3, 3]
-               [4, 4, 4, 5, 6, 6]
-               [4, 4, 4, 5, 6, 6]]
+                Out = [[1, 1, 1, 2, 3, 3]
+                       [4, 4, 4, 5, 6, 6]
+                       [4, 4, 4, 5, 6, 6]]


    Args:
@@ -7332,13 +7334,13 @@ def prelu(x, mode, param_attr=None, name=None):
    Args:
        x (Variable): The input tensor.
        param_attr(ParamAttr|None): The parameter attribute for the learnable
-                       weight (alpha).
+          weight (alpha).
        mode (string): The mode for weight sharing. It supports all, channel
-                       and element. all: all elements share same weight
-                       channel:elements in a channel share same weight
-                       element:each element has a weight
+          and element. all: all elements share the same weight;
+          channel: elements in a channel share the same weight;
+          element: each element has its own weight.
        name(str|None): A name for this layer(optional). If set None, the layer
-                       will be named automatically.
+          will be named automatically.

    Returns:
        Variable: The output tensor with the same shape as input.

--- a/python/paddle/fluid/metrics.py
+++ b/python/paddle/fluid/metrics.py
@@ -222,13 +222,13 @@ class Precision(MetricBase):
    Examples:
        .. code-block:: python

-        metric = fluid.metrics.Precision()
-        for pass in range(PASSES):
-            metric.reset()
-            for data in train_reader():
-                loss, preds, labels = exe.run(fetch_list=[cost, preds, labels])
-            metric.update(preds=preds, labels=labels)
-            numpy_precision = metric.eval()
+            metric = fluid.metrics.Precision()
+            for pass_id in range(PASSES):
+                metric.reset()
+                for data in train_reader():
+                    loss, preds, labels = exe.run(fetch_list=[cost, preds, labels])
+                metric.update(preds=preds, labels=labels)
+                numpy_precision = metric.eval()
    """

    def __init__(self, name=None):
@@ -267,13 +267,13 @@ class Recall(MetricBase):
    Examples:
        .. code-block:: python

-        metric = fluid.metrics.Recall()
-        for pass in range(PASSES):
-            metric.reset()
-            for data in train_reader():
-                loss, preds, labels = exe.run(fetch_list=[cost, preds, labels])
-            metric.update(preds=preds, labels=labels)
-            numpy_recall = metric.eval()
+            metric = fluid.metrics.Recall()
+            for pass_id in range(PASSES):
+                metric.reset()
+                for data in train_reader():
+                    loss, preds, labels = exe.run(fetch_list=[cost, preds, labels])
+                metric.update(preds=preds, labels=labels)
+                numpy_recall = metric.eval()
    """

    def __init__(self, name=None):
@@ -449,8 +449,9 @@ class EditDistance(MetricBase):
                distance_evaluator.update(distances, seq_num)
                distance, instance_error = distance_evaluator.eval()

-        In the above example:
+    In the above example:
        'distance' is the average of the edit distance in a pass.
+
        'instance_error' is the instance error rate in a pass.

    """

--- a/python/paddle/fluid/param_attr.py
+++ b/python/paddle/fluid/param_attr.py
@@ -50,8 +50,9 @@ class ParamAttr(object):

            w_param_attrs = fluid.ParamAttr(name="fc_weight",
                                            learning_rate=0.5,
-                                            regularizer=fluid.L2Decay(1.0),
+                                            regularizer=fluid.regularizer.L2Decay(1.0),
                                            trainable=True)
+            x = fluid.layers.data(name='X', shape=[1], dtype='float32')
            y_predict = fluid.layers.fc(input=x, size=10, param_attr=w_param_attrs)
    """


--- a/python/paddle/fluid/transpiler/distribute_transpiler.py
+++ b/python/paddle/fluid/transpiler/distribute_transpiler.py
@@ -125,13 +125,14 @@ def slice_variable(var_list, slice_count, min_block_size):

 class DistributeTranspilerConfig(object):
    """
-    slice_var_up (bool): Do Tensor slice for pservers, default is True.
-    split_method (PSDispatcher): RoundRobin or HashName can be used
-        try to choose the best method to balance loads for pservers.
-    min_block_size (int): Minimum splitted element number in block.
-        According:https://github.com/PaddlePaddle/Paddle/issues/8638#issuecomment-369912156
-        We can use bandwidth effiently when data size is larger than 2MB.If you
-        want to change it, please be sure you see the slice_variable function.
+    Args:
+        slice_var_up (bool): Do Tensor slice for pservers, default is True.
+        split_method (PSDispatcher): RoundRobin or HashName can be used;
+          try to choose the best method to balance loads for pservers.
+        min_block_size (int): Minimum number of split elements in a block.
+          According to https://github.com/PaddlePaddle/Paddle/issues/8638#issuecomment-369912156
+          bandwidth is used efficiently when data size is larger than 2MB. If you
+          want to change it, please be sure to read the slice_variable function.
    """

    slice_var_up = True
@@ -163,35 +164,35 @@ class DistributeTranspiler(object):
    Examples:
        .. code-block:: python

-           # for pserver mode
-           pserver_endpoints = "192.168.0.1:6174,192.168.0.2:6174"
-           trainer_endpoints = "192.168.0.1:6174,192.168.0.2:6174"
-           current_endpoint = "192.168.0.1:6174"
-           trainer_id = 0
-           trainers = 4
-           role = os.getenv("PADDLE_TRAINING_ROLE")
-
-           t = fluid.DistributeTranspiler()
-           t.transpile(
-                trainer_id, pservers=pserver_endpoints, trainers=trainers)
-           if role == "PSERVER":
-                pserver_program = t.get_pserver_program(current_endpoint)
-                pserver_startup_program = t.get_startup_program(current_endpoint,
+            # for pserver mode
+            pserver_endpoints = "192.168.0.1:6174,192.168.0.2:6174"
+            trainer_endpoints = "192.168.0.1:6174,192.168.0.2:6174"
+            current_endpoint = "192.168.0.1:6174"
+            trainer_id = 0
+            trainers = 4
+            role = os.getenv("PADDLE_TRAINING_ROLE")
+
+            t = fluid.DistributeTranspiler()
+            t.transpile(
+                 trainer_id, pservers=pserver_endpoints, trainers=trainers)
+            if role == "PSERVER":
+                 pserver_program = t.get_pserver_program(current_endpoint)
+                 pserver_startup_program = t.get_startup_program(current_endpoint,
                                                                pserver_program)
-           elif role == "TRAINER":
-                trainer_program = t.get_trainer_program()
-
-           # for nccl2 mode
-           config = fluid.DistributeTranspilerConfig()
-           config.mode = "nccl2"
-           t = fluid.DistributeTranspiler(config=config)
-           t.transpile(trainer_id, workers=workers, current_endpoint=curr_ep)
-           exe = fluid.ParallelExecutor(
-               use_cuda,
-               loss_name=loss_var.name,
-               num_trainers=len(trainers.split(",)),
-               trainer_id=trainer_id
-           )
+            elif role == "TRAINER":
+                 trainer_program = t.get_trainer_program()
+
+            # for nccl2 mode
+            config = fluid.DistributeTranspilerConfig()
+            config.mode = "nccl2"
+            t = fluid.DistributeTranspiler(config=config)
+            t.transpile(trainer_id, workers=workers, current_endpoint=curr_ep)
+            exe = fluid.ParallelExecutor(
+                use_cuda,
+                loss_name=loss_var.name,
+                num_trainers=len(trainers.split(",")),
+                trainer_id=trainer_id
+            )
    """

    def __init__(self, config=None):