cherry-pick API reference for release1.2 (#14750)

* Add examples to some functions. (#14645) * Fix comments of ctc_greedy_decoder. (#14679) test=develop * fix api format and examples test=develop * Update executor.py test=develop * Update nn.py * Update nn.py test=develop * Update nn.py test=develop * Update clip.py test=release1.2

cherry-pick API reference for release1.2 (#14750)
* Add examples to some functions. (#14645) * Fix comments of ctc_greedy_decoder. (#14679) test=develop * fix api format and examples test=develop * Update executor.py test=develop * Update nn.py * Update nn.py test=develop * Update nn.py test=develop * Update clip.py test=release1.2
08f927de · Tink_Y · Cheerego · 8feb99b4 · 08f927de · 08f927de
9 changed files
--- a/python/paddle/fluid/clip.py
+++ b/python/paddle/fluid/clip.py
@@ -134,12 +134,12 @@ class GradientClipByValue(BaseGradientClipAttr):
    Examples:
        .. code-block:: python

-            w_param_attrs = ParamAttr(name=None,
-              initializer=UniformInitializer(low=-1.0, high=1.0, seed=0),
+            w_param_attrs = fluid.ParamAttr(name=None,
+              initializer=fluid.initializer.UniformInitializer(low=-1.0, high=1.0, seed=0),
              learning_rate=1.0,
-              regularizer=L1Decay(1.0),
+              regularizer=fluid.regularizer.L1Decay(1.0),
              trainable=True,
-              clip=GradientClipByValue(-1.0, 1.0))
+              clip=fluid.clip.GradientClipByValue(-1.0, 1.0))
            y_predict = fluid.layers.fc(input=x, size=1, param_attr=w_param_attrs)
    """

@@ -185,12 +185,12 @@ class GradientClipByNorm(BaseGradientClipAttr):
    Examples:
        .. code-block:: python

-            w_param_attrs = ParamAttr(name=None,
-              initializer=UniformInitializer(low=-1.0, high=1.0, seed=0),
+            w_param_attrs = fluid.ParamAttr(name=None,
+              initializer=fluid.initializer.UniformInitializer(low=-1.0, high=1.0, seed=0),
              learning_rate=1.0,
-              regularizer=L1Decay(1.0),
+              regularizer=fluid.regularizer.L1Decay(1.0),
              trainable=True,
-              clip=GradientClipByNorm(clip_norm=2.0))
+              clip=fluid.clip.GradientClipByNorm(clip_norm=2.0))
            y_predict = fluid.layers.fc(input=x, size=1, param_attr=w_param_attrs)

    """

--- a/python/paddle/fluid/executor.py
+++ b/python/paddle/fluid/executor.py
@@ -20,7 +20,7 @@ import six
 from .framework import Program, default_main_program, Variable
 from . import core

-__all__ = ['Executor', 'global_scope', 'scope_guard', '_switch_scope']
+__all__ = ['Executor', 'global_scope', 'scope_guard']

 g_scope = core.Scope()

@@ -407,16 +407,17 @@ class Executor(object):

        Examples:

-            >>> data = layers.data(name='X', shape=[1], dtype='float32')
-            >>> hidden = layers.fc(input=data, size=10)
-            >>> layers.assign(hidden, out)
-            >>> loss = layers.mean(out)
+            >>> data = fluid.layers.data(name='X', shape=[1], dtype='float32')
+            >>> out = fluid.layers.create_tensor(dtype='float32')
+            >>> hidden = fluid.layers.fc(input=data, size=10)
+            >>> fluid.layers.assign(hidden,out)
+            >>> loss = fluid.layers.mean(out)
            >>> adam = fluid.optimizer.Adam()
 						>>> adam.minimize(loss)

            >>> cpu = core.CPUPlace()
-            >>> exe = Executor(cpu)
-            >>> exe.run(default_startup_program())
+            >>> exe = fluid.Executor(cpu)
+            >>> exe.run(fluid.default_startup_program())

            >>> x = numpy.random.random(size=(10, 1)).astype('float32')
            >>> outs = exe.run(

--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -89,6 +89,7 @@ def name_scope(prefix=None):

    Examples:
        .. code-block:: python
+
          with name_scope("encoder"):
             ...
          with name_scope("decoder"):

--- a/python/paddle/fluid/layers/io.py
+++ b/python/paddle/fluid/layers/io.py
@@ -943,7 +943,18 @@ def __create_unshared_decorated_reader__(op_type, reader, attrs, name=None):

 def shuffle(reader, buffer_size):
    """
-    Shuffle the reader.
+    Creates a data reader whose data output is shuffled.
+    Output from the iterator created by the original reader is
+    buffered into a shuffle buffer and then shuffled. The size of the shuffle
+    buffer is determined by the argument buffer_size.
+
+    Args:
+        reader (callable): the original reader whose output will be
+            shuffled.
+        buffer_size (int): the shuffle buffer size.
+
+    Returns:
+        callable: the new reader whose output is shuffled.
    """
    return __create_unshared_decorated_reader__(
        'create_shuffle_reader', reader, {'buffer_size': int(buffer_size)})

--- a/python/paddle/fluid/layers/learning_rate_scheduler.py
+++ b/python/paddle/fluid/layers/learning_rate_scheduler.py
@@ -308,14 +308,10 @@ def piecewise_decay(boundaries, values):


 def append_LARS(params_grads, learning_rate, weight_decay):
-    """Applies LARS (LAYER-WISE ADAPTIVE RATE SCALING) to learning rate for
+    """
+    Applies LARS (LAYER-WISE ADAPTIVE RATE SCALING) to learning rate for
    each layer.

-    ```python
-        learning_rate *= local_gw_ratio * sqrt(sumsq(param))
-                        / (sqrt(sumsq(gradient))+ weight_decay * sqrt(sumsq(param)))
-    ```
-
    Args:
        learning_rate: A learning rate Variable. This
          is the global learning rate for LARS.
@@ -323,6 +319,11 @@ def append_LARS(params_grads, learning_rate, weight_decay):

    Returns:
        The decayed learning rate
+    Examples:
+        .. code-block:: python
+        
+            learning_rate *= local_gw_ratio * sqrt(sumsq(param))
+                        / (sqrt(sumsq(gradient))+ weight_decay * sqrt(sumsq(param)))
    """

    def _balanced_weight(param_norm, grad_norm):

--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -928,7 +928,7 @@ def dynamic_gru(input,
            emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
            hidden_dim = 512
            x = fluid.layers.fc(input=emb, size=hidden_dim * 3)
-            hidden = fluid.layers.dynamic_gru(input=x, dim=hidden_dim)
+            hidden = fluid.layers.dynamic_gru(input=x, size=hidden_dim)
    """

    helper = LayerHelper('gru', **locals())
@@ -3560,6 +3560,7 @@ def beam_search_decode(ids, scores, beam_size, end_id, name=None):

    Examples:
        .. code-block:: python
+
            # Suppose `ids` and `scores` are LodTensorArray variables reserving
            # the selected ids and scores of all steps
            finished_ids, finished_scores = layers.beam_search_decode(
@@ -4390,7 +4391,14 @@ def ctc_greedy_decoder(input, blank, name=None):

        input.lod = [[4, 4]]
      
-        Then:
+        Computation:
+
+        step1: Apply argmax to first input sequence which is input.data[0:4]. Then we get:
+               [[0], [2], [1], [0]]
+        step2: merge repeated tokens and remove blank which is 0. Then we get first output sequence:
+               [[2], [1]]
+
+        Finally:

        output.data = [[2],
                       [1],
@@ -4398,6 +4406,7 @@ def ctc_greedy_decoder(input, blank, name=None):

        output.lod = [[2, 1]]

+
    Args:

        input(Variable): (LoDTensor<float>), the probabilities of
@@ -4412,8 +4421,10 @@ def ctc_greedy_decoder(input, blank, name=None):
        name (str): The name of this layer. It is optional.

    Returns:
-        Variable: CTC greedy decode result. If all the sequences in result were
-        empty, the result LoDTensor will be [-1] with LoD [[]] and dims [1, 1].
+        Variable: CTC greedy decode result which is a 2-D tensor with shape [Lp, 1].
+                  'Lp' is the sum of all output sequences' lengths. If all the sequences
+                  in the result are empty, the result LoDTensor will be [-1] with
+                  LoD [[]] and dims [1, 1].

    Examples:
        .. code-block:: python
@@ -5834,7 +5845,6 @@ def pad_constant_like(x, y, pad_value=0., name=None):
                  [[38, 39, 40]],
                  [[41, 42, 43]]]]
            Y.shape = (1, 3, 1, 3)
-
 		And
            pad_value = -1,

@@ -6090,6 +6100,7 @@ def image_resize(input,
    Supporting resample methods:

        'BILINEAR' : Bilinear interpolation
+
        'NEAREST' : Nearest neighbor interpolation

    Args:
@@ -6745,7 +6756,7 @@ def crop(x, shape=None, offsets=None, name=None):

            # or
            z = fluid.layers.data(name="z", shape=[3, 5], dtype="float32")
-            crop = fluid.layers.crop(z, shape=[2, 3])
+            crop = fluid.layers.crop(z, shape=[-1, 2, 3])

    """
    helper = LayerHelper('crop', **locals())
@@ -7026,6 +7037,7 @@ def pad2d(input,
    than height-1. And the width dimension has the same condition.

    Example:
+        .. code-block:: text

 	      Given that X is a channel of image from input:

@@ -7745,6 +7757,11 @@ def uniform_random_batch_size_like(input,
    Returns:
        out (Variable): ${out_comment}

+    Examples:
+        .. code-block:: python
+
+            input = layers.data(name="input", shape=[13, 11], dtype='float32')
+            out = layers.uniform_random_batch_size_like(input, [-1, 11])
    """

    helper = LayerHelper('uniform_random_batch_size_like', **locals())
@@ -7782,6 +7799,10 @@ def gaussian_random(shape, mean=0.0, std=1.0, seed=0, dtype='float32'):
    Returns:
        out (Variable): ${out_comment}

+    Examples:
+        .. code-block:: python
+
+            out = layers.gaussian_random(shape=[20, 30])
    """

    helper = LayerHelper('gaussian_random', **locals())
@@ -7817,6 +7838,16 @@ def sampling_id(x, min=0.0, max=1.0, seed=0, dtype='float32'):
    Returns:
        out (Variable): ${out_comment}

+    Examples:
+        .. code-block:: python
+
+            x = layers.data(
+                name="X",
+                shape=[13, 11],
+                dtype='float32',
+                append_batch_size=False)
+
+            out = layers.sampling_id(x)
    """

    helper = LayerHelper('sampling_id', **locals())
@@ -7856,6 +7887,14 @@ def gaussian_random_batch_size_like(input,

    Returns:
        out (Variable): ${out_comment}
+
+    Examples:
+        .. code-block:: python
+
+            input = layers.data(name="input", shape=[13, 11], dtype='float32')
+
+            out = layers.gaussian_random_batch_size_like(
+                input, shape=[-1, 11], mean=1.0, std=2.0)
    """

    helper = LayerHelper('gaussian_random_batch_size_like', **locals())
@@ -7888,6 +7927,12 @@ def sum(x):

    Returns:
        out (Variable): ${out_comment}
+
+    Examples:
+        .. code-block:: python
+
+            input = layers.data(name="input", shape=[13, 11], dtype='float32')
+            out = layers.sum(input)
    """

    helper = LayerHelper('sum', **locals())
@@ -7916,6 +7961,17 @@ def slice(input, axes, starts, ends):
    Returns:
        out (Variable): ${out_comment}

+    Examples:
+        .. code-block:: python
+
+            starts = [1, 0, 2]
+            ends = [3, 3, 4]
+            axes = [0, 1, 2]
+
+            input = layers.data(
+                name="input", shape=[3, 4, 5, 6], dtype='float32')
+
+            out = layers.slice(input, axes=axes, starts=starts, ends=ends)
    """

    helper = LayerHelper('slice', **locals())
@@ -7943,6 +7999,12 @@ def shape(input):
    Returns:
        out (Variable): ${out_comment}

+    Examples:
+        .. code-block:: python
+
+            input = layers.data(
+                name="input", shape=[3, 100, 100], dtype="float32")
+            out = layers.shape(input)
    """

    helper = LayerHelper('shape', **locals())

--- a/python/paddle/fluid/metrics.py
+++ b/python/paddle/fluid/metrics.py
@@ -451,6 +451,7 @@ class EditDistance(MetricBase):

    In the above example:
        'distance' is the average of the edit distance in a pass.
+
        'instance_error' is the instance error rate in a pass.

    """

--- a/python/paddle/fluid/param_attr.py
+++ b/python/paddle/fluid/param_attr.py
@@ -50,8 +50,9 @@ class ParamAttr(object):

            w_param_attrs = fluid.ParamAttr(name="fc_weight",
                                            learning_rate=0.5,
-                                            regularizer=fluid.L2Decay(1.0),
+                                            regularizer=fluid.regularizer.L2Decay(1.0),
                                            trainable=True)
+            x = fluid.layers.data(name='X', shape=[1], dtype='float32')
            y_predict = fluid.layers.fc(input=x, size=10, param_attr=w_param_attrs)
    """


--- a/python/paddle/fluid/transpiler/distribute_transpiler.py
+++ b/python/paddle/fluid/transpiler/distribute_transpiler.py
@@ -125,6 +125,7 @@ def slice_variable(var_list, slice_count, min_block_size):

 class DistributeTranspilerConfig(object):
    """
+    Args:
        slice_var_up (bool): Do Tensor slice for pservers, default is True.
        split_method (PSDispatcher): RoundRobin or HashName can be used
          try to choose the best method to balance loads for pservers.