fix api format and example (#14686)

* fix api format and examples test=develop * Update executor.py test=develop * Update nn.py * Update nn.py test=develop * Update nn.py test=develop

fix api format and example (#14686)
* fix api format and examples test=develop * Update executor.py test=develop * Update nn.py * Update nn.py test=develop * Update nn.py test=develop
6d04a9cf · Tink_Y · Cheerego · ff423730 · 6d04a9cf · 6d04a9cf
9 changed file
--- a/python/paddle/fluid/clip.py
+++ b/python/paddle/fluid/clip.py
@@ -134,12 +134,12 @@ class GradientClipByValue(BaseGradientClipAttr):
    Examples:
        .. code-block:: python

-            w_param_attrs = ParamAttr(name=None,
-              initializer=UniformInitializer(low=-1.0, high=1.0, seed=0),
+            w_param_attrs = fluid.ParamAttr(name=None,
+              initializer=fluid.initializer.UniformInitializer(low=-1.0, high=1.0, seed=0),
              learning_rate=1.0,
-              regularizer=L1Decay(1.0),
+              regularizer=fluid.regularizer.L1Decay(1.0),
              trainable=True,
-              clip=GradientClipByValue(-1.0, 1.0))
+              clip=fluid.clip.GradientClipByValue(-1.0, 1.0))
            y_predict = fluid.layers.fc(input=x, size=1, param_attr=w_param_attrs)
    """

@@ -185,12 +185,12 @@ class GradientClipByNorm(BaseGradientClipAttr):
    Examples:
        .. code-block:: python

-            w_param_attrs = ParamAttr(name=None,
-              initializer=UniformInitializer(low=-1.0, high=1.0, seed=0),
+            w_param_attrs = flui.ParamAttr(name=None,
+              initializer=fluid.initializer.UniformInitializer(low=-1.0, high=1.0, seed=0),
              learning_rate=1.0,
-              regularizer=L1Decay(1.0),
+              regularizer=fluid.regularizer.L1Decay(1.0),
              trainable=True,
-              clip=GradientClipByNorm(clip_norm=2.0))
+              clip=fluid.clip.GradientClipByNorm(clip_norm=2.0))
            y_predict = fluid.layers.fc(input=x, size=1, param_attr=w_param_attrs)

    """

--- a/python/paddle/fluid/executor.py
+++ b/python/paddle/fluid/executor.py
@@ -20,7 +20,7 @@ import six
 from .framework import Program, default_main_program, Variable
 from . import core

-__all__ = ['Executor', 'global_scope', 'scope_guard', '_switch_scope']
+__all__ = ['Executor', 'global_scope', 'scope_guard']

 g_scope = core.Scope()

@@ -407,16 +407,17 @@ class Executor(object):

        Examples:

-            >>> data = layers.data(name='X', shape=[1], dtype='float32')
-            >>> hidden = layers.fc(input=data, size=10)
-            >>> layers.assign(hidden, out)
-            >>> loss = layers.mean(out)
+            >>> data = fluid.layers.data(name='X', shape=[1], dtype='float32')
+            >>> out = fluid.layers.create_tensor(dtype='float32')
+            >>> hidden = fluid.layers.fc(input=data, size=10)
+            >>> fluid.layers.assign(hidden,out)
+            >>> loss = fluid.layers.mean(out)
            >>> adam = fluid.optimizer.Adam()
 						>>> adam.minimize(loss)

            >>> cpu = core.CPUPlace()
-            >>> exe = Executor(cpu)
-            >>> exe.run(default_startup_program())
+            >>> exe = fluid.Executor(cpu)
+            >>> exe.run(fluid.default_startup_program())

            >>> x = numpy.random.random(size=(10, 1)).astype('float32')
            >>> outs = exe.run(

--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -89,6 +89,7 @@ def name_scope(prefix=None):

    Examples:
        .. code-block:: python
+
          with name_scope("encoder"):
             ...
          with name_scope("decoder"):

--- a/python/paddle/fluid/layers/io.py
+++ b/python/paddle/fluid/layers/io.py
@@ -943,7 +943,18 @@ def __create_unshared_decorated_reader__(op_type, reader, attrs, name=None):

 def shuffle(reader, buffer_size):
    """
-    Shuffle the reader.
+    Creates a data reader whose data output is shuffled.
+    Output from the iterator that created by original reader will be
+    buffered into shuffle buffer, and then shuffled. The size of shuffle buffer
+    is determined by argument buf_size.
+
+    Args:
+        param reader: the original reader whose output will be shuffled.
+        type reader: callable
+        param buf_size: shuffle buffer size.
+        type buf_size: int
+        return: the new reader whose output is shuffled.
+        rtype: callable
    """
    return __create_unshared_decorated_reader__(
        'create_shuffle_reader', reader, {'buffer_size': int(buffer_size)})

--- a/python/paddle/fluid/layers/learning_rate_scheduler.py
+++ b/python/paddle/fluid/layers/learning_rate_scheduler.py
@@ -308,14 +308,10 @@ def piecewise_decay(boundaries, values):


 def append_LARS(params_grads, learning_rate, weight_decay):
-    """Applies LARS (LAYER-WISE ADAPTIVE RATE SCALING) to learning rate for
+    """
+    Applies LARS (LAYER-WISE ADAPTIVE RATE SCALING) to learning rate for
    each layer.

-    ```python
-        learning_rate *= local_gw_ratio * sqrt(sumsq(param))
-                        / (sqrt(sumsq(gradient))+ weight_decay * sqrt(sumsq(param)))
-    ```
-
    Args:
        learning_rate: A learning rate Variable. This
          is the global learning rate for LARS.
@@ -323,6 +319,11 @@ def append_LARS(params_grads, learning_rate, weight_decay):

    Returns:
        The decayed learning rate
+    Examples:
+        .. code-block:: python
+        
+            learning_rate *= local_gw_ratio * sqrt(sumsq(param))
+                        / (sqrt(sumsq(gradient))+ weight_decay * sqrt(sumsq(param)))
    """

    def _balanced_weight(param_norm, grad_norm):

--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -928,7 +928,7 @@ def dynamic_gru(input,
            emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
            hidden_dim = 512
            x = fluid.layers.fc(input=emb, size=hidden_dim * 3)
-            hidden = fluid.layers.dynamic_gru(input=x, dim=hidden_dim)
+            hidden = fluid.layers.dynamic_gru(input=x, size=hidden_dim)
    """

    helper = LayerHelper('gru', **locals())
@@ -3586,6 +3586,7 @@ def beam_search_decode(ids, scores, beam_size, end_id, name=None):

    Examples:
        .. code-block:: python
+
            # Suppose `ids` and `scores` are LodTensorArray variables reserving
            # the selected ids and scores of all steps
            finished_ids, finished_scores = layers.beam_search_decode(
@@ -5870,7 +5871,6 @@ def pad_constant_like(x, y, pad_value=0., name=None):
                  [[38, 39, 40]],
                  [[41, 42, 43]]]]
            Y.shape = (1, 3, 1, 3)
-
 		And
            pad_value = -1,

@@ -6126,6 +6126,7 @@ def image_resize(input,
    Supporting resample methods:

        'BILINEAR' : Bilinear interpolation
+
        'NEAREST' : Nearest neighbor interpolation

    Args:
@@ -6781,7 +6782,7 @@ def crop(x, shape=None, offsets=None, name=None):

            # or
            z = fluid.layers.data(name="z", shape=[3, 5], dtype="float32")
-            crop = fluid.layers.crop(z, shape=[2, 3])
+            crop = fluid.layers.crop(z, shape=[-1, 2, 3])

    """
    helper = LayerHelper('crop', **locals())
@@ -7062,6 +7063,7 @@ def pad2d(input,
    than height-1. And the width dimension has the same condition.

    Example:
+        .. code-block:: text

 	      Given that X is a channel of image from input:


--- a/python/paddle/fluid/metrics.py
+++ b/python/paddle/fluid/metrics.py
@@ -451,6 +451,7 @@ class EditDistance(MetricBase):

    In the above example:
        'distance' is the average of the edit distance in a pass.
+
        'instance_error' is the instance error rate in a pass.

    """

--- a/python/paddle/fluid/param_attr.py
+++ b/python/paddle/fluid/param_attr.py
@@ -50,8 +50,9 @@ class ParamAttr(object):

            w_param_attrs = fluid.ParamAttr(name="fc_weight",
                                            learning_rate=0.5,
-                                            regularizer=fluid.L2Decay(1.0),
+                                            regularizer=fluid.regularizer.L2Decay(1.0),
                                            trainable=True)
+	    x = fluid.layers.data(name='X', shape=[1], dtype='float32')
            y_predict = fluid.layers.fc(input=x, size=10, param_attr=w_param_attrs)
    """


--- a/python/paddle/fluid/transpiler/distribute_transpiler.py
+++ b/python/paddle/fluid/transpiler/distribute_transpiler.py
@@ -125,6 +125,7 @@ def slice_variable(var_list, slice_count, min_block_size):

 class DistributeTranspilerConfig(object):
    """
+    Args:
        slice_var_up (bool): Do Tensor slice for pservers, default is True.
        split_method (PSDispatcher): RoundRobin or HashName can be used
          try to choose the best method to balance loads for pservers.