From 512c0bb04d3fbee80da6381163432b7afbae0ac1 Mon Sep 17 00:00:00 2001 From: hong19860320 <9973393+hong19860320@users.noreply.github.com> Date: Sat, 12 Oct 2019 12:13:51 +0800 Subject: [PATCH] refine the en api doc of ones, zeros, reverse, assign, increment, hsigmoid and create_py_reader_by_data ops (#20343) * refine the en api doc of ones, zeros, reverse, increment, hsigmoid and create_py_reader_by_data ops test=develop, test=document_preview, test=document_fix * refine eng doc for hsigmoid and create_py_reader_by_data ops test=develop, test=document_preview, test=document_fix * update API.spec test=document_fix * Fix the parameter name axis of reverse op in eng doc test=develop, test=document_fix * Update API.spec test=develop, test=document_fix * Refine eng doc of zeros, ones, reverse and assign op test=develop, test=document_fix * Update API.spec for assign, ones, zeros and reverse test=develop, test=document_fix * Fix data type of reverse op in eng doc test=develop, test=document_fix * Update API.spec for reverse op test=develop, test=document_fix --- paddle/fluid/API.spec | 14 +-- python/paddle/fluid/layers/control_flow.py | 22 ++-- python/paddle/fluid/layers/io.py | 116 ++++++++++----------- python/paddle/fluid/layers/nn.py | 114 ++++++++++---------- python/paddle/fluid/layers/tensor.py | 93 ++++++++--------- 5 files changed, 179 insertions(+), 180 deletions(-) diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index e743e2acbe..1516506e1c 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -180,7 +180,7 @@ paddle.fluid.layers.transpose (ArgSpec(args=['x', 'perm', 'name'], varargs=None, paddle.fluid.layers.im2sequence (ArgSpec(args=['input', 'filter_size', 'stride', 'padding', 'input_image_size', 'out_stride', 'name'], varargs=None, keywords=None, defaults=(1, 1, 0, None, 1, None)), ('document', 'fe352915a543cec434f74e9b32ac49da')) paddle.fluid.layers.nce (ArgSpec(args=['input', 'label', 'num_total_classes', 'sample_weight', 'param_attr', 'bias_attr', 'num_neg_samples', 'name', 'sampler', 'custom_dist', 'seed', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 'uniform', None, 0, False)), ('document', '38297567127888e01542857839058d52')) paddle.fluid.layers.sampled_softmax_with_cross_entropy (ArgSpec(args=['logits', 'label', 'num_samples', 'num_true', 'remove_accidental_hits', 'use_customized_samples', 'customized_samples', 'customized_probabilities', 'seed'], varargs=None, keywords=None, defaults=(1, True, False, None, None, 0)), ('document', 'd4435a63d34203339831ee6a86ef9242')) -paddle.fluid.layers.hsigmoid (ArgSpec(args=['input', 'label', 'num_classes', 'param_attr', 'bias_attr', 'name', 'path_table', 'path_code', 'is_custom', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, False, False)), ('document', 'b83e7dfa81059b39bb137922dc914f50')) +paddle.fluid.layers.hsigmoid (ArgSpec(args=['input', 'label', 'num_classes', 'param_attr', 'bias_attr', 'name', 'path_table', 'path_code', 'is_custom', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, False, False)), ('document', '247de339879885526e7f4d271967088f')) paddle.fluid.layers.beam_search (ArgSpec(args=['pre_ids', 'pre_scores', 'ids', 'scores', 'beam_size', 'end_id', 'level', 'is_accumulated', 'name', 'return_parent_idx'], varargs=None, keywords=None, defaults=(0, True, None, False)), ('document', '2b505ddaa309fd7b9be5445e41ca76d5')) paddle.fluid.layers.row_conv (ArgSpec(args=['input', 'future_context_size', 
'param_attr', 'act'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'a6477957b44907787b3c74157400b80c')) paddle.fluid.layers.multiplex (ArgSpec(args=['inputs', 'index'], varargs=None, keywords=None, defaults=None), ('document', '2c4d1ae83da6ed35e3b36ba1b3b51d23')) @@ -313,7 +313,7 @@ paddle.fluid.layers.data (ArgSpec(args=['name', 'shape', 'append_batch_size', 'd paddle.fluid.layers.read_file (ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None), ('document', 'd5b41c7b2df1b064fbd42dcf435268cd')) paddle.fluid.layers.double_buffer (ArgSpec(args=['reader', 'place', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', '556fa82daf62cbb0fb393f4125daba77')) paddle.fluid.layers.py_reader (ArgSpec(args=['capacity', 'shapes', 'dtypes', 'lod_levels', 'name', 'use_double_buffer'], varargs=None, keywords=None, defaults=(None, None, True)), ('document', 'd78a1c7344955c5caed8dc13adb7beb6')) -paddle.fluid.layers.create_py_reader_by_data (ArgSpec(args=['capacity', 'feed_list', 'name', 'use_double_buffer'], varargs=None, keywords=None, defaults=(None, True)), ('document', '2edf37d57862b24a7a26aa19a3573f73')) +paddle.fluid.layers.create_py_reader_by_data (ArgSpec(args=['capacity', 'feed_list', 'name', 'use_double_buffer'], varargs=None, keywords=None, defaults=(None, True)), ('document', '1321d4ce89d82f96fcfd5601f816b0f3')) paddle.fluid.layers.load (ArgSpec(args=['out', 'file_path', 'load_as_fp16'], varargs=None, keywords=None, defaults=(None,)), ('document', '309f9e5249463e1b207a7347b2a91134')) paddle.fluid.layers.create_tensor (ArgSpec(args=['dtype', 'name', 'persistable'], varargs=None, keywords=None, defaults=(None, False)), ('document', 'fdc2d964488e99fb0743887454c34e36')) paddle.fluid.layers.create_parameter (ArgSpec(args=['shape', 'dtype', 'name', 'attr', 'is_bias', 'default_initializer'], varargs=None, keywords=None, defaults=(None, None, False, None)), ('document', '727aa63c061919bee38547fb126d9428')) @@ -322,15 +322,15 @@ paddle.fluid.layers.cast (ArgSpec(args=['x', 'dtype'], varargs=None, keywords=No paddle.fluid.layers.tensor_array_to_tensor (ArgSpec(args=['input', 'axis', 'name', 'use_stack'], varargs=None, keywords=None, defaults=(1, None, False)), ('document', '4aa82374218ccf593bb8011df79c71e3')) paddle.fluid.layers.concat (ArgSpec(args=['input', 'axis', 'name'], varargs=None, keywords=None, defaults=(0, None)), ('document', 'ec7d6e716fb29ef1e73e1e3efa5ca46b')) paddle.fluid.layers.sums (ArgSpec(args=['input', 'out'], varargs=None, keywords=None, defaults=(None,)), ('document', '5df743d578638cd2bbb9369499b44af4')) -paddle.fluid.layers.assign (ArgSpec(args=['input', 'output'], varargs=None, keywords=None, defaults=(None,)), ('document', '8bd94aef4e123986d9a8c29f67b5532b')) +paddle.fluid.layers.assign (ArgSpec(args=['input', 'output'], varargs=None, keywords=None, defaults=(None,)), ('document', '98ce6e7c3659b8377c04cecfc72c2000')) paddle.fluid.layers.fill_constant_batch_size_like (ArgSpec(args=['input', 'shape', 'dtype', 'value', 'input_dim_idx', 'output_dim_idx', 'force_cpu'], varargs=None, keywords=None, defaults=(0, 0, False)), ('document', '2bb57637664173fee5f654e55896aec6')) paddle.fluid.layers.fill_constant (ArgSpec(args=['shape', 'dtype', 'value', 'force_cpu', 'out'], varargs=None, keywords=None, defaults=(False, None)), ('document', '66e1e468666dd47e5b2715226cebeac0')) paddle.fluid.layers.argmin (ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=(0,)), ('document', '53629e27597e5dfb7020aac5bc639ebb')) 
paddle.fluid.layers.argmax (ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=(0,)), ('document', 'd9a89fbedbaebd5f65897ac75ee636f3')) paddle.fluid.layers.argsort (ArgSpec(args=['input', 'axis', 'name'], varargs=None, keywords=None, defaults=(-1, None)), ('document', '8c7966eb4b37b2272a16717cac3a876c')) -paddle.fluid.layers.ones (ArgSpec(args=['shape', 'dtype', 'force_cpu'], varargs=None, keywords=None, defaults=(False,)), ('document', '812c623ed52610b9773f9fc05413bc34')) -paddle.fluid.layers.zeros (ArgSpec(args=['shape', 'dtype', 'force_cpu'], varargs=None, keywords=None, defaults=(False,)), ('document', '95379f9288c2d05356ec0e2375c6bc57')) -paddle.fluid.layers.reverse (ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=None), ('document', '628135603692137d52bcf5a8d8d6816d')) +paddle.fluid.layers.ones (ArgSpec(args=['shape', 'dtype', 'force_cpu'], varargs=None, keywords=None, defaults=(False,)), ('document', '4c5de61e6920f59992d7d15452cf4248')) +paddle.fluid.layers.zeros (ArgSpec(args=['shape', 'dtype', 'force_cpu'], varargs=None, keywords=None, defaults=(False,)), ('document', '6fe7fc0956ffbfb00c05fb5ea5b575ea')) +paddle.fluid.layers.reverse (ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=None), ('document', 'e735e3fe926f5cc9f68a2336fe229681')) paddle.fluid.layers.has_inf (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', 'aca8a35516cef98af836fb6a64ac8acb')) paddle.fluid.layers.has_nan (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', '99f4cf36db08a4e23c8c3857e2af1316')) paddle.fluid.layers.isfinite (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', 'b9fff4ffc8d11934cde099f4c39bf841')) @@ -347,7 +347,7 @@ paddle.fluid.layers.Switch ('paddle.fluid.layers.control_flow.Switch', ('documen paddle.fluid.layers.Switch.__init__ (ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.layers.Switch.case (ArgSpec(args=['self', 'condition'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.layers.Switch.default (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) -paddle.fluid.layers.increment (ArgSpec(args=['x', 'value', 'in_place'], varargs=None, keywords=None, defaults=(1.0, True)), ('document', 'f88b5787bb80ae6b8bf513a70dabbdc1')) +paddle.fluid.layers.increment (ArgSpec(args=['x', 'value', 'in_place'], varargs=None, keywords=None, defaults=(1.0, True)), ('document', 'c5fe17688e64c2d7e57312128d61da98')) paddle.fluid.layers.array_write (ArgSpec(args=['x', 'i', 'array'], varargs=None, keywords=None, defaults=(None,)), ('document', 'd357f71a280bf06aab4c79de9bd4facf')) paddle.fluid.layers.create_array (ArgSpec(args=['dtype'], varargs=None, keywords=None, defaults=None), ('document', '556de793fdf24d515f3fc91260e2c048')) paddle.fluid.layers.less_than (ArgSpec(args=['x', 'y', 'force_cpu', 'cond'], varargs=None, keywords=None, defaults=(None, None)), ('document', '329bdde01cba69463b08b8c13015560a')) diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py index 466ae0e8db..f7a0ded432 100755 --- a/python/paddle/fluid/layers/control_flow.py +++ b/python/paddle/fluid/layers/control_flow.py @@ -1018,26 +1018,24 @@ def array_to_lod_tensor(x, table): def increment(x, value=1.0, in_place=True): """ - This function performs an 
operation that increments the value in the - input :math:`x` by an amount: :math:`value` as mentioned in the input - parameter. This operation is performed in-place by default. Notice that - the number of elements in :math:`x` must be equal to 1. + The OP is usually used for control flow to increment the data of :attr:`x` by an amount :attr:`value`. + Notice that the number of elements in :attr:`x` must be equal to 1. - Args: - x (Variable|list): The tensor that has the input values. - value (float): The amount by which the values should be incremented. - in_place (bool): If the increment should be performed in-place. + Parameters: + x (Variable): A tensor that must always contain only one element, its data type supports + float32, float64, int32 and int64. + value (float, optional): The amount to increment the data of :attr:`x`. Default: 1.0. + in_place (bool, optional): Whether the OP should be performed in-place. Default: True. Returns: - Variable: The elementwise-incremented object. + Variable: The elementwise-incremented tensor with the same shape and data type as :attr:`x`. Examples: .. code-block:: python import paddle.fluid as fluid - data = fluid.layers.data(name='data', shape=[1], dtype='float32', - append_batch_size=False) - data = fluid.layers.increment(x=data, value=3.0, in_place=True) + counter = fluid.layers.zeros(shape=[1], dtype='float32') # [0.] + fluid.layers.increment(counter) # [1.] """ helper = LayerHelper("increment", **locals()) if not in_place: diff --git a/python/paddle/fluid/layers/io.py b/python/paddle/fluid/layers/io.py index 181e879219..bf94555846 100644 --- a/python/paddle/fluid/layers/io.py +++ b/python/paddle/fluid/layers/io.py @@ -682,70 +682,68 @@ def create_py_reader_by_data(capacity, name=None, use_double_buffer=True): """ - Create a Python reader for data feeding in Python - - This layer returns a Reader Variable. - - Works much like py_reader except that it's input is feed_list - instead of shapes, dtypes and lod_levels - - Args: - capacity(int): The buffer capacity maintained by :code:`py_reader`. - feed_list(list(Variable)): The data feed list. - name(basestring): The prefix Python queue name and Reader name. None will - be generated automatically. - use_double_buffer(bool): Whether use double buffer or not. + The OP creates a Python reader for data feeding in Python; it is similar + to :ref:`api_fluid_layers_py_reader` except that it can read data from + the list of feed variables. + + Parameters: + capacity (int): The buffer capacity maintained by :code:`py_reader`. Its unit + is the number of batches. Set a larger :attr:`capacity` if the reader is fast. + feed_list (list(Variable)): The feed variables, usually created by + :code:`fluid.data()`. + name (str, optional): Normally there is no need for the user to set this property. + For more information, please refer to :ref:`api_guide_Name`. Default: None. + use_double_buffer (bool, optional): Whether to use double buffer. If it's True, + the OP prefetches the next batch of data asynchronously. Default: True. Returns: - Variable: A Reader from which we can get feeding data. + Reader: A Reader for data feeding. The data types of the read data are the same as the data types of the variables in :attr:`feed_list`. Examples: - .. code-block:: python - - import paddle - import paddle.fluid as fluid - import paddle.dataset.mnist as mnist - import paddle.fluid.compiler as compiler - - def network(img, label): - # User defined network.
Here a simple regression as example - predict = fluid.layers.fc(input=img, size=10, act='softmax') - loss = fluid.layers.cross_entropy(input=predict, label=label) - return fluid.layers.mean(loss) - - MEMORY_OPT = False - USE_CUDA = False - - image = fluid.layers.data(name='image', shape=[1, 28, 28], dtype='float32') - label = fluid.layers.data(name='label', shape=[1], dtype='int64') - reader = fluid.layers.create_py_reader_by_data(capacity=64, - feed_list=[image, label]) - reader.decorate_paddle_reader( - paddle.reader.shuffle(paddle.batch(mnist.train(), batch_size=5), - buf_size=500)) - - img, label = fluid.layers.read_file(reader) - loss = network(img, label) # some network definition - - place = fluid.CUDAPlace(0) if USE_CUDA else fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - - build_strategy = fluid.BuildStrategy() - build_strategy.memory_optimize = True if MEMORY_OPT else False - compiled_prog = compiler.CompiledProgram( - fluid.default_main_program()).with_data_parallel( - loss_name=loss.name, - build_strategy=build_strategy, - exec_strategy=exec_strategy) + .. code-block:: python - for epoch_id in range(2): - reader.start() - try: - while True: - exe.run(compiled_prog, fetch_list=[loss.name]) - except fluid.core.EOFException: - reader.reset() + import paddle + import paddle.fluid as fluid + import paddle.dataset.mnist as mnist + + def network(img, label): + # User defined network. Here a simple regression as an example + predict = fluid.layers.fc(input=img, size=10, act='softmax') + loss = fluid.layers.cross_entropy(input=predict, label=label) + return fluid.layers.mean(loss) + + MEMORY_OPT = False + USE_CUDA = False + + image = fluid.data(name='image', shape=[None, 1, 28, 28], dtype='float32') + label = fluid.data(name='label', shape=[None, 1], dtype='int64') + reader = fluid.layers.create_py_reader_by_data(capacity=64, + feed_list=[image, label]) + reader.decorate_paddle_reader( + paddle.reader.shuffle(paddle.batch(mnist.train(), batch_size=5), buf_size=500)) + img, label = fluid.layers.read_file(reader) + loss = network(img, label) # The definition of the custom network and the loss function + + place = fluid.CUDAPlace(0) if USE_CUDA else fluid.CPUPlace() + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + + build_strategy = fluid.BuildStrategy() + build_strategy.memory_optimize = True if MEMORY_OPT else False + exec_strategy = fluid.ExecutionStrategy() + compiled_prog = fluid.compiler.CompiledProgram( + fluid.default_main_program()).with_data_parallel( + loss_name=loss.name, + build_strategy=build_strategy, + exec_strategy=exec_strategy) + + for epoch_id in range(2): + reader.start() + try: + while True: + exe.run(compiled_prog, fetch_list=[loss.name]) + except fluid.core.EOFException: + reader.reset() """ logging.warn( 'paddle.fluid.layers.create_py_reader_by_data() may be deprecated in the near future. ' diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index f5bc7fa383..5a588c7e75 100755 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -7542,70 +7542,74 @@ def hsigmoid(input, is_custom=False, is_sparse=False): """ - The hierarchical sigmoid operator is used to accelerate the training - process of language model. This operator organizes the classes into a - complete binary tree, or you can use is_custom to pass your own tree to - implement hierarchical. Each leaf node represents a class(a word) and each - internal node acts as a binary classifier.
For each word there's a unique - path from root to it's leaf node, hsigmoid calculate the cost for each - internal node on the path, and sum them to get a total cost. hsigmoid can - achive a acceleration from :math:`O(N)` to :math:`O(logN)`, where :math:`N` - represents the size of word dict. - - Using default tree you can Refer to `Hierarchical Probabilistic Neural Network Language Model - `_ - - And if you want to use the costumed tree by set 'is_custom' as true you may need to do following things first: - - 1. using your word dict to build a binary tree, each leaf node should be an word of your word dict - 2. build a dict to store word_id -> word's leaf to root path, we call it path_table. - 3. build a dict to store word_id -> code of word's leaf to root path, we call it path_code. Code - means label of each binary classification, using 1 indicate true, 0 indicate false. - 4. now, each word should has its path and code along the path, you can pass a batch of path and code - related to the same batch of inputs. + The hierarchical sigmoid organizes the classes into a complete binary tree to reduce the computational complexity + and speed up the model training, especially the training of language models. + Each leaf node of the complete binary tree represents a class (word) and each non-leaf node acts as a binary classifier. + For each class (word), there's a unique path from the root to it; hsigmoid calculates the cost of each non-leaf node on + the path and sums them to get the total cost. + Compared to softmax, the OP can reduce the computational complexity from :math:`O(N)` to :math:`O(logN)`, where :math:`N` + represents the number of classes or the size of the word dict. + + The OP supports the default tree and a custom tree. For the default tree, you can refer to `Hierarchical Probabilistic Neural + Network Language Model `. For the custom + tree, you need to set :attr:`is_custom` to True and do the following steps (take the language model as an example): + + 1. Using a custom word dict to build a binary tree; each leaf node should be a word in the word dict. + 2. Creating a dict that maps word_id -> path from the word to the root node; we call it path_table. + 3. Creating a dict that maps word_id -> code of the path from the word to the root node; we call it path_code. + A code is the label of each binary classifier, where 1 indicates true and 0 indicates false. + 4. Now each word has its path and the code along the path; you can pass a batch of paths and codes related + to the same batch of inputs. - Args: - input (Variable): The input tensor variable with shape - :math:`[N \\times D]`, where :math:`N` is the size of mini-batch, - and :math:`D` is the feature size. - label (Variable): The tensor variable contains labels of training data. - It's a tensor with shape is :math:`[N \\times 1]`. - num_classes: (int), The number of classes, must not be less than 2. with default tree this has to be set, - it should never be None under is_custom=False, but while is_custom is true, it should be non leaf num - which indicates the num of classes using by binary classify. - param_attr (ParamAttr|None): The parameter attribute for learnable parameters/weights - of hsigmoid. If it is set to None or one attribute of ParamAttr, hsigmoid - will create ParamAttr as param_attr. If the Initializer of the param_attr - is not set, the parameter is initialized with Xavier. Default: None. - bias_attr (ParamAttr|bool|None): The parameter attribute for the bias of hsigmoid.
- If it is set to False, no bias will be added to the output units. - If it is set to None or one attribute of ParamAttr, hsigmoid - will create ParamAttr as bias_attr. If the Initializer of the bias_attr - is not set, the bias is initialized zero. Default: None. - name (str|None): A name for this layer(optional). If set None, the layer - will be named automatically. Default: None. - path_table: (Variable|None) this variable can store each batch of samples' path to root, - it should be in leaf -> root order - path_table should have the same shape with path_code, and for each sample i path_table[i] indicates a np.array like - structure and each element in this array is indexes in parent nodes' Weight Matrix. - path_code: (Variable|None) this variable can store each batch of samples' code, - each code consist with every code of parent nodes. it should be in leaf -> root order - is_custom: (bool|False)using user defined binary tree instead of default complete binary tree, if costum is - set you need to set path_table/path_code/num_classes, otherwise num_classes should be set - is_sparse: (bool|False)using sparse update instead of dense update, if set, the gradient - of W and input will be sparse. + Parameters: + input (Variable): A tensor with the shape [N, D], where N is the size of mini-batch, + and D is the feature size. Its data type supports float32 and float64. + label (Variable): A tensor that contains the labels of the training data. Its shape is [N, 1] + and its data type is int64. + num_classes (int): The number of classes or the size of the word dict, must be greater than 2. + If the default tree is used (:attr:`is_custom` is set to False), :attr:`num_classes` + should not be None. If the custom tree is used (:attr:`is_custom` is set to True), + :attr:`num_classes` should be the number of non-leaf nodes, which indicates the number of + classes used by the binary classifiers. + param_attr (ParamAttr, optional): The parameter attribute for the learnable parameters/weights + of hsigmoid. If it is set to None or one attribute of ParamAttr, hsigmoid will create a + ParamAttr as param_attr. If the Initializer of the param_attr is not set, the parameter is + initialized with Xavier. Default: None. + bias_attr (ParamAttr|bool, optional): The parameter attribute for the bias of hsigmoid. If it + is set to False, no bias will be added. If it is set to None or one attribute of ParamAttr, + hsigmoid will create a ParamAttr as bias_attr. If the Initializer of the bias_attr is not + set, the bias is initialized to zero. Default: None. + name (str, optional): Normally there is no need for the user to set this property. For more information, + please refer to :ref:`api_guide_Name`. Default: None. + path_table (Variable, optional): A tensor that stores each batch of samples' path from leaf to root + node, its shape is [N, L] and its data type is int64, where L is the length of the path. For each sample i, + path_table[i] is a np.array-like structure and each element in this array is an index into the parent + nodes' weight matrix. Default: None. + path_code (Variable, optional): A tensor that stores each batch of samples' path code from leaf + to root node, its shape is [N, L] and its data type is int64, the same as :attr:`path_table`. + Each path code consists of the codes of the nodes from leaf to root. Default: None. + is_custom (bool, optional): Whether to use a custom binary tree. If it's True, :attr:`path_table`, + :attr:`path_code` and :attr:`num_classes` should all be set; otherwise, only :attr:`num_classes` should + be set.
Default: False. + is_sparse (bool, optional): Whether to use sparse updating instead of dense updating. If it's True, the + gradients of W and input will be sparse. Default: False. Returns: - Out: (LodTensor) The cost of hierarchical sigmoid operator. the shape is [N, 1] + Variable: A tensor with the cost of hierarchical sigmoid, its shape is [N, 1] and its data type is the same as :attr:`input`. Examples: .. code-block:: python import paddle.fluid as fluid - x = fluid.layers.data(name='x', shape=[2], dtype='float32') - y = fluid.layers.data(name='y', shape=[1], dtype='int64') - out = fluid.layers.hsigmoid(input=x, label=y, num_classes=6) + x = fluid.layers.fill_constant(shape=[4, 3], value=0.9, dtype='float32') + # x = [[0.9, 0.9, 0.9], [0.9, 0.9, 0.9], [0.9, 0.9, 0.9], [0.9, 0.9, 0.9]] + y = fluid.layers.fill_constant( + shape=[4, 1], value=1, dtype='int64') + # y = [[1], [1], [1], [1]] + out = fluid.layers.hsigmoid(input=x, label=y, num_classes=2, param_attr=fluid.initializer.Constant( + value=0.05), bias_attr=fluid.initializer.Constant(value=.0)) + # out = [[0.62792355], [0.62792355], [0.62792355], [0.62792355]] """ helper = LayerHelper('hierarchical_sigmoid', **locals()) diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py index 7d4875e087..e7479c5bba 100644 --- a/python/paddle/fluid/layers/tensor.py +++ b/python/paddle/fluid/layers/tensor.py @@ -415,25 +415,27 @@ def sums(input, out=None): def assign(input, output=None): """ - **Assign** + The OP copies the :attr:`input` to the :attr:`output`. - This function copies the *input* Variable to the *output* Variable. - - Args: - input(Variable|numpy.ndarray): The source variable - output(Variable|None): The destination variable + Parameters: + input (Variable|numpy.ndarray): A tensor or numpy ndarray, its data type supports + float32, float64, int32 and int64. + output (Variable, optional): A tensor. If :attr:`output` is None, a new tensor will + be created as :attr:`output`. Default: None. Returns: - Variable: The destination variable that was supplied as the *output*. + Variable: A tensor with the same shape, data type and value as :attr:`input`. Examples: .. code-block:: python import paddle.fluid as fluid - data = fluid.layers.data(name="data", shape=[3, 32, 32], dtype="float32") - out = fluid.layers.create_tensor(dtype='float32') - hidden = fluid.layers.fc(input=data, size=10) - fluid.layers.assign(hidden, out) + import numpy as np + data = fluid.layers.fill_constant(shape=[3, 2], value=2.5, dtype='float64') # [[2.5, 2.5], [2.5, 2.5], [2.5, 2.5]] + result1 = fluid.layers.create_tensor(dtype='float64') + fluid.layers.assign(data, result1) # result1 = [[2.5, 2.5], [2.5, 2.5], [2.5, 2.5]] + result2 = fluid.layers.assign(data) # result2 = [[2.5, 2.5], [2.5, 2.5], [2.5, 2.5]] + result3 = fluid.layers.assign(np.array([[2.5, 2.5], [2.5, 2.5], [2.5, 2.5]], dtype='float32')) # result3 = [[2.5, 2.5], [2.5, 2.5], [2.5, 2.5]] """ helper = LayerHelper('assign', **locals()) if isinstance(input, Variable): @@ -808,25 +810,25 @@ def argsort(input, axis=-1, name=None): def ones(shape, dtype, force_cpu=False): """ - **ones** + The OP creates a tensor of specified :attr:`shape` and :attr:`dtype`, and fills it with 1. + Its :attr:`stop_gradient` will be set to True to stop gradient computation. - This function creates a tensor of specified *shape* and - *dtype*, and initializes this with 1. - - It also sets *stop_gradient* to True. 
- - Args: - shape(tuple|list): Shape of output tensor - dtype(np.dtype|core.VarDesc.VarType|str): Data type of output tensor + Parameters: + shape (tuple|list): Shape of output tensor. + dtype (np.dtype|core.VarDesc.VarType|str): Data type of output tensor; it supports + bool, float16, float32, float64, int32 and int64. + force_cpu (bool, optional): Whether to force the output tensor to be stored in CPU memory. + If :attr:`force_cpu` is False, the output tensor will be stored in running device memory. + Default: False. Returns: - Variable: The tensor variable storing the output + Variable: A tensor of data type :attr:`dtype` with shape :attr:`shape` and all elements set to 1. Examples: .. code-block:: python import paddle.fluid as fluid - data = fluid.layers.ones(shape=[1], dtype='int64') + data = fluid.layers.ones(shape=[2, 4], dtype='float32') # [[1., 1., 1., 1.], [1., 1., 1., 1.]] """ assert isinstance(shape, list) or isinstance( shape, tuple), "The shape's type should be list or tuple." @@ -837,53 +839,50 @@ def zeros(shape, dtype, force_cpu=False): """ - **zeros** + The OP creates a tensor of specified :attr:`shape` and :attr:`dtype`, and fills it with 0. + Its :attr:`stop_gradient` will be set to True to stop gradient computation. - This function creates a tensor of specified *shape* and - *dtype*, and initializes this with 0. - - It also sets *stop_gradient* to True. - - Args: - shape(tuple|list|None): Shape of output tensor. - dtype(np.dtype|core.VarDesc.VarType|str): Data type of output tensor. - force_cpu(bool, default False): Whether to make output stay on CPU. + Parameters: + shape (tuple|list): Shape of output tensor. + dtype (np.dtype|core.VarDesc.VarType|str): Data type of output tensor; it supports + bool, float16, float32, float64, int32 and int64. + force_cpu (bool, optional): Whether to force the output tensor to be stored in CPU memory. + If :attr:`force_cpu` is False, the output tensor will be stored in running device memory. + Default: False. Returns: - Variable: The tensor variable storing the output. + Variable: A tensor of data type :attr:`dtype` with shape :attr:`shape` and all elements set to 0. Examples: .. code-block:: python import paddle.fluid as fluid - data = fluid.layers.zeros(shape=[1], dtype='int64') + data = fluid.layers.zeros(shape=[3, 2], dtype='float32') # [[0., 0.], [0., 0.], [0., 0.]] """ return fill_constant(value=0.0, **locals()) def reverse(x, axis): """ - **reverse** + The OP reverses the tensor :attr:`x` along the given :attr:`axis`. - This function reverse the input 'x' along given axises. - - Args: - x(Vairbale): the input to be reversed. - axis(int|tuple|list): Axis that along which order of elements - is reversed. If it is a tuple or a list, reversing - will be apply on each axis in the tuple or list. + Parameters: + x (Variable): A tensor to be reversed, its data type supports bool, float32, float64, int32, int64 and uint8. + axis (int|tuple|list): A dimension or a set of dimensions of :attr:`x` to reverse. Must be + in the range [-rank( :attr:`x` ), rank( :attr:`x` )). If it is a tuple or a list, reversing + will be applied to each axis in the tuple or list. Returns: - Variable: The reversed tensor. + Variable: The reversed tensor with the same shape and data type as :attr:`x`. Examples: .. 
code-block:: python import paddle.fluid as fluid - data = fluid.layers.data(name="data", shape=[4, 8], dtype="float32") - out = fluid.layers.reverse(x=data, axis=0) - # or: - out = fluid.layers.reverse(x=data, axis=[0,1]) + import numpy as np + data = fluid.layers.assign(np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8]], dtype='float32')) # [[0., 1., 2.], [3., 4., 5.], [6., 7., 8.]] + result1 = fluid.layers.reverse(data, 0) # [[6., 7., 8.], [3., 4., 5.], [0., 1., 2.]] + result2 = fluid.layers.reverse(data, [0, 1]) # [[8., 7., 6.], [5., 4., 3.], [2., 1., 0.]] """ if isinstance(axis, int): axis = [axis] -- GitLab
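Reviewer's note: to sanity-check the refreshed docstring examples end to end, the ops whose docs this patch touches can be chained into one small program. The snippet below is an illustrative sketch only, not part of the patch; it assumes the Paddle Fluid 1.x executor API shown in the docstrings above (CPU place, default startup/main programs).

.. code-block:: python

    import numpy as np
    import paddle.fluid as fluid

    # Build a small graph exercising ones, zeros, assign, reverse and increment.
    ones_t = fluid.layers.ones(shape=[2, 4], dtype='float32')    # all elements 1.
    zeros_t = fluid.layers.zeros(shape=[3, 2], dtype='float32')  # all elements 0.
    data = fluid.layers.assign(
        np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8]], dtype='float32'))
    rev = fluid.layers.reverse(data, [0, 1])                     # reverse both axes
    counter = fluid.layers.zeros(shape=[1], dtype='float32')
    fluid.layers.increment(counter)                              # in-place by default: [1.]

    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(fluid.default_startup_program())
    o, z, r, c = exe.run(fluid.default_main_program(),
                         fetch_list=[ones_t, zeros_t, rev, counter])
    print(o)  # [[1. 1. 1. 1.] [1. 1. 1. 1.]]
    print(r)  # [[8. 7. 6.] [5. 4. 3.] [2. 1. 0.]]
    print(c)  # [1.]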