diff --git a/python/paddle/autograd/py_layer.py b/python/paddle/autograd/py_layer.py
index e187ea171d65820a8e2a16393a59f3f423568509..08c4fb9ac8d5ddb38ef1b8dc0907f4b76b95a133 100644
--- a/python/paddle/autograd/py_layer.py
+++ b/python/paddle/autograd/py_layer.py
@@ -27,6 +27,10 @@ def with_mateclass(meta, *bases):
 
 
 class PyLayerContext:
+    """
+    ``PyLayerContext`` can assist the :ref:`api_paddle_autograd_PyLayer` in implementing certain functionalities.
+    """
+
     def save_for_backward(self, *tensors):
         """
         Saves given tensors that backward need. Use ``saved_tensor`` in the `backward` to get the saved tensors.
@@ -242,10 +246,32 @@ class PyLayerMeta(type):
 
 
 class PyLayer(with_mateclass(PyLayerMeta, core.eager.PyLayer, PyLayerContext)):
+    """
+    Paddle implements Python custom operators on the PaddlePaddle framework by creating a subclass of
+    ``PyLayer``, which must comply with the following rules:
+
+    1. The subclass must contain static ``forward`` and ``backward`` functions, with the first argument being
+    :ref:`api_paddle_autograd_PyLayerContext`. If a returned value in ``backward`` corresponds to a ``Tensor`` that
+    requires gradients in ``forward``, the returned value must be a ``Tensor``.
+
+    2. Except for the first argument, the other arguments of ``backward`` are the gradients of the output ``Tensors``
+    of ``forward``. Therefore, the number of input ``Tensor`` in ``backward`` must be the same as the number
+    of output ``Tensor`` in ``forward``. If you need to use input ``Tensor`` from ``forward`` in ``backward``,
+    you can save these ``Tensors`` by passing them to :ref:`api_paddle_autograd_PyLayerContext`'s
+    ``save_for_backward`` method and use them in ``backward`` later.
+
+    3. The output of ``backward`` can be ``Tensor`` or ``list/tuple(Tensor)``, which are the gradients of the
+    input ``Tensor`` of ``forward``. Therefore, the number of output ``Tensor`` in ``backward`` is the same
+    as the number of input ``Tensor`` in ``forward``.
+
+    After building the custom operator, apply it by running the ``apply`` method.
+
+    """
+
     @staticmethod
     def forward(ctx, *args, **kwargs):
         """
-        It is to be overloaded by subclasses. It must accept a object of `PyLayerContext` as
+        It is to be overloaded by subclasses. It must accept an object of :ref:`api_paddle_autograd_PyLayerContext` as
         the first argument, followed by any number of arguments (tensors or other types).
         `None` can not be included in the returned result.
@@ -285,9 +311,9 @@ class PyLayer(with_mateclass(PyLayerMeta, core.eager.PyLayer, PyLayerContext)):
     def backward(ctx, *args):
         """
         This is a function to calculate the gradient. It is to be overloaded by subclasses.
-        It must accept a object of `PyLayerContext` as the first argument, and the rest
-        arguments are the gradient of forward's output tensors. Output tensors of backward
-        are the gradient of forward's input tensors.
+        It must accept an object of :ref:`api_paddle_autograd_PyLayerContext` as the first
+        argument, and the rest arguments are the gradient of forward's output tensors.
+        Output tensors of backward are the gradient of forward's input tensors.
 
         Args:
             *args(tuple): The gradient of forward's output tensor(s).
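For reference, a minimal sketch of a ``PyLayer`` subclass that follows the three rules described in the new docstring (the ``cus_tanh`` name, input shape, and printed output are illustrative, not part of this patch):

.. code-block:: python

    import paddle
    from paddle.autograd import PyLayer

    class cus_tanh(PyLayer):
        @staticmethod
        def forward(ctx, x):
            y = paddle.tanh(x)
            # Save y so backward can reuse it instead of recomputing tanh(x) (rule 2).
            ctx.save_for_backward(y)
            return y

        @staticmethod
        def backward(ctx, dy):
            # One gradient input, because forward returned one Tensor (rule 2);
            # one gradient output, because forward took one Tensor input (rule 3).
            y, = ctx.saved_tensor()
            return dy * (1 - paddle.square(y))

    x = paddle.randn([2, 3])
    x.stop_gradient = False
    y = cus_tanh.apply(x)  # run the custom operator via apply
    y.sum().backward()
    print(x.grad.shape)    # [2, 3]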
diff --git a/python/paddle/nn/initializer/Bilinear.py b/python/paddle/nn/initializer/Bilinear.py
index b3a1766d07cccc35f16011fa91d97754fc80dbd3..9782521c959fc8615f4d402f0e8a8d26f807995c 100644
--- a/python/paddle/nn/initializer/Bilinear.py
+++ b/python/paddle/nn/initializer/Bilinear.py
@@ -27,7 +27,10 @@ class Bilinear(Initializer):
     """
     This initializer can be used in transposed convolution operator to
     act as upsampling. Users can upsample a feature map with shape of
-    (B, C, H, W) by any integer factor. The usage is:
+    (B, C, H, W) by any integer factor.
+
+    Returns:
+        A ``Bilinear`` initializer instance.
 
     Examples:
diff --git a/python/paddle/tensor/logic.py b/python/paddle/tensor/logic.py
index b0421c0f6ee1483a8244d98dc7c5ea5e2ea1886d..bb2060e55cfa73843f37ab727bd84c9f7a3cdfb0 100644
--- a/python/paddle/tensor/logic.py
+++ b/python/paddle/tensor/logic.py
@@ -357,8 +357,7 @@ def allclose(x, y, rtol=1e-05, atol=1e-08, equal_nan=False, name=None):
     .. math::
         \left| x - y \right| \leq atol + rtol \times \left| y \right|
 
-    elementwise, for all elements of :math:`x` and :math:`y`. The behaviour of this
-    operator is analogous to :math:`numpy.allclose`, namely that it returns :math:`True` if
+    elementwise, for all elements of :math:`x` and :math:`y`. This is analogous to :math:`numpy.allclose`, namely that it returns :math:`True` if
     two tensors are elementwise equal within a tolerance.
 
     Args:
diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py
index fed360474c803f1a951a96fc7ee4a834ba1869f8..7c4a8b38e1e16622d0ebacf6be0f429d3fc339b5 100644
--- a/python/paddle/tensor/manipulation.py
+++ b/python/paddle/tensor/manipulation.py
@@ -2813,25 +2813,25 @@ def scatter(x, index, updates, overwrite=True, name=None):
 
     .. code-block:: python
 
-        import numpy as np
+        import paddle
         #input:
-        x = np.array([[1, 1], [2, 2], [3, 3]])
-        index = np.array([2, 1, 0, 1])
+        x = paddle.to_tensor([[1, 1], [2, 2], [3, 3]], dtype='float32')
+        index = paddle.to_tensor([2, 1, 0, 1], dtype='int64')
         # shape of updates should be the same as x
         # shape of updates with dim > 1 should be the same as input
-        updates = np.array([[1, 1], [2, 2], [3, 3], [4, 4]])
+        updates = paddle.to_tensor([[1, 1], [2, 2], [3, 3], [4, 4]], dtype='float32')
         overwrite = False
         # calculation:
         if not overwrite:
             for i in range(len(index)):
-                x[index[i]] = np.zeros((2))
+                x[index[i]] = paddle.zeros([2])
         for i in range(len(index)):
             if (overwrite):
                 x[index[i]] = updates[i]
             else:
                 x[index[i]] += updates[i]
         # output:
-        out = np.array([[3, 3], [6, 6], [1, 1]])
+        out = paddle.to_tensor([[3, 3], [6, 6], [1, 1]])
         out.shape  # [3, 2]
 
     **NOTICE**: The order in which updates are applied is nondeterministic,
@@ -2841,10 +2841,10 @@ def scatter(x, index, updates, overwrite=True, name=None):
     Args:
        x (Tensor): The input N-D Tensor with ndim>=1. Data type can be float32, float64.
        index (Tensor): The index is a 1-D or 0-D Tensor. Data type can be int32, int64. The length of index cannot exceed updates's length, and the value in index cannot exceed input's length.
        updates (Tensor): Update input with updates parameter based on index. When the index is a 1-D tensor, the updates shape should be the same as input, and dim value with dim > 1 should be the same as input. When the index is a 0-D tensor, the updates should be a (N-1)-D tensor, the ith dim of the updates should be queal with the (i+1)th dim of the input.
-       overwrite (bool): The mode that updating the output when there are same indices.
+       overwrite (bool, optional): The mode for updating the output when there are duplicate indices.
            If True, use the overwrite mode to update the output of the same index,
-            if False, use the accumulate mode to update the output of the same index.Default value is True.
+            if False, use the accumulate mode to update the output of the same index. Default value is True.
        name(str, optional): The default value is None. Normally there is no need for user to set this property.
            For more information, please refer to :ref:`api_guide_Name` .
diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py
index 923a5322308bb6e971328da7adcb67e77a192b74..ad38da6df68ffb26d830d76fb8e587acc01e0d1b 100644
--- a/python/paddle/tensor/math.py
+++ b/python/paddle/tensor/math.py
@@ -750,6 +750,9 @@ def floor_divide(x, y, name=None):
     .. math::
         out = trunc(x / y)
 
+    - :math:`x`: Multidimensional Tensor.
+    - :math:`y`: Multidimensional Tensor.
+
     Note:
         ``paddle.floor_divide`` supports broadcasting. If you want know more about broadcasting, please refer to `Introduction to Tensor`_ .
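A quick illustration of the ``floor_divide`` broadcasting behavior noted above (the tensors are illustrative; values are non-negative, so trunc and floor agree):

.. code-block:: python

    import paddle

    # x and y are both Tensors; y broadcasts over x's first axis.
    x = paddle.to_tensor([[7, 8, 9], [4, 5, 6]], dtype='int64')
    y = paddle.to_tensor([2, 3, 4], dtype='int64')

    out = paddle.floor_divide(x, y)
    print(out.numpy())
    # [[3 2 2]
    #  [2 1 1]]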
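And a runnable check of the converted ``scatter`` pseudocode, using the same data as the docstring: with ``overwrite=False``, updates that hit the duplicate index are accumulated.

.. code-block:: python

    import paddle

    x = paddle.to_tensor([[1, 1], [2, 2], [3, 3]], dtype='float32')
    index = paddle.to_tensor([2, 1, 0, 1], dtype='int64')
    updates = paddle.to_tensor([[1, 1], [2, 2], [3, 3], [4, 4]], dtype='float32')

    # accumulate mode: both updates for the duplicate index 1 are summed into row 1
    out = paddle.scatter(x, index, updates, overwrite=False)
    print(out.numpy())
    # [[3. 3.]
    #  [6. 6.]
    #  [1. 1.]]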