diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec
index 4b08e34de1d029b7059593bdd985a3a1e243effa..1ae3cd3213b152a2975daa444b09b1da9182f720 100644
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@@ -1079,7 +1079,7 @@ paddle.fluid.regularizer.L1DecayRegularizer ('paddle.fluid.regularizer.L1DecayRe
 paddle.fluid.regularizer.L1DecayRegularizer.__init__ (ArgSpec(args=['self', 'regularization_coeff'], varargs=None, keywords=None, defaults=(0.0,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.regularizer.L2DecayRegularizer ('paddle.fluid.regularizer.L2DecayRegularizer', ('document', 'e5d02740904686c1c50e8f80c1582861'))
 paddle.fluid.regularizer.L2DecayRegularizer.__init__ (ArgSpec(args=['self', 'regularization_coeff'], varargs=None, keywords=None, defaults=(0.0,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
-paddle.fluid.LoDTensor ('paddle.fluid.core_avx.LoDTensor', ('document', '25e8432ed1b9a375868bc8911359aa0d'))
+paddle.fluid.LoDTensor ('paddle.fluid.core_avx.LoDTensor', ('document', '8ee00d246c952b92e5e8ca2d92a4fc00'))
 paddle.fluid.LoDTensor.__init__ 1. __init__(self: paddle.fluid.core_avx.LoDTensor, arg0: List[List[int]]) -> None  2. __init__(self: paddle.fluid.core_avx.LoDTensor) -> None
 paddle.fluid.LoDTensor.has_valid_recursive_sequence_lengths has_valid_recursive_sequence_lengths(self: paddle.fluid.core_avx.LoDTensor) -> bool
 paddle.fluid.LoDTensor.lod lod(self: paddle.fluid.core_avx.LoDTensor) -> List[List[int]]
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index 721c6be2a1f2770767711d752ef025e3aefbfff2..9858b6c8a189a457c98146dbeeafe2ae4140eee6 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -381,33 +381,92 @@ PYBIND11_MODULE(core_noavx, m) {
              return reinterpret_cast<uintptr_t>(self.mutable_data(place, type));
            })
       .def("_clear", &Tensor::clear)
-      .def("set", PyCPUTensorSetFromArray<float>)
-      .def("set", PyCPUTensorSetFromArray<int>)
-      .def("set", PyCPUTensorSetFromArray<double>)
-      .def("set", PyCPUTensorSetFromArray<int64_t>)
-      .def("set", PyCPUTensorSetFromArray<bool>)
-      .def("set", PyCPUTensorSetFromArray<uint16_t>)
-      .def("set", PyCPUTensorSetFromArray<uint8_t>)
-      .def("set", PyCPUTensorSetFromArray<int8_t>)
+      .def("set", PyCPUTensorSetFromArray<float>, py::arg("array"),
+           py::arg("place"))
+      .def("set", PyCPUTensorSetFromArray<int>, py::arg("array"),
+           py::arg("place"))
+      .def("set", PyCPUTensorSetFromArray<double>, py::arg("array"),
+           py::arg("place"))
+      .def("set", PyCPUTensorSetFromArray<int64_t>, py::arg("array"),
+           py::arg("place"))
+      .def("set", PyCPUTensorSetFromArray<bool>, py::arg("array"),
+           py::arg("place"))
+      .def("set", PyCPUTensorSetFromArray<uint16_t>, py::arg("array"),
+           py::arg("place"))
+      .def("set", PyCPUTensorSetFromArray<uint8_t>, py::arg("array"),
+           py::arg("place"))
+      .def("set", PyCPUTensorSetFromArray<int8_t>, py::arg("array"),
+           py::arg("place"))
 #ifdef PADDLE_WITH_CUDA
-      .def("set", PyCUDATensorSetFromArray<float>)
-      .def("set", PyCUDATensorSetFromArray<int>)
-      .def("set", PyCUDATensorSetFromArray<double>)
-      .def("set", PyCUDATensorSetFromArray<int64_t>)
-      .def("set", PyCUDATensorSetFromArray<bool>)
-      .def("set", PyCUDATensorSetFromArray<uint16_t>)
-      .def("set", PyCUDATensorSetFromArray<uint8_t>)
-      .def("set", PyCUDATensorSetFromArray<int8_t>)
-      .def("set", PyCUDAPinnedTensorSetFromArray<float>)
-      .def("set", PyCUDAPinnedTensorSetFromArray<int>)
-      .def("set", PyCUDAPinnedTensorSetFromArray<double>)
-      .def("set", PyCUDAPinnedTensorSetFromArray<int64_t>)
-      .def("set", PyCUDAPinnedTensorSetFromArray<bool>)
-      .def("set", PyCUDAPinnedTensorSetFromArray<uint16_t>)
-      .def("set", PyCUDAPinnedTensorSetFromArray<uint8_t>)
-      .def("set", PyCUDAPinnedTensorSetFromArray<int8_t>)
+      .def("set", PyCUDATensorSetFromArray<float>, py::arg("array"),
+           py::arg("place"))
+      .def("set", PyCUDATensorSetFromArray<int>, py::arg("array"),
+           py::arg("place"))
+      .def("set", PyCUDATensorSetFromArray<double>, py::arg("array"),
+           py::arg("place"))
+      .def("set", PyCUDATensorSetFromArray<int64_t>, py::arg("array"),
+           py::arg("place"))
+      .def("set", PyCUDATensorSetFromArray<bool>, py::arg("array"),
+           py::arg("place"))
+      .def("set", PyCUDATensorSetFromArray<uint16_t>, py::arg("array"),
+           py::arg("place"))
+      .def("set", PyCUDATensorSetFromArray<uint8_t>, py::arg("array"),
+           py::arg("place"))
+      .def("set", PyCUDATensorSetFromArray<int8_t>, py::arg("array"),
+           py::arg("place"))
+      .def("set", PyCUDAPinnedTensorSetFromArray<float>, py::arg("array"),
+           py::arg("place"))
+      .def("set", PyCUDAPinnedTensorSetFromArray<int>, py::arg("array"),
+           py::arg("place"))
+      .def("set", PyCUDAPinnedTensorSetFromArray<double>, py::arg("array"),
+           py::arg("place"))
+      .def("set", PyCUDAPinnedTensorSetFromArray<int64_t>, py::arg("array"),
+           py::arg("place"))
+      .def("set", PyCUDAPinnedTensorSetFromArray<bool>, py::arg("array"),
+           py::arg("place"))
+      .def("set", PyCUDAPinnedTensorSetFromArray<uint16_t>, py::arg("array"),
+           py::arg("place"))
+      .def("set", PyCUDAPinnedTensorSetFromArray<uint8_t>, py::arg("array"),
+           py::arg("place"))
+      .def("set", PyCUDAPinnedTensorSetFromArray<int8_t>, py::arg("array"),
+           py::arg("place"), R"DOC(
+        Set the data of LoDTensor on place with given numpy array.
+        
+        Args:
+          array (numpy.ndarray): The data to set.
+          place (CPUPlace|CUDAPlace|CUDAPinnedPlace): The place where the
+              LoDTensor is to be set.
+
+        Returns:
+            None.
+
+        Examples:
+            .. code-block:: python
+
+                import paddle.fluid as fluid
+                import numpy as np
+
+                t = fluid.LoDTensor()
+                t.set(np.ndarray([5, 30]), fluid.CPUPlace())
+          )DOC")
 #endif
-      .def("shape", [](Tensor &self) { return vectorize(self.dims()); })
+      .def("shape", [](Tensor &self) { return vectorize(self.dims()); }, R"DOC(
+           Return the shape of LoDTensor.
+
+           Returns:
+               list[int]: The shape of LoDTensor.
+
+
+           Examples:
+               .. code-block:: python
+
+                  import paddle.fluid as fluid
+                  import numpy as np
+
+                  t = fluid.LoDTensor()
+                  t.set(np.ndarray([5, 30]), fluid.CPUPlace())
+                  print(t.shape())  # [5, 30]
+           )DOC")
       .def("_set_float_element", TensorSetElement<float>)
       .def("_get_float_element", TensorGetElement<float>)
       .def("_set_double_element", TensorSetElement<double>)
@@ -421,39 +480,82 @@ PYBIND11_MODULE(core_noavx, m) {
         return ostr.str();
       });
 
+  // TODO(cql): add reference: en_user_guide_lod_tensor
   py::class_<LoDTensor, Tensor>(m, "LoDTensor", R"DOC(
-    LoDTensor is a Tensor with optional LoD information.
-
-    np.array(lod_tensor) can convert LoDTensor to numpy array.
-    lod_tensor.lod() can retrieve the LoD information.
-
-    LoD is short for Level of Details and is usually used for varied sequence
-    length. You can skip the following comment if you don't need optional LoD.
-
-    For example, a LoDTensor X can look like the example below. It contains
-    2 sequences. The first has length 2 and the second has length 3, as
-    described by x.lod.
-
-    The first tensor dimension 5=2+3 is calculated from LoD if it's available.
-    It means the total number of sequence element. In X, each element has 2
-    columns, hence [5, 2].
-
-    x.lod  = [[2, 3]]
-
-    x.data = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]]
-
-    x.shape = [5, 2]
-
-    LoD can have multiple levels (for example, a paragraph can have multiple
-    sentences and a sentence can have multiple words). In the following
-    LodTensor Y, the lod_level is 2. It means there are 2 sequence, the
-    first sequence length is 2 (has 2 sub-sequences), the second one's
-    length is 1. The first sequence's 2 sub-sequences have length 2 and 2,
-    respectively. And the second sequence's 1 sub-sequence has length 3.
-
-    y.lod = [[2 1], [2 2 3]]
-
-    y.shape = [2+2+3, ...]
+    LoDTensor is a Tensor with optional LoD (Level of Details) information, 
+    it can be used for variable-length sequences, 
+    see :ref:`user_guide_lod_tensor` for details.
+
+    LoDTensor can be converted to numpy array using :code:`numpy.array(lod_tensor)`.
+
+    You can skip the following explanation if you don't need to know details 
+    of LoDTensor.
+
+    The following two examples show how to use LoDTensor to represent
+    variable-length sequences.
+    
+    Example 1:
+    
+    Suppose x is a LoDTensor representing a variable-length sequence. 
+    It contains two logical sequences, the length of first logical sequence
+    is 2 (e.g., number of samples is 2), the length of second logical sequence 
+    is 3, and the total length is 5. The data of the first logical sequence is 
+    [1, 2], [3, 4], and the data of the second logical sequence is [5, 6], 
+    [7, 8], [9, 10]. The data dimension of each sample is 2. So, the final 
+    shape of the LoDTensor is [5, 2], of which 5 is the total length and 2 is 
+    the dimension of each sample.
+    
+    Logically, we can represent the variable-length sequence in two ways: one 
+    is in the form of recursive sequence lengths, that is, 
+    x.recursive_sequence_lengths=[[2, 3]]; the other is in the form of offsets, 
+    that is, x.lod=[[0, 2, 2+3]]. These two representations are equivalent, and 
+    you can set and retrieve recursive_sequence_lengths or LoD through the 
+    corresponding interfaces of LoDTensor introduced later.
+
+    Internally, in order to access sequences faster, Paddle stores the
+    sequence information as offsets rather than lengths.
+    Therefore, operations on recursive_sequence_lengths are eventually
+    converted to operations on LoD.
+    
+    .. code-block:: python
+
+      x.data = [[1, 2], [3, 4],
+                [5, 6], [7, 8],
+                [9, 10]]
+
+      x.shape = [2+3, 2]
+
+      x.recursive_sequence_lengths = [[2, 3]]
+
+      x.lod = [[0, 2, 5]]
+
+    Example 2:
+
+    LoD may have more than one level (for example, a paragraph may have more 
+    than one sentence and a sentence may have more than one word). Suppose y 
+    is a LoDTensor and its lod_level is 2. 
+    From level = 0, there are two logical sequences, the length of which is 
+    2 and 1, respectively, indicating that the first logical sequence contains 
+    two sub-sequences and the second logical sequence contains one sub-sequence. 
+    From level = 1, the lengths of the two sub-sequences contained by the
+    first logical sequence are 2 and 2, and the length of the sub-sequence
+    contained by the second logical sequence is 3.
+      
+    Therefore, the LoDTensor is represented in the form of recursive sequence 
+    lengths as y.recursive_sequence_lengths=[[2,1], [2,2,3]]; and equally, in 
+    the form of offset, it is represented as y.lod=[[0,2,3], [0,2,4,7]].
+
+    .. code-block:: python
+
+      y.data = [[1, 2], [3, 4],
+                [5, 6], [7, 8],
+                [9, 10], [11, 12], [13, 14]]
+
+      y.shape = [2+2+3, 2]
+
+      y.recursive_sequence_lengths = [[2, 1], [2, 2, 3]]
+
+      y.lod = [[0, 2, 3], [0, 2, 4, 7]]
 
     Examples:
         .. code-block:: python
@@ -462,16 +564,6 @@ PYBIND11_MODULE(core_noavx, m) {
 
           t = fluid.LoDTensor()
 
-  Note:
-      In above description, LoD is length-based. In Paddle internal
-      implementation, lod is offset-based. Hence, internally,
-      y.lod is represented as [[0, 2, 3], [0, 2, 4, 7]] (length-based
-      equivlent would be [[2-0, 3-2], [2-0, 4-2, 7-4]]).
-
-      Sometimes LoD is called recursive_sequence_length to be more
-      self-explanatory. In this case, it must be length-based. Due to history
-      reasons. when LoD is called lod in public API, it might be offset-based.
-      Users should be careful about it.
         )DOC")
       .def("__array__", [](Tensor &self) { return TensorToPyArray(self); })
       .def("__init__",
@@ -510,7 +602,10 @@ PYBIND11_MODULE(core_noavx, m) {
            Set LoD of the LoDTensor.
 
            Args:
-               lod (List[List[int]]): the lod to be set.
+               lod (list[list[int]]): The lod to set.
+
+           Returns:
+                None.
 
            Examples:
                .. code-block:: python
@@ -521,6 +616,7 @@ PYBIND11_MODULE(core_noavx, m) {
                  t = fluid.LoDTensor()
                  t.set(np.ndarray([5, 30]), fluid.CPUPlace())
                  t.set_lod([[0, 2, 5]])
+                 print(t.lod()) # [[0, 2, 5]]
            )DOC")
       .def("set_recursive_sequence_lengths",
            [](LoDTensor &self, const std::vector<std::vector<size_t>>
@@ -539,14 +635,17 @@ PYBIND11_MODULE(core_noavx, m) {
              self.set_lod(new_offset_lod);
            },
            py::arg("recursive_sequence_lengths"), R"DOC(
-           Set LoD of the LoDTensor according to recursive sequence length.
+           Set LoD of the LoDTensor according to recursive sequence lengths.
 
-           For example, if recursive_sequence_lengths=[[2, 3]], meaning that
+           For example, if recursive_sequence_lengths=[[2, 3]], which means
            there are two sequences with length 2 and 3 respectively, the
-           corresponding lod would be [[0, 2, 2+3]], i.e, [[0, 2, 5]].
+           corresponding lod would be [[0, 2, 2+3]], i.e., [[0, 2, 5]].
 
            Args:
-                recursive_sequence_lengths (List[List[int]]): sequence lengths.
+                recursive_sequence_lengths (list[list[int]]): The recursive sequence lengths.
+           
+           Returns:
+                None.
 
            Examples:
                .. code-block:: python
@@ -557,6 +656,8 @@ PYBIND11_MODULE(core_noavx, m) {
                  t = fluid.LoDTensor()
                  t.set(np.ndarray([5, 30]), fluid.CPUPlace())
                  t.set_recursive_sequence_lengths([[2, 3]])
+                 print(t.recursive_sequence_lengths())  # [[2, 3]]
+                 print(t.lod())  # [[0, 2, 5]]
            )DOC")
       .def("lod",
            [](LoDTensor &self) -> std::vector<std::vector<size_t>> {
@@ -571,8 +672,8 @@ PYBIND11_MODULE(core_noavx, m) {
            Return the LoD of the LoDTensor.
 
            Returns:
-               out (List[List[int]]): the lod of the LoDTensor.
-
+               list[list[int]]: The lod of the LoDTensor.
+           
            Examples:
                .. code-block:: python
 
@@ -595,10 +696,11 @@ PYBIND11_MODULE(core_noavx, m) {
              return new_lod;
            },
            R"DOC(
-           Return the sequence length of the LoDTensor corresponding to LoD.
+           Return the recursive sequence lengths corresponding to the LoD
+           of the LoDTensor.
 
            Returns:
-               out (List[List[int]): the sequence lengths.
+                list[list[int]]: The recursive sequence lengths.
 
            Examples:
                .. code-block:: python
@@ -618,10 +720,10 @@ PYBIND11_MODULE(core_noavx, m) {
              return CheckLoD(self.lod(), vectorize(self.dims()).front());
            },
            R"DOC(
-           Check whether the lod of the LoDTensor is valid.
+           Check whether the LoD of the LoDTensor is valid.
 
            Returns:
-               out (bool): whether the lod is valid.
+               bool: Whether the LoD is valid.
 
            Examples:
                .. code-block:: python