diff --git a/imperative/python/megengine/core/autodiff/grad.py b/imperative/python/megengine/core/autodiff/grad.py
index fbd5d2a647ba7c55e9a5da597d56bc122a2d9b67..072f09d2e633ebeb31f8c92de1548d31350dff97 100644
--- a/imperative/python/megengine/core/autodiff/grad.py
+++ b/imperative/python/megengine/core/autodiff/grad.py
@@ -91,6 +91,7 @@ class Function(ops.PyOpBase):
     Examples:
 
     .. code-block::
+
         class Sigmoid(Function):
             def forward(self, x):
                 y = 1 / (1 + F.exp(-x))
diff --git a/imperative/python/megengine/core/tensor/array_method.py b/imperative/python/megengine/core/tensor/array_method.py
index 689e760826897daf7ca218f1041fad2470a836a5..d489bf2b88bd5a5eadb267ef1f4cc695a09b6778 100644
--- a/imperative/python/megengine/core/tensor/array_method.py
+++ b/imperative/python/megengine/core/tensor/array_method.py
@@ -362,6 +362,9 @@ class ArrayMethodMixin(abc.ABC):
 
     @property
     def ndim(self):
+        r"""
+        Returns the number of dimensions of self :class:`~.Tensor`.
+        """
         shape = self._tuple_shape
         if shape is None:
             raise ValueError("unkown ndim")
@@ -369,6 +372,10 @@ class ArrayMethodMixin(abc.ABC):
 
     @property
     def size(self):
+        r"""
+        Returns the number of elements of this :class:`~.Tensor` (the product of its shape).
+        If the shape is a plain :class:`tuple`, the result is a Python scalar.
+        """
         shape = self.shape
         if shape.__class__ is tuple:
             return np.prod(self.shape).item()
@@ -376,9 +383,16 @@ class ArrayMethodMixin(abc.ABC):
 
     @property
     def T(self):
+        r"""
+        Alias of :meth:`~.Tensor.transpose`.
+        """
         return self.transpose()
 
     def item(self, *args):
+        r"""
+        Returns the value of this :class:`~.Tensor` as a standard Python :class:`numbers.Number`. 
+        This only works for tensors with one element. For other cases, see :meth:`~.tolist`.
+        """
         if not args:
             if isinstance(self.size, int):
                 assert self.size == 1
@@ -386,12 +400,26 @@ class ArrayMethodMixin(abc.ABC):
         return self[args].item()
 
     def tolist(self):
+        r"""
+        Returns the tensor as a (nested) list. 
+        For scalars, a standard Python number is returned, just like with :meth:`~.item`. 
+        Tensors are automatically moved to the CPU first if necessary.
+
+        This operation is not differentiable.
+        """
         return self.numpy().tolist()
 
     def astype(self, dtype):
+        r"""
+        Returns a :class:`Tensor` with the same data and number of elements
+        with the specified :attr:`~.Tensor.dtype`.
+        """
         return utils.astype(self, dtype)
 
     def reshape(self, *args):
+        r"""
+        See :func:`~.reshape`.
+        """
         return _reshape(self, _expand_args(args))
 
     # FIXME: remove this method
@@ -399,6 +427,9 @@ class ArrayMethodMixin(abc.ABC):
         return _broadcast(self, _expand_args(args))
 
     def transpose(self, *args):
+        r"""
+        See :func:`~.transpose`.
+        """
         if self.ndim == 0:
             assert (
                 len(args) == 0
@@ -411,19 +442,22 @@ class ArrayMethodMixin(abc.ABC):
         return _transpose(self, _expand_args(args))
 
     def flatten(self):
+        r"""
+        See :func:`~.flatten`.
+        """
         return self.reshape(-1)
 
     def sum(self, axis=None, keepdims: bool = False):
         r"""
         Returns the sum of each row of the input tensor in the given dimension ``axis``.
+        
         If ``axis`` is a list of axises, reduce over all of them.
-
-        If ``keepdims`` is ``True``, the shape of output tensor is the same as the input tensor, except in the dimension(s) ``axis`` where it is of size 1. Otherwise, ``axis`` is squeezed(see :meth:`~.functional.tensor.squeeze`).
-
-        Same for prod/mean/max/min.
+        If ``keepdims`` is ``True``, the shape of output tensor is the same as the input tensor,
+        except in the dimension(s) ``axis`` where it is of size 1.
+        Otherwise, ``axis`` is squeezed (see :func:`~.squeeze`).
 
         :param axis: the dimension or dimensions to reduce.
-        :param keepdim: whether the output tensor has ndim retained or not.
+        :param keepdims: whether the output tensor has ndim retained or not.
         :return: output tensor.
 
         Examples:
@@ -441,12 +475,139 @@ class ArrayMethodMixin(abc.ABC):
         .. testoutput::
 
             2
-            10.
+            10.0
 
         """
         return _reduce("SUM")(self, axis, keepdims)
 
-    prod = _reduce("PRODUCT")
-    min = _reduce("MIN")
-    max = _reduce("MAX")
-    mean = _reduce("MEAN")
+    def prod(self, axis=None, keepdims: bool = False):
+        r"""
+        Returns the product of each row of the input tensor in the given dimension ``axis``.
+        
+        If ``axis`` is a list of axes, reduce over all of them.
+        If ``keepdims`` is ``True``, the shape of output tensor is the same as the input tensor,
+        except in the dimension(s) ``axis`` where it is of size 1.
+        Otherwise, ``axis`` is squeezed (see :func:`~.squeeze`).
+
+        :param axis: the dimension or dimensions to reduce.
+        :param keepdims: whether the output tensor has ndim retained or not.
+        :return: output tensor.
+
+        Examples:
+
+        .. testcode::
+
+            from megengine import tensor
+            a = tensor([False, True, True, False])
+            b = tensor([1.0, 2.0, 3.0, 4.0])
+            print(a.prod().numpy())
+            print(b.prod().numpy())
+
+        Outputs:
+
+        .. testoutput::
+
+            0
+            24.0
+
+        """
+        return _reduce("PRODUCT")(self, axis, keepdims)
+
+    def min(self, axis=None, keepdims: bool = False):
+        r"""
+        Returns the min value of each row of the input tensor in the given dimension ``axis``.
+        
+        If ``axis`` is a list of axes, reduce over all of them.
+        If ``keepdims`` is ``True``, the shape of output tensor is the same as the input tensor,
+        except in the dimension(s) ``axis`` where it is of size 1.
+        Otherwise, ``axis`` is squeezed (see :func:`~.squeeze`).
+
+        :param axis: the dimension or dimensions to reduce.
+        :param keepdims: whether the output tensor has ndim retained or not.
+        :return: output tensor.
+
+        Examples:
+
+        .. testcode::
+
+            from megengine import tensor
+            a = tensor([False, True, True, False])
+            b = tensor([1.0, 2.0, 3.0, 4.0])
+            print(a.min().numpy())
+            print(b.min().numpy())
+
+        Outputs:
+
+        .. testoutput::
+
+            False
+            1.0
+
+        """
+        return _reduce("MIN")(self, axis, keepdims)
+
+    def max(self, axis=None, keepdims: bool = False):
+        r"""
+        Returns the max value of each row of the input tensor in the given dimension ``axis``.
+        
+        If ``axis`` is a list of axes, reduce over all of them.
+        If ``keepdims`` is ``True``, the shape of output tensor is the same as the input tensor,
+        except in the dimension(s) ``axis`` where it is of size 1.
+        Otherwise, ``axis`` is squeezed (see :func:`~.squeeze`).
+
+        :param axis: the dimension or dimensions to reduce.
+        :param keepdims: whether the output tensor has ndim retained or not.
+        :return: output tensor.
+
+        Examples:
+
+        .. testcode::
+
+            from megengine import tensor
+            a = tensor([False, True, True, False])
+            b = tensor([1.0, 2.0, 3.0, 4.0])
+            print(a.max().numpy())
+            print(b.max().numpy())
+
+        Outputs:
+
+        .. testoutput::
+
+            True
+            4.0
+
+        """
+        return _reduce("MAX")(self, axis, keepdims)
+
+    def mean(self, axis=None, keepdims: bool = False):
+        r"""
+        Returns the mean value of each row of the input tensor in the given dimension ``axis``.
+        
+        If ``axis`` is a list of axes, reduce over all of them.
+        If ``keepdims`` is ``True``, the shape of output tensor is the same as the input tensor,
+        except in the dimension(s) ``axis`` where it is of size 1.
+        Otherwise, ``axis`` is squeezed (see :func:`~.squeeze`).
+
+        :param axis: the dimension or dimensions to reduce.
+        :param keepdims: whether the output tensor has ndim retained or not.
+        :return: output tensor.
+
+        Examples:
+
+        .. testcode::
+
+            from megengine import tensor
+            a = tensor([False, True, True, False])
+            b = tensor([1.0, 2.0, 3.0, 4.0])
+            print(a.mean().numpy())
+            print(b.mean().numpy())
+
+        Outputs:
+
+        .. testoutput::
+
+            0.5
+            2.5
+
+        """
+        return _reduce("MEAN")(self, axis, keepdims)
diff --git a/imperative/python/megengine/data/dataloader.py b/imperative/python/megengine/data/dataloader.py
index deafaec3a471c5591a5ff31c4688fbba356e93a6..f75e7a594f40bae8e5e390d6fd0647388176fb9c 100644
--- a/imperative/python/megengine/data/dataloader.py
+++ b/imperative/python/megengine/data/dataloader.py
@@ -42,6 +42,9 @@ def raise_timeout_error():
 
 
 class DataLoader:
+    r"""
+    Provides a convenient way to iterate on a given dataset.
+    """
     __initialized = False
 
     def __init__(
@@ -56,8 +59,6 @@ class DataLoader:
         divide: bool = False,
     ):
         r"""
-        Provides a convenient way to iterate on a given dataset.
-
         `DataLoader` combines a dataset with `sampler`, `transform` and `collator`,
         make it flexible to get minibatch continually from a dataset.
 
@@ -87,7 +88,6 @@ class DataLoader:
             different sub-process will process different batch. Default: False
 
         """
-
         if num_workers < 0:
             raise ValueError("num_workers should not be negative")
 
diff --git a/imperative/python/megengine/data/dataset/meta_dataset.py b/imperative/python/megengine/data/dataset/meta_dataset.py
index 779ab34094339cb6727fc7b5ad8e0e0076052d90..cf4567fda2b6a655e893d68f53afad0f229fe775 100644
--- a/imperative/python/megengine/data/dataset/meta_dataset.py
+++ b/imperative/python/megengine/data/dataset/meta_dataset.py
@@ -12,7 +12,8 @@ from typing import Tuple
 
 class Dataset(ABC):
     r"""
-    An abstract class for all datasets.
+    An abstract base class for all datasets.
+
     __getitem__ and __len__ method are aditionally needed.
     """
 
@@ -32,6 +33,7 @@ class Dataset(ABC):
 class StreamDataset(Dataset):
     r"""
     An abstract class for stream data.
+
     __iter__ method is aditionally needed.
     """
 
@@ -51,12 +53,14 @@ class StreamDataset(Dataset):
 
 
 class ArrayDataset(Dataset):
+    r"""
+    ArrayDataset is a dataset for numpy array data.
+
+    One or more numpy arrays are needed to initialize the dataset,
+    and all of them are expected to have the same length (number of samples).
+    """
+
     def __init__(self, *arrays):
-        r"""
-        ArrayDataset is a dataset for numpy array data, one or more numpy arrays
-         are needed to initiate the dataset. And the dimensions represented sample number
-         are expected to be the same.
-        """
         super().__init__()
         if not all(len(arrays[0]) == len(array) for array in arrays):
             raise ValueError("lengths of input arrays are inconsistent")
diff --git a/imperative/python/megengine/data/dataset/vision/cifar.py b/imperative/python/megengine/data/dataset/vision/cifar.py
index d5ff83caf12477db517ed8db3319a683a3f4dc39..81bc14331fd18ca35bc0685ff30633a378f19abb 100644
--- a/imperative/python/megengine/data/dataset/vision/cifar.py
+++ b/imperative/python/megengine/data/dataset/vision/cifar.py
@@ -21,7 +21,7 @@ logger = get_logger(__name__)
 
 
 class CIFAR10(VisionDataset):
-    r""" ``Dataset`` for CIFAR10 meta data.
+    r""" :class:`~.Dataset` for CIFAR10 meta data.
     """
 
     url_path = "http://www.cs.utoronto.ca/~kriz/"
@@ -138,6 +138,9 @@ class CIFAR10(VisionDataset):
 
 
 class CIFAR100(CIFAR10):
+    r""" :class:`~.Dataset` for CIFAR100 meta data.
+    """
+
     url_path = "http://www.cs.utoronto.ca/~kriz/"
     raw_file_name = "cifar-100-python.tar.gz"
     raw_file_md5 = "eb9058c3a382ffc7106e4002c42a8d85"
diff --git a/imperative/python/megengine/data/dataset/vision/folder.py b/imperative/python/megengine/data/dataset/vision/folder.py
index ab44141a60b4df8c9ce934f654833508d584c861..1ecafb80494e86134e6931af74c5b73e388de4b8 100644
--- a/imperative/python/megengine/data/dataset/vision/folder.py
+++ b/imperative/python/megengine/data/dataset/vision/folder.py
@@ -26,24 +26,25 @@ from .utils import is_img
 
 
 class ImageFolder(VisionDataset):
-    def __init__(self, root: str, check_valid_func=None, class_name: bool = False):
-        r"""
-        ImageFolder is a class for loading image data and labels from a organized folder.
+    r"""
+    ImageFolder is a class for loading image data and labels from an organized folder.
+
+    The folder is expected to be organized as follows: root/cls/xxx.img_ext
 
-        The folder is expected to be organized as followed: root/cls/xxx.img_ext
+    Labels are indices of sorted classes in the root directory.
 
-        Labels are indices of sorted classes in the root directory.
+    :param root: root directory of an image folder.
+    :param loader: a function used to load image from path,
+                   if ``None``, default function that loads
+                   images with PIL will be called.
+    :param check_valid_func: a function used to check if files in folder are
+                             expected image files, if ``None``, default function
+                             that checks file extensions will be called.
+    :param class_name: if ``True``, return class name instead of class index.
 
-        :param root: root directory of an image folder.
-        :param loader: a function used to load image from path,
-                       if ``None``, default function that loads
-                       images with PIL will be called.
-        :param check_valid_func: a function used to check if files in folder are
-                                 expected image files, if ``None``, default function
-                                 that checks file extensions will be called.
-        :param class_name: if ``True``, return class name instead of class index.
+    """
 
-        """
+    def __init__(self, root: str, check_valid_func=None, class_name: bool = False):
         super().__init__(root, order=("image", "image_category"))
 
         self.root = root
diff --git a/imperative/python/megengine/data/dataset/vision/mnist.py b/imperative/python/megengine/data/dataset/vision/mnist.py
index 278d749bf1e96775e6244e6c1284c28e9de36a17..1e62f1071585618207888d4b17a1273f2d492b29 100644
--- a/imperative/python/megengine/data/dataset/vision/mnist.py
+++ b/imperative/python/megengine/data/dataset/vision/mnist.py
@@ -22,7 +22,7 @@ logger = get_logger(__name__)
 
 
 class MNIST(VisionDataset):
-    r""" ``Dataset`` for MNIST meta data.
+    r""" :class:`~.Dataset` for MNIST meta data.
     """
 
     url_path = "http://yann.lecun.com/exdb/mnist/"
diff --git a/imperative/python/megengine/data/sampler.py b/imperative/python/megengine/data/sampler.py
index 1646cd4d9802a4ce4deb3e02736a111aff6eec85..c31efb98d76bd6a65d90dd662346de9bc97d1e4c 100644
--- a/imperative/python/megengine/data/sampler.py
+++ b/imperative/python/megengine/data/sampler.py
@@ -18,7 +18,7 @@ import megengine.distributed as dist
 
 class Sampler(ABC):
     r"""
-    An abstract class for all Sampler
+    An abstract base class for all Sampler
     """
 
     @abstractmethod
@@ -27,6 +27,28 @@ class Sampler(ABC):
 
 
 class MapSampler(Sampler):
+    r"""
+    Sampler for map dataset.
+
+    :type dataset: `dataset`
+    :param dataset: dataset to sample from.
+    :type batch_size: positive integer
+    :param batch_size: batch size for batch method.
+    :type drop_last: bool
+    :param drop_last: set ``True`` to drop the last incomplete batch,
+        if the dataset size is not divisible by the batch size. If ``False`` and 
+        the size of dataset is not divisible by the batch_size, then the last batch will
+        be smaller. Default: False
+    :type num_samples: positive integer
+    :param num_samples: number of samples assigned to one rank.
+    :type world_size: positive integer
+    :param world_size: number of ranks.
+    :type rank: non-negative integer within 0 and world_size
+    :param rank: rank id, non-negative integer within 0 and ``world_size``.
+    :type seed: non-negative integer
+    :param seed: seed for random operators.
+    """
+
     def __init__(
         self,
         dataset,
@@ -37,27 +59,6 @@ class MapSampler(Sampler):
         rank=None,
         seed=None,
     ):
-        r"""
-        An abstract class for all sampler.
-
-        :type dataset: `dataset`
-        :param dataset: dataset to sample from.
-        :type batch_size: positive integer
-        :param batch_size: batch size for batch method.
-        :type drop_last: bool
-        :param drop_last: set ``True`` to drop the last incomplete batch,
-            if the dataset size is not divisible by the batch size. If ``False`` and 
-            the size of dataset is not divisible by the batch_size, then the last batch will
-            be smaller. Default: False
-        :type num_samples: positive integer
-        :param num_samples: number of samples assigned to one rank.
-        :type world_size: positive integer
-        :param world_size: number of ranks.
-        :type rank: non-negative integer within 0 and world_size
-        :param rank: rank id, non-negative interger within 0 and ``world_size``.
-        :type seed: non-negative integer
-        :param seed: seed for random operators.
-        """
         if (
             not isinstance(batch_size, int)
             or isinstance(batch_size, bool)
@@ -156,7 +157,7 @@ class MapSampler(Sampler):
 
 
 class StreamSampler(Sampler):
-    """
+    r"""
     Sampler for stream dataset.
 
     .. warning::
@@ -181,6 +182,10 @@ class StreamSampler(Sampler):
 
 
 class SequentialSampler(MapSampler):
+    r"""
+    Sample elements sequentially.
+    """
+
     def __init__(
         self,
         dataset,
@@ -190,9 +195,6 @@ class SequentialSampler(MapSampler):
         world_size=None,
         rank=None,
     ):
-        r"""
-        Sample elements sequentially.
-        """
         super().__init__(dataset, batch_size, drop_last, None, world_size, rank)
         if indices is not None and not isinstance(indices, collections.abc.Sequence):
             raise ValueError(
@@ -212,6 +214,10 @@ class SequentialSampler(MapSampler):
 
 
 class RandomSampler(MapSampler):
+    r"""
+    Sample elements randomly without replacement.
+    """
+
     def __init__(
         self,
         dataset,
@@ -222,9 +228,6 @@ class RandomSampler(MapSampler):
         rank=None,
         seed=None,
     ):
-        r"""
-        Sample elements randomly without replacement.
-        """
         super().__init__(dataset, batch_size, drop_last, None, world_size, rank, seed)
         if indices is not None and not isinstance(indices, collections.abc.Sequence):
             raise ValueError(
@@ -241,6 +244,13 @@ class RandomSampler(MapSampler):
 
 
 class ReplacementSampler(MapSampler):
+    r"""
+    Sample elements randomly with replacement.
+
+    :type weights: List
+    :param weights: weights for sampling indices, it could be unnormalized weights.
+    """
+
     def __init__(
         self,
         dataset,
@@ -252,12 +262,6 @@ class ReplacementSampler(MapSampler):
         rank=None,
         seed=None,
     ):
-        r"""
-        Sample elements randomly with replacement.
-
-        :type weights: List
-        :param weights: weights for sampling indices, it could be unnormalized weights.
-        """
         super().__init__(
             dataset, batch_size, drop_last, num_samples, world_size, rank, seed
         )
diff --git a/imperative/python/megengine/data/transform/vision/transform.py b/imperative/python/megengine/data/transform/vision/transform.py
index 7d2813ec852b36ad06cd0b0df4da28a381a13b49..c39bd37def11857efe87734de821fe8ddeee340c 100644
--- a/imperative/python/megengine/data/transform/vision/transform.py
+++ b/imperative/python/megengine/data/transform/vision/transform.py
@@ -410,6 +410,10 @@ class Resize(VisionTransform):
 
 
 class ShortestEdgeResize(VisionTransform):
+    r"""
+    Resize the input data with specified shortest edge.
+    """
+
     def __init__(
         self,
         min_size,
@@ -1010,6 +1014,15 @@ class ColorJitter(VisionTransform):
 
 
 class Lighting(VisionTransform):
+    r"""
+    Apply AlexNet-Style "lighting" augmentation to input data.
+
+    Input images are assumed to have 'RGB' channel order.
+
+    The degree of color jittering is randomly sampled via a normal distribution,
+    with standard deviation given by the scale parameter.
+    """
+
     def __init__(self, scale, *, order=None):
         super().__init__(order)
         if scale < 0:
diff --git a/imperative/python/megengine/distributed/__init__.py b/imperative/python/megengine/distributed/__init__.py
index 8607256a2b2cfca0eab637cfe9c4007942dd5d44..b6e8359325c1ca74d4d796bbcba16e546e850e34 100644
--- a/imperative/python/megengine/distributed/__init__.py
+++ b/imperative/python/megengine/distributed/__init__.py
@@ -8,6 +8,7 @@
 # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 from .group import (
     WORLD,
+    Group,
     get_backend,
     get_client,
     get_mm_server_addr,
diff --git a/imperative/python/megengine/distributed/group.py b/imperative/python/megengine/distributed/group.py
index 1ccce0fa74a573e30e10b70a3c561a181b80aedc..b99ba916be4501c4ffd12a5ccd1a62c8ce896fc3 100644
--- a/imperative/python/megengine/distributed/group.py
+++ b/imperative/python/megengine/distributed/group.py
@@ -29,6 +29,17 @@ _sd = None
 
 
 class Group:
+    r"""
+    Include ranked nodes running collective communication (See :mod:`~.functional.distributed`).
+
+    By default collectives operate on the default group (also called ``WORLD``) 
+    and require all processes to enter the distributed function call. 
+
+    :param proc_ranks: rank list of the group, the first one is root rank.
+
+    
+    """
+
     def __init__(self, proc_ranks):
         if len(proc_ranks) == 0:  # empty group
             self.proc_ranks = None
diff --git a/imperative/python/megengine/functional/nn.py b/imperative/python/megengine/functional/nn.py
index 91f484cefaf748941f64f9e9cc061ebfdfc5a202..33a8484cf08c50957468972bfc8f8c01a5f2210f 100644
--- a/imperative/python/megengine/functional/nn.py
+++ b/imperative/python/megengine/functional/nn.py
@@ -108,7 +108,7 @@ def conv2d(
     """
     2D convolution operation.
 
-    Refer to :class:`~.Conv2d` for more information.
+    Refer to :class:`~.module.Conv2d` for more information.
 
     :param inp: feature map of the convolution operation.
     :param weight: convolution kernel.
@@ -1046,9 +1046,9 @@ def warp_affine(
 
     .. note::
 
-    Here all available options for params are listed,
-    however it does not mean that you can use all the combinations.
-    On different platforms, different combinations are supported.
+       Here all available options for params are listed,
+       however it does not mean that you can use all the combinations.
+       On different platforms, different combinations are supported.
     """
     op = builtin.WarpAffine(
         border_mode=border_mode, border_val=border_val, format=format, imode=imode
@@ -1088,9 +1088,9 @@ def warp_perspective(
         Default: "LINEAR". Currently only support "LINEAR" mode.
     :return: output tensor.
 
-    Note:
+    .. note::
 
-    The transformation matrix is the inverse of that used by `cv2.warpPerspective`.
+       The transformation matrix is the inverse of that used by `cv2.warpPerspective`.
 
     Examples:
 
diff --git a/imperative/python/megengine/module/batch_matmul_activation.py b/imperative/python/megengine/module/batch_matmul_activation.py
index 67bd98ac3d457ba51cd2854e11df78dfb3e0a480..4f5c8ac62272c22fdae03441a68e1b33fe8e5fbc 100644
--- a/imperative/python/megengine/module/batch_matmul_activation.py
+++ b/imperative/python/megengine/module/batch_matmul_activation.py
@@ -15,7 +15,7 @@ from .module import Module
 
 class BatchMatMulActivation(Module):
     r"""
-    Batched MatMul with activation(only relu supported), no transpose anywhere.
+    Batched :func:`~.matmul` with activation(only :func:`~.relu` supported), no transpose anywhere.
     """
 
     def __init__(
diff --git a/imperative/python/megengine/module/concat.py b/imperative/python/megengine/module/concat.py
index 46eb1d6ce55ddebaa486be71ce61cfdc47b0fcdb..117f1a81c76c877af99212b95074c8c267c5e831 100644
--- a/imperative/python/megengine/module/concat.py
+++ b/imperative/python/megengine/module/concat.py
@@ -14,8 +14,8 @@ from .module import Module
 
 class Concat(Module):
     r"""
-    A :class:`~.Module` to do functional concat. Could be replaced with :class:`~.QATModule`
-    version :class:`~.qat.concat.Concat` using :func:`~.quantize.quantize_qat`.
+    A :class:`~.Module` to do functional :func:`~.concat`. Could be replaced with :class:`~.QATModule`
+    version :class:`~.qat.Concat` using :func:`~.quantize.quantize_qat`.
     """
 
     def forward(self, inps: Iterable[Tensor], axis: int = 0):
diff --git a/imperative/python/megengine/module/conv.py b/imperative/python/megengine/module/conv.py
index 1d25ca731f93ef81b6e721cfa6957bb41da90690..0d598e1260c271f86bb5bcc62a0ee45d1a236f81 100644
--- a/imperative/python/megengine/module/conv.py
+++ b/imperative/python/megengine/module/conv.py
@@ -100,7 +100,7 @@ class Conv1d(_ConvNd):
 
     For instance, given an input of the size :math:`(N, C_{\text{in}}, H)`,
     this layer generates an output of the size
-    :math:`(N, C_{\text{out}}, H_{\text{out}}})` through the
+    :math:`(N, C_{\text{out}}, H_{\text{out}})` through the
     process described as below:
 
     .. math::
@@ -130,7 +130,7 @@ class Conv1d(_ConvNd):
         spatial dimensions. Only zero-padding is supported. Default: 0
     :param dilation: dilation of the 1D convolution operation. Default: 1
     :param groups: number of groups into which the input and output channels are divided,
-    so as to perform a "grouped convolution". When ``groups`` is not 1,
+        so as to perform a "grouped convolution". When ``groups`` is not 1,
         ``in_channels`` and ``out_channels`` must be divisible by ``groups``,
         and there would be an extra dimension at the beginning of the weight's
         shape. Specifically, the shape of weight would be `(groups,
@@ -290,7 +290,7 @@ class Conv2d(_ConvNd):
         spatial dimensions. Only zero-padding is supported. Default: 0
     :param dilation: dilation of the 2D convolution operation. Default: 1
     :param groups: number of groups into which the input and output channels are divided,
-    so as to perform a "grouped convolution". When ``groups`` is not 1,
+        so as to perform a "grouped convolution". When ``groups`` is not 1,
         ``in_channels`` and ``out_channels`` must be divisible by ``groups``,
         and there would be an extra dimension at the beginning of the weight's
         shape. Specifically, the shape of weight would be `(groups,
@@ -422,7 +422,7 @@ class ConvTranspose2d(_ConvNd):
         spatial dimensions. Only zero-padding is supported. Default: 0
     :param dilation: dilation of the 2D convolution operation. Default: 1
     :param groups: number of groups into which the input and output channels are divided,
-    so as to perform a "grouped convolution". When ``groups`` is not 1,
+        so as to perform a "grouped convolution". When ``groups`` is not 1,
         ``in_channels`` and ``out_channels`` must be divisible by ``groups``,
         and there would be an extra dimension at the beginning of the weight's
         shape. Specifically, the shape of weight would be ``(groups,
@@ -592,9 +592,8 @@ class LocalConv2d(Conv2d):
 
 class ConvRelu2d(Conv2d):
     r"""
-    A fused :class:`~.Module` including Conv2d and relu. Could be replaced
-    with :class:`~.QATModule` version :class:`~.qat.conv.ConvRelu2d` using
-    :func:`~.quantize.quantize_qat`.
+    A fused :class:`~.Module` including :class:`~.module.Conv2d` and :func:`~.relu`.
+    Could be replaced with :class:`~.QATModule` version :class:`~.qat.ConvRelu2d` using :func:`~.quantize.quantize_qat`.
     """
 
     def forward(self, inp):
diff --git a/imperative/python/megengine/module/conv_bn.py b/imperative/python/megengine/module/conv_bn.py
index 2616c6ec1bc040c7bf1b10c613c18f2adaf19e18..390ec3b11c3407ab5000da141c74223451fc64eb 100644
--- a/imperative/python/megengine/module/conv_bn.py
+++ b/imperative/python/megengine/module/conv_bn.py
@@ -51,8 +51,8 @@ class _ConvBnActivation2d(Module):
 
 class ConvBn2d(_ConvBnActivation2d):
     r"""
-    A fused :class:`~.Module` including Conv2d, BatchNorm2d. Could be replaced
-    with :class:`~.QATModule` version :class:`~.qat.conv_bn.ConvBn2d` using
+    A fused :class:`~.Module` including :class:`~.module.Conv2d` and :class:`~.module.BatchNorm2d`. 
+    Could be replaced with :class:`~.QATModule` version :class:`~.qat.ConvBn2d` using
     :func:`~.quantize.quantize_qat`.
     """
 
@@ -62,9 +62,8 @@ class ConvBn2d(_ConvBnActivation2d):
 
 class ConvBnRelu2d(_ConvBnActivation2d):
     r"""
-    A fused :class:`~.Module` including Conv2d, BatchNorm2d and relu. Could be replaced
-    with :class:`~.QATModule` version :class:`~.qat.conv_bn.ConvBnRelu2d` using
-    :func:`~.quantize.quantize_qat`.
+    A fused :class:`~.Module` including :class:`~.module.Conv2d`, :class:`~.module.BatchNorm2d` and :func:`~.relu`. 
+    Could be replaced with :class:`~.QATModule` version :class:`~.qat.ConvBnRelu2d` using :func:`~.quantize.quantize_qat`.
     """
 
     def forward(self, inp):
diff --git a/imperative/python/megengine/module/elemwise.py b/imperative/python/megengine/module/elemwise.py
index d600d3de73748b2c69a9e783650c8b6942457bf9..a3879ab8d40ff6a418973e32d0f138128c4f1975 100644
--- a/imperative/python/megengine/module/elemwise.py
+++ b/imperative/python/megengine/module/elemwise.py
@@ -12,8 +12,8 @@ from .module import Module
 
 class Elemwise(Module):
     r"""
-    A :class:`~.Module` to do elemwise operator. Could be replaced with :class:`~.QATModule`
-    version :class:`~.qat.elemwise.Elemwise` using :func:`~.quantize.quantize_qat`.
+    A :class:`~.Module` to do :mod:`~.functional.elemwise` operator. Could be replaced with :class:`~.QATModule`
+    version :class:`~.qat.Elemwise` using :func:`~.quantize.quantize_qat`.
 
     :param method: the elemwise method, support the following string.
         It will do the normal elemwise operator for float.
diff --git a/imperative/python/megengine/module/qat/batch_matmul_activation.py b/imperative/python/megengine/module/qat/batch_matmul_activation.py
index 732ef1a1e19f318e907e4cecfc559595015c0a89..3de9e1753dcb369b6f444d0ba28b2474c964905a 100644
--- a/imperative/python/megengine/module/qat/batch_matmul_activation.py
+++ b/imperative/python/megengine/module/qat/batch_matmul_activation.py
@@ -12,6 +12,10 @@ from .module import QATModule
 
 
 class BatchMatMulActivation(Float.BatchMatMulActivation, QATModule):
+    r"""
+    A :class:`~.QATModule` :class:`~.module.BatchMatMulActivation` with QAT support.
+    """
+
     def forward(self, inp):
         w_qat = self.apply_quant_weight(self.weight)
         b_qat = fake_quant_bias(self.bias, inp, w_qat)
diff --git a/imperative/python/megengine/module/qat/concat.py b/imperative/python/megengine/module/qat/concat.py
index fd856c052a246fc21e025e9a4524857f628e23bb..818462ca3d452d33a9c83829cfea7a334429e3cc 100644
--- a/imperative/python/megengine/module/qat/concat.py
+++ b/imperative/python/megengine/module/qat/concat.py
@@ -14,7 +14,7 @@ from .module import QATModule
 
 class Concat(Float.Concat, QATModule):
     r"""
-    A :class:`~.QATModule` to do functional concat with QAT support.
+    A :class:`~.QATModule` to do functional :func:`~.concat` with QAT support.
     Could be applied with :class:`~.Observer` and :class:`~.FakeQuantize`.
     """
 
diff --git a/imperative/python/megengine/module/qat/conv.py b/imperative/python/megengine/module/qat/conv.py
index 8e5fe40809dfe4367f6bb2e5120dd449928f1c26..c5f842dcea904c124d42415a82e487354a73a4ac 100644
--- a/imperative/python/megengine/module/qat/conv.py
+++ b/imperative/python/megengine/module/qat/conv.py
@@ -13,7 +13,7 @@ from .module import QATModule
 
 class Conv2d(Float.Conv2d, QATModule):
     r"""
-    A :class:`~.QATModule` Conv2d with QAT support.
+    A :class:`~.QATModule` :class:`~.module.Conv2d` with QAT support.
     Could be applied with :class:`~.Observer` and :class:`~.FakeQuantize`.
     """
 
@@ -54,7 +54,7 @@ class Conv2d(Float.Conv2d, QATModule):
 
 class ConvRelu2d(Conv2d):
     r"""
-    A :class:`~.QATModule` include Conv2d and Relu with QAT support.
+    A :class:`~.QATModule` including :class:`~.module.Conv2d` and :func:`~.relu` with QAT support.
     Could be applied with :class:`~.Observer` and :class:`~.FakeQuantize`.
     """
 
diff --git a/imperative/python/megengine/module/qat/conv_bn.py b/imperative/python/megengine/module/qat/conv_bn.py
index e87f2f29d473ea739b6fbd8af6380bd92737896c..409c49b5d7637a39a008f2501d57cf83b3445598 100644
--- a/imperative/python/megengine/module/qat/conv_bn.py
+++ b/imperative/python/megengine/module/qat/conv_bn.py
@@ -164,7 +164,7 @@ class _ConvBnActivation2d(Float._ConvBnActivation2d, QATModule):
 
 class ConvBn2d(_ConvBnActivation2d):
     r"""
-    A fused :class:`~.QATModule` including Conv2d, BatchNorm2d with QAT support.
+    A fused :class:`~.QATModule` including :class:`~.module.Conv2d` and :class:`~.module.BatchNorm2d` with QAT support.
     Could be applied with :class:`~.Observer` and :class:`~.FakeQuantize`.
     """
 
@@ -174,7 +174,7 @@ class ConvBn2d(_ConvBnActivation2d):
 
 class ConvBnRelu2d(_ConvBnActivation2d):
     r"""
-    A fused :class:`~.QATModule` including Conv2d, BatchNorm2d and relu with QAT support.
+    A fused :class:`~.QATModule` including :class:`~.module.Conv2d`, :class:`~.module.BatchNorm2d` and :func:`~.relu` with QAT support.
     Could be applied with :class:`~.Observer` and :class:`~.FakeQuantize`.
     """
 
diff --git a/imperative/python/megengine/module/qat/elemwise.py b/imperative/python/megengine/module/qat/elemwise.py
index 144019546ff3a20f62ca2e14e27792c99a143c60..3692bdf18bb7d74652c9bb06ede5be6f9b266436 100644
--- a/imperative/python/megengine/module/qat/elemwise.py
+++ b/imperative/python/megengine/module/qat/elemwise.py
@@ -11,10 +11,10 @@ from .module import QATModule
 
 class Elemwise(Float.Elemwise, QATModule):
     r"""
-    A :class:`~.QATModule` to do elemwise operator with QAT support.
+    A :class:`~.QATModule` to do :mod:`~.functional.elemwise` operator with QAT support.
     Could be applied with :class:`~.Observer` and :class:`~.FakeQuantize`.
 
-    :param method: the elemwise method, see :class:`~.module.elemwise.Elemwise` for detail.
+    :param method: the elemwise method, see :class:`~.module.Elemwise` for detail.
     """
 
     with_weight = False
diff --git a/imperative/python/megengine/module/qat/linear.py b/imperative/python/megengine/module/qat/linear.py
index 598777d2bf6f152f57c073437bd54f281a516c2e..98bf2452d78727c1d807b1402e827fe7bd8e8f10 100644
--- a/imperative/python/megengine/module/qat/linear.py
+++ b/imperative/python/megengine/module/qat/linear.py
@@ -12,7 +12,7 @@ from .module import QATModule
 
 class Linear(Float.Linear, QATModule):
     r"""
-    A :class:`~.QATModule` version of :class:`~.module.linear.Linear`.
+    A :class:`~.QATModule` version of :class:`~.module.Linear`.
     Could be applied with :class:`~.Observer` and :class:`~.FakeQuantize`.
 
     :param in_features: size of each input sample.
diff --git a/imperative/python/megengine/module/qat/module.py b/imperative/python/megengine/module/qat/module.py
index b53a76d6629c7328dca1dcb7dcb4931cf9720d84..85eca44951290af5a7bf3a85ddb30f3533caf84b 100644
--- a/imperative/python/megengine/module/qat/module.py
+++ b/imperative/python/megengine/module/qat/module.py
@@ -14,9 +14,9 @@ from ..module import Module
 
 class QATModule(Module):
     r"""
-    Base class of quantized-float related Module, basically for QAT and Calibration.
+    Base class of quantized-float related :class:`~.Module`, basically for QAT and Calibration.
 
-    Use :meth:`~.QATModule.from_float_module` to generate a instance from float :class:`~.Module`.
+    Use :meth:`from_float_module` to generate an instance from a float :class:`~.Module`.
     Or use :func:`~.quantize.quantize_qat` to do it recursively and automatically.
 
     Can also be converted to :class:`~.QuantizedModule` for deployment using
diff --git a/imperative/python/megengine/module/qat/quant_dequant.py b/imperative/python/megengine/module/qat/quant_dequant.py
index 3fd49c64bde4a9b948677e4e1c2d90276d295e87..e75a35b8e6faba5da222bcdd451023d86166762a 100644
--- a/imperative/python/megengine/module/qat/quant_dequant.py
+++ b/imperative/python/megengine/module/qat/quant_dequant.py
@@ -11,7 +11,7 @@ from .module import QATModule
 
 class QuantStub(Float.QuantStub, QATModule):
     r"""
-    A helper QATModule simply return input, but will quantize
+    A helper :class:`~.QATModule` that simply returns input, but will quantize
     input after converted to :class:`~.QuantizedModule`.
     """
 
@@ -31,7 +31,7 @@ class QuantStub(Float.QuantStub, QATModule):
 
 class DequantStub(Float.DequantStub, QATModule):
     r"""
-    A helper QATModule simply return input, but will de-quantize
+    A helper :class:`~.QATModule` that simply returns input, but will de-quantize
     input after converted to :class:`~.QuantizedModule`.
     """
 
diff --git a/imperative/python/megengine/module/quantized/batch_matmul_activation.py b/imperative/python/megengine/module/quantized/batch_matmul_activation.py
index 604a478d4611d49690507b91729d948e16fd4bf4..0ce763f5903bddfc5d0288ad5af7f272b3905ca9 100644
--- a/imperative/python/megengine/module/quantized/batch_matmul_activation.py
+++ b/imperative/python/megengine/module/quantized/batch_matmul_activation.py
@@ -19,6 +19,8 @@ from .module import QuantizedModule
 
 
 class BatchMatMulActivation(Float.BatchMatMulActivation, QuantizedModule):
+    r"""Quantized version of :class:`~.qat.BatchMatMulActivation`."""
+
     def __init__(
         self,
         batch: int,
diff --git a/imperative/python/megengine/module/quantized/concat.py b/imperative/python/megengine/module/quantized/concat.py
index a84e22ccb794c920d77656282e2197d41baedb98..f8cc6b8a13cdd98d0fb8624ad682710018cae6a9 100644
--- a/imperative/python/megengine/module/quantized/concat.py
+++ b/imperative/python/megengine/module/quantized/concat.py
@@ -15,7 +15,7 @@ from .module import QuantizedModule
 
 class Concat(QuantizedModule):
     r"""
-    A :class:`~.QuantizedModule` to do quantized concat, used for inference only.
+    A :class:`~.QuantizedModule` to do quantized :func:`~.concat`, used for inference only.
     """
 
     def __init__(self, dtype=None):
diff --git a/imperative/python/megengine/module/quantized/conv.py b/imperative/python/megengine/module/quantized/conv.py
index cda723ed683f1b2171f7149b3e766a9e5de33ce4..34e51a726e908250b234bf3bf78d452e46d0dc52 100644
--- a/imperative/python/megengine/module/quantized/conv.py
+++ b/imperative/python/megengine/module/quantized/conv.py
@@ -18,11 +18,11 @@ from .module import QuantizedModule
 
 
 class Conv2d(Float.Conv2d, QuantizedModule):
-    r"""Quantized version of :class:`~.qat.conv.Conv2d`."""
-    r"""
+    r"""Quantized version of :class:`~.qat.Conv2d`.
+
     Applies a 2D convolution over a quantized input tensor, used for inference only.
 
-    The parameter is same with :class: `~.Conv2d`.
+    The parameter is same with :class:`~.module.Conv2d`.
     """
 
     def __init__(
@@ -102,7 +102,7 @@ class Conv2d(Float.Conv2d, QuantizedModule):
 
 
 class ConvRelu2d(Conv2d):
-    r"""Quantized version of :class:`~.qat.conv.ConvRelu2d`."""
+    r"""Quantized version of :class:`~.qat.ConvRelu2d`."""
 
     def forward(self, inp):
         return self.calc_conv_quantized(inp, nonlinear_mode="RELU")
diff --git a/imperative/python/megengine/module/quantized/conv_bn.py b/imperative/python/megengine/module/quantized/conv_bn.py
index 79fb5e7e98459ceaacfe5ced23de1b79383cfd48..55b9466a01a9e066a69ecbce26549771c45c266f 100644
--- a/imperative/python/megengine/module/quantized/conv_bn.py
+++ b/imperative/python/megengine/module/quantized/conv_bn.py
@@ -14,7 +14,7 @@ class _ConvBnActivation2d(Conv2d):
     r"""
     Applies a 2D convolution over a quantized input tensor, used for inference only.
 
-    The parameter is same with :class: `~.Conv2d`.
+    The parameter is same with :class:`~.module.Conv2d`.
     """
 
     @classmethod
@@ -44,14 +44,14 @@ class _ConvBnActivation2d(Conv2d):
 
 
 class ConvBn2d(_ConvBnActivation2d):
-    r"""Quantized version of :class:`~.qat.conv_bn.ConvBn2d`."""
+    r"""Quantized version of :class:`~.qat.ConvBn2d`."""
 
     def forward(self, inp):
         return self.calc_conv_quantized(inp, nonlinear_mode="IDENTITY")
 
 
 class ConvBnRelu2d(_ConvBnActivation2d):
-    r"""Quantized version of :class:`~.qat.conv_bn.ConvBnRelu2d`."""
+    r"""Quantized version of :class:`~.qat.ConvBnRelu2d`."""
 
     def forward(self, inp):
         return self.calc_conv_quantized(inp, nonlinear_mode="RELU")
diff --git a/imperative/python/megengine/module/quantized/elemwise.py b/imperative/python/megengine/module/quantized/elemwise.py
index 7285b6cee47f3adc91d885cc2dfb1ad62e63e364..6a76c7b84911fcc21182e832c511370a417fcc9c 100644
--- a/imperative/python/megengine/module/quantized/elemwise.py
+++ b/imperative/python/megengine/module/quantized/elemwise.py
@@ -12,7 +12,7 @@ from .module import QuantizedModule
 
 
 class Elemwise(QuantizedModule):
-    r"""Quantized version of :class:`~.qat.elemwise.Elemwise`."""
+    r"""Quantized version of :class:`~.qat.Elemwise`."""
 
     def __init__(self, method, dtype=None):
         super().__init__()
diff --git a/imperative/python/megengine/module/quantized/linear.py b/imperative/python/megengine/module/quantized/linear.py
index 095e450155421d881628b95ae2089d5dc0328591..51a32581dfcb753c9a36b9eb0a80ac040950864a 100644
--- a/imperative/python/megengine/module/quantized/linear.py
+++ b/imperative/python/megengine/module/quantized/linear.py
@@ -15,7 +15,7 @@ from .module import QuantizedModule
 
 
 class Linear(QuantizedModule):
-    r"""Quantized version of :class:`~.qat.linear.Linear`."""
+    r"""Quantized version of :class:`~.qat.Linear`."""
 
     def __init__(self, dtype: np.dtype = None):
         super().__init__()
diff --git a/imperative/python/megengine/module/quantized/module.py b/imperative/python/megengine/module/quantized/module.py
index 0f24b336a5e8d6326c99d9c3540c2e4a8983237b..dad477ed56ed2737b54d05f2172f2eee2e9aff9a 100644
--- a/imperative/python/megengine/module/quantized/module.py
+++ b/imperative/python/megengine/module/quantized/module.py
@@ -13,8 +13,8 @@ from ..qat import QATModule
 
 class QuantizedModule(Module):
     r"""
-    Base class of quantized Module, which should be converted from QATModule
-    and not support traning.
+    Base class of quantized :class:`~.Module`,
+    which should be converted from :class:`~.QATModule` and does not support training.
     """
 
     def __call__(self, *inputs, **kwargs):
diff --git a/imperative/python/megengine/module/quantized/quant_dequant.py b/imperative/python/megengine/module/quantized/quant_dequant.py
index 1f6daae9a3d18b4ac90033f454fe69bbca77d1c9..c8eadafeef62f49fb68432f5921f5df154c9e700 100644
--- a/imperative/python/megengine/module/quantized/quant_dequant.py
+++ b/imperative/python/megengine/module/quantized/quant_dequant.py
@@ -11,7 +11,7 @@ from .module import QuantizedModule
 
 class QuantStub(QuantizedModule):
     r"""
-    Quantized version of :class:`~.qat.quant_dequant.QuantStub`,
+    Quantized version of :class:`~.qat.QuantStub`,
     will convert input to quantized dtype.
     """
 
@@ -33,7 +33,7 @@ class QuantStub(QuantizedModule):
 
 class DequantStub(QuantizedModule):
     r"""
-    Quantized version of :class:`~.qat.quant_dequant.DequantStub`,
+    Quantized version of :class:`~.qat.DequantStub`,
     will restore quantized input to float32 dtype.
     """
 
diff --git a/imperative/python/megengine/tensor.py b/imperative/python/megengine/tensor.py
index fc0ca2d351ab8d146f8992f1644066725d10cfe3..9194c34bea5313674f63890a51281317cc0fb439 100644
--- a/imperative/python/megengine/tensor.py
+++ b/imperative/python/megengine/tensor.py
@@ -24,6 +24,10 @@ from .utils.naming import auto_naming
 
 
 class Tensor(_Tensor, ArrayMethodMixin):
+    r"""
+    A tensor object represents a multidimensional, homogeneous array of fixed-size items.
+    """
+
     grad = None
     dmap_callback = None
     _q_dict = None
@@ -59,6 +63,20 @@ class Tensor(_Tensor, ArrayMethodMixin):
 
     @property
     def shape(self) -> Union[tuple, "Tensor"]:
+        r"""
+        Returns a :class:`tuple` or a :class:`~.Tensor` representing the tensor dimensions.
+
+        .. note::
+           
+           The shape of a tensor is usually represented by a :class:`tuple`.
+           But if a tensor is treated as a symbolic placeholder with tracing,
+           its shape could also be a :class:`~.Tensor`. See :class:`~.trace` for more details.
+
+        The shape property is usually used to get the current shape of a tensor, 
+        but may also be used to reshape the tensor in-place by assigning a tuple of tensor dimensions to it. 
+        As with :func:`~.reshape`, one of the new shape dimensions can be -1, 
+        in which case its value is inferred from the size of the tensor and the remaining dimensions.
+        """
         shape = super().shape
         if shape == () or not use_symbolic_shape():
             return shape
@@ -68,8 +86,18 @@ class Tensor(_Tensor, ArrayMethodMixin):
     def _tuple_shape(self):
         return super().shape
 
+    @property
+    def device(self) -> CompNode:
+        r"""
+        Returns a string representing the device a :class:`~.Tensor` is stored on.
+        """
+        return super().device
+
     @property
     def dtype(self) -> np.dtype:
+        r"""
+        Returns a :class:`numpy.dtype` object representing the data type of a :class:`~.Tensor`.
+        """
         return super().dtype
 
     @property
@@ -79,8 +107,17 @@ class Tensor(_Tensor, ArrayMethodMixin):
         return self._q_dict
 
     def numpy(self) -> np.ndarray:
+        r"""
+        Returns self :class:`~.Tensor` as a :class:`numpy.ndarray`.
+        """
         return super().numpy()
 
+    def detach(self):
+        r"""
+        Returns a new :class:`~.Tensor`, detached from the current graph.
+        """
+        return super().detach()
+
     def _reset(self, other):
         super()._reset(other)
 
@@ -113,6 +150,9 @@ class Tensor(_Tensor, ArrayMethodMixin):
         self *= 0
 
     def to(self, device):
+        r"""
+        Copies self :class:`~.Tensor` to the specified device. See :func:`~.copy`.
+        """
         if isinstance(device, str) and not _valid_device(device):
             raise ValueError(
                 "invalid device name {}. For the correct format of the device name, please refer to the instruction of megengine.device.set_default_device()".format(