From ed2bb0519a1e620f36fa302550316dbb509c06d2 Mon Sep 17 00:00:00 2001 From: Nyakku Shigure Date: Fri, 23 Sep 2022 17:28:33 +0800 Subject: [PATCH] [CodeStyle][W191][E101] remove tabs in python files (#46288) --- .../distributed/fleet/dataset/dataset.py | 2 +- .../incubate/nn/layer/fused_transformer.py | 2 +- python/paddle/incubate/sparse/unary.py | 2 +- python/paddle/nn/functional/common.py | 188 +++++++++--------- python/paddle/nn/functional/loss.py | 51 +++-- python/paddle/nn/layer/loss.py | 12 +- python/paddle/nn/layer/norm.py | 96 ++++----- python/paddle/optimizer/adagrad.py | 26 +-- python/paddle/optimizer/adam.py | 26 +-- python/paddle/optimizer/adamax.py | 26 +-- python/paddle/optimizer/adamw.py | 12 +- python/paddle/tensor/attribute.py | 6 - python/paddle/tensor/manipulation.py | 20 +- python/paddle/text/datasets/conll05.py | 12 +- 14 files changed, 234 insertions(+), 247 deletions(-) diff --git a/python/paddle/distributed/fleet/dataset/dataset.py b/python/paddle/distributed/fleet/dataset/dataset.py index ed06a0db684..907a099f0e8 100755 --- a/python/paddle/distributed/fleet/dataset/dataset.py +++ b/python/paddle/distributed/fleet/dataset/dataset.py @@ -1493,7 +1493,7 @@ class BoxPSDataset(InMemoryDataset): filelist = ["a.txt", "b.txt"] dataset.set_filelist(filelist) dataset.load_into_memory() - """ + """ self._prepare_to_run() self.boxps.load_into_memory() diff --git a/python/paddle/incubate/nn/layer/fused_transformer.py b/python/paddle/incubate/nn/layer/fused_transformer.py index ba14ac5b865..3af26db37a0 100644 --- a/python/paddle/incubate/nn/layer/fused_transformer.py +++ b/python/paddle/incubate/nn/layer/fused_transformer.py @@ -684,7 +684,7 @@ class FusedTransformerEncoderLayer(Layer): .. code-block:: python - # required: gpu + # required: gpu import paddle from paddle.incubate.nn import FusedTransformerEncoderLayer diff --git a/python/paddle/incubate/sparse/unary.py b/python/paddle/incubate/sparse/unary.py index 621e31bc3e8..bb18a571547 100644 --- a/python/paddle/incubate/sparse/unary.py +++ b/python/paddle/incubate/sparse/unary.py @@ -511,7 +511,7 @@ def coalesce(x): #[[0, 1], [1, 2]] print(sp_x.values()) #[3.0, 3.0] - """ + """ return _C_ops.sparse_coalesce(x) diff --git a/python/paddle/nn/functional/common.py b/python/paddle/nn/functional/common.py index 007f2ee1853..34818152f9a 100644 --- a/python/paddle/nn/functional/common.py +++ b/python/paddle/nn/functional/common.py @@ -347,23 +347,23 @@ def interpolate(x, Examples: .. 
code-block:: python - import paddle - import paddle.nn.functional as F - - input_data = paddle.randn(shape=(2,3,6,10)).astype(paddle.float32) - output_1 = F.interpolate(x=input_data, size=[12,12]) - print(output_1.shape) - # [2L, 3L, 12L, 12L] - - # given scale - output_2 = F.interpolate(x=input_data, scale_factor=[2,1]) - print(output_2.shape) - # [2L, 3L, 12L, 10L] - - # bilinear interp - output_3 = F.interpolate(x=input_data, scale_factor=[2,1], mode="bilinear") - print(output_2.shape) - # [2L, 3L, 12L, 10L] + import paddle + import paddle.nn.functional as F + + input_data = paddle.randn(shape=(2,3,6,10)).astype(paddle.float32) + output_1 = F.interpolate(x=input_data, size=[12,12]) + print(output_1.shape) + # [2L, 3L, 12L, 12L] + + # given scale + output_2 = F.interpolate(x=input_data, scale_factor=[2,1]) + print(output_2.shape) + # [2L, 3L, 12L, 10L] + + # bilinear interp + output_3 = F.interpolate(x=input_data, scale_factor=[2,1], mode="bilinear") + print(output_2.shape) + # [2L, 3L, 12L, 10L] """ data_format = data_format.upper() resample = mode.upper() @@ -818,17 +818,17 @@ def upsample(x, or 5-D Tensor of the shape (num_batches, channels, out_d, out_h, out_w) or (num_batches, out_d, out_h, out_w, channels). Examples: - .. code-block:: python + .. code-block:: python - import paddle - import paddle.nn as nn + import paddle + import paddle.nn as nn - input_data = paddle.randn(shape=(2,3,6,10)).astype(paddle.float32) - upsample_out = paddle.nn.Upsample(size=[12,12]) + input_data = paddle.randn(shape=(2,3,6,10)).astype(paddle.float32) + upsample_out = paddle.nn.Upsample(size=[12,12]) - output = upsample_out(x=input_data) - print(output.shape) - # [2L, 3L, 12L, 12L] + output = upsample_out(x=input_data) + print(output.shape) + # [2L, 3L, 12L, 12L] """ return interpolate(x, size, scale_factor, mode, align_corners, align_mode, @@ -842,30 +842,30 @@ def bilinear(x1, x2, weight, bias=None, name=None): See :ref:`api_nn_Bilinear` for details and output shape. Parameters: - x1 (Tensor): the first input tensor, it's data type should be float32, float64. - x2 (Tensor): the second input tensor, it's data type should be float32, float64. - weight (Parameter): The learnable weights of this layer, shape is [out_features, in1_features, in2_features]. - bias (Parameter, optional): The learnable bias(Bias) of this layer, shape is [1, out_features]. If it is set to None, no bias will be added to the output units. The default value is None. - name (str, optional): The default value is None. Normally there is no need for user - to set this property. For more information, please refer to :ref:`api_guide_Name`. Default: None. + x1 (Tensor): the first input tensor, it's data type should be float32, float64. + x2 (Tensor): the second input tensor, it's data type should be float32, float64. + weight (Parameter): The learnable weights of this layer, shape is [out_features, in1_features, in2_features]. + bias (Parameter, optional): The learnable bias(Bias) of this layer, shape is [1, out_features]. If it is set to None, no bias will be added to the output units. The default value is None. + name (str, optional): The default value is None. Normally there is no need for user + to set this property. For more information, please refer to :ref:`api_guide_Name`. Default: None. Returns: - Tensor: A 2-D Tensor of shape [batch_size, out_features]. + Tensor: A 2-D Tensor of shape [batch_size, out_features]. Examples: - .. code-block:: python + .. 
code-block:: python - import paddle - import paddle.nn.functional as F + import paddle + import paddle.nn.functional as F - x1 = paddle.randn((5, 5)).astype(paddle.float32) - x2 = paddle.randn((5, 4)).astype(paddle.float32) - w = paddle.randn((1000, 5, 4)).astype(paddle.float32) - b = paddle.randn((1, 1000)).astype(paddle.float32) + x1 = paddle.randn((5, 5)).astype(paddle.float32) + x2 = paddle.randn((5, 4)).astype(paddle.float32) + w = paddle.randn((1000, 5, 4)).astype(paddle.float32) + b = paddle.randn((1, 1000)).astype(paddle.float32) - result = F.bilinear(x1, x2, w, b) - print(result.shape) - # [5, 1000] + result = F.bilinear(x1, x2, w, b) + print(result.shape) + # [5, 1000] """ if in_dygraph_mode(): @@ -1008,38 +1008,38 @@ def dropout(x, .. code-block:: python - import paddle - - x = paddle.to_tensor([[1,2,3], [4,5,6]]).astype(paddle.float32) - y_train = paddle.nn.functional.dropout(x, 0.5) - y_test = paddle.nn.functional.dropout(x, 0.5, training=False) - y_0 = paddle.nn.functional.dropout(x, axis=0) - y_1 = paddle.nn.functional.dropout(x, axis=1) - y_01 = paddle.nn.functional.dropout(x, axis=[0,1]) - print(x) - # Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True, - # [[1., 2., 3.], - # [4., 5., 6.]]) - print(y_train) - # Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True, - # [[2. , 0. , 6. ], - # [8. , 0. , 12.]]) - print(y_test) - # Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True, - # [[1., 2., 3.], - # [4., 5., 6.]]) - print(y_0) - # Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True, - # [[0. , 0. , 0. ], - # [8. , 10., 12.]]) - print(y_1) - # Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True, - # [[2. , 0. , 6. ], - # [8. , 0. , 12.]]) - print(y_01) - # Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True, - # [[0. , 0. , 0. ], - # [8. , 0. , 12.]]) + import paddle + + x = paddle.to_tensor([[1,2,3], [4,5,6]]).astype(paddle.float32) + y_train = paddle.nn.functional.dropout(x, 0.5) + y_test = paddle.nn.functional.dropout(x, 0.5, training=False) + y_0 = paddle.nn.functional.dropout(x, axis=0) + y_1 = paddle.nn.functional.dropout(x, axis=1) + y_01 = paddle.nn.functional.dropout(x, axis=[0,1]) + print(x) + # Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True, + # [[1., 2., 3.], + # [4., 5., 6.]]) + print(y_train) + # Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True, + # [[2. , 0. , 6. ], + # [8. , 0. , 12.]]) + print(y_test) + # Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True, + # [[1., 2., 3.], + # [4., 5., 6.]]) + print(y_0) + # Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True, + # [[0. , 0. , 0. ], + # [8. , 10., 12.]]) + print(y_1) + # Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True, + # [[2. , 0. , 6. ], + # [8. , 0. , 12.]]) + print(y_01) + # Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True, + # [[0. , 0. , 0. ], + # [8. , 0. , 12.]]) """ if not isinstance(p, (float, int, Variable)): @@ -1239,14 +1239,14 @@ def dropout3d(x, p=0.5, training=True, data_format='NCDHW', name=None): Examples: .. 
code-block:: python - import paddle + import paddle - x = paddle.randn(shape=(2, 3, 4, 5, 6)).astype(paddle.float32) - y_train = paddle.nn.functional.dropout3d(x) #train - y_test = paddle.nn.functional.dropout3d(x, training=False) #test - print(x[0,0,:,:,:]) - print(y_train[0,0,:,:,:]) # may all 0 - print(y_test[0,0,:,:,:]) + x = paddle.randn(shape=(2, 3, 4, 5, 6)).astype(paddle.float32) + y_train = paddle.nn.functional.dropout3d(x) #train + y_test = paddle.nn.functional.dropout3d(x, training=False) #test + print(x[0,0,:,:,:]) + print(y_train[0,0,:,:,:]) # may all 0 + print(y_test[0,0,:,:,:]) """ @@ -1287,19 +1287,19 @@ def alpha_dropout(x, p=0.5, training=True, name=None): Examples: .. code-block:: python - import paddle - - x = paddle.to_tensor([[-1, 1], [-1, 1]]).astype(paddle.float32) - y_train = paddle.nn.functional.alpha_dropout(x, 0.5) - y_test = paddle.nn.functional.alpha_dropout(x, 0.5, training=False) - print(y_train) - # Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True, - # [[-0.10721093, -0.77919382], - # [-0.10721093, 1.66559887]]) (randomly) - print(y_test) - # Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True, - # [[-1., 1.], - # [-1., 1.]]) + import paddle + + x = paddle.to_tensor([[-1, 1], [-1, 1]]).astype(paddle.float32) + y_train = paddle.nn.functional.alpha_dropout(x, 0.5) + y_test = paddle.nn.functional.alpha_dropout(x, 0.5, training=False) + print(y_train) + # Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True, + # [[-0.10721093, -0.77919382], + # [-0.10721093, 1.66559887]]) (randomly) + print(y_test) + # Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True, + # [[-1., 1.], + # [-1., 1.]]) """ if not isinstance(p, (float, int)): raise TypeError("p argument should be a float or int") diff --git a/python/paddle/nn/functional/loss.py b/python/paddle/nn/functional/loss.py index 8d0d64c7c61..31022690da3 100755 --- a/python/paddle/nn/functional/loss.py +++ b/python/paddle/nn/functional/loss.py @@ -472,10 +472,9 @@ def edit_distance(input, NOTE: This Api is different from fluid.metrics.EditDistance Returns: - Tuple: - - distance(Tensor): edit distance result, its data type is float32, and its shape is (batch_size, 1). - sequence_num(Tensor): sequence number, its data type is float32, and its shape is (1,). + Tuple: + distance(Tensor): edit distance result, its data type is float32, and its shape is (batch_size, 1). + sequence_num(Tensor): sequence number, its data type is float32, and its shape is (1,). Examples: .. code-block:: python @@ -2959,29 +2958,29 @@ def multi_label_soft_margin_loss(input, name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. - Shape: - input: N-D Tensor, the shape is [N, \*], N is batch size and `\*` means number of classes, available dtype is float32, float64. The sum operationoperates over all the elements. - label: N-D Tensor, same shape as the input. - weight:N-D Tensor, the shape is [N,1] - output: scalar. If :attr:`reduction` is ``'none'``, then same shape as the input. + Shape: + input: N-D Tensor, the shape is [N, \*], N is batch size and `\*` means number of classes, available dtype is float32, float64. The sum operationoperates over all the elements. + label: N-D Tensor, same shape as the input. + weight:N-D Tensor, the shape is [N,1] + output: scalar. If :attr:`reduction` is ``'none'``, then same shape as the input. 
- Returns: - Tensor, The tensor variable storing the multi_label_soft_margin_loss of input and label. + Returns: + Tensor, The tensor variable storing the multi_label_soft_margin_loss of input and label. - Examples: - .. code-block:: python + Examples: + .. code-block:: python - import paddle - import paddle.nn.functional as F - input = paddle.to_tensor([[1, -2, 3], [0, -1, 2], [1, 0, 1]], dtype=paddle.float32) - # label elements in {1., -1.} - label = paddle.to_tensor([[-1, 1, -1], [1, 1, 1], [1, -1, 1]], dtype=paddle.float32) - loss = F.multi_label_soft_margin_loss(input, label, reduction='none') - print(loss) - # Tensor([3.49625897, 0.71111226, 0.43989015]) - loss = F.multi_label_soft_margin_loss(input, label, reduction='mean') - print(loss) - # Tensor([1.54908717]) + import paddle + import paddle.nn.functional as F + input = paddle.to_tensor([[1, -2, 3], [0, -1, 2], [1, 0, 1]], dtype=paddle.float32) + # label elements in {1., -1.} + label = paddle.to_tensor([[-1, 1, -1], [1, 1, 1], [1, -1, 1]], dtype=paddle.float32) + loss = F.multi_label_soft_margin_loss(input, label, reduction='none') + print(loss) + # Tensor([3.49625897, 0.71111226, 0.43989015]) + loss = F.multi_label_soft_margin_loss(input, label, reduction='mean') + print(loss) + # Tensor([1.54908717]) """ if reduction not in ['sum', 'mean', 'none']: raise ValueError( @@ -3266,8 +3265,8 @@ def triplet_margin_with_distance_loss(input, distance_function (callable, optional): Quantifies the distance between two tensors. if not specified, 2 norm functions will be used. - margin (float, optional):Default: :math:`1`.A nonnegative margin representing the minimum difference - between the positive and negative distances required for the loss to be 0. + margin (float, optional): A nonnegative margin representing the minimum difference + between the positive and negative distances required for the loss to be 0. Default value is :math:`1`. swap (bool, optional):The distance swap changes the negative distance to the swap distance (distance between positive samples and negative samples) if swap distance smaller than negative distance. Default: ``False``. diff --git a/python/paddle/nn/layer/loss.py b/python/paddle/nn/layer/loss.py index 7b1415c1a50..711174c8a8c 100644 --- a/python/paddle/nn/layer/loss.py +++ b/python/paddle/nn/layer/loss.py @@ -1219,7 +1219,7 @@ class MultiLabelSoftMarginLoss(Layer): :math:`y` and :math:`x` must have the same size. Parameters: - weight (Tensor,optional): a manual rescaling weight given to each class. + weight (Tensor,optional): a manual rescaling weight given to each class. If given, has to be a Tensor of size C and the data type is float32, float64. Default is ``'None'`` . reduction (str, optional): Indicate how to average the loss by batch_size, @@ -1482,7 +1482,7 @@ class TripletMarginWithDistanceLoss(Layer): where the default `distance_function` .. math:: - d(x_i, y_i) = \left\lVert {\bf x}_i - {\bf y}_i \right\rVert_2 + d(x_i, y_i) = \left\lVert {\bf x}_i - {\bf y}_i \right\rVert_2 or user can define their own distance function. `margin` is a nonnegative margin representing the minimum difference between the positive and negative distances that is required for the loss to be 0. If `swap` is true, it will compare distance of (input, negative) with @@ -1510,15 +1510,15 @@ class TripletMarginWithDistanceLoss(Layer): Shapes: input (Tensor):Input tensor, the data type is float32 or float64. 
- the shape is [N, \*], N is batch size and `\*` means any number of additional dimensions, available dtype is float32, float64. + the shape is [N, \*], N is batch size and `\*` means any number of additional dimensions, available dtype is float32, float64. positive (Tensor):Positive tensor, the data type is float32 or float64. - The shape of label is the same as the shape of input. + The shape of label is the same as the shape of input. negative (Tensor):Negative tensor, the data type is float32 or float64. - The shape of label is the same as the shape of input. + The shape of label is the same as the shape of input. - output(Tensor): The tensor variable storing the triplet_margin_with_distance_loss of input and positive and negative. + output(Tensor): The tensor variable storing the triplet_margin_with_distance_loss of input and positive and negative. Return: A callable object of TripletMarginWithDistanceLoss diff --git a/python/paddle/nn/layer/norm.py b/python/paddle/nn/layer/norm.py index 46b4f6adefd..fe553fd2741 100644 --- a/python/paddle/nn/layer/norm.py +++ b/python/paddle/nn/layer/norm.py @@ -134,15 +134,15 @@ Where `H` means height of feature map, `W` means width of feature map. numerical stability. Default is 1e-5. momentum(float, optional): The value used for the moving_mean and moving_var computation. Default: 0.9. weight_attr(ParamAttr|bool, optional): The parameter attribute for Parameter `scale` - of instance_norm. If it is set to None or one attribute of ParamAttr, instance_norm - will create ParamAttr as weight_attr, the name of scale can be set in ParamAttr. - If the Initializer of the weight_attr is not set, the parameter is initialized - one. If it is set to False, will not create weight_attr. Default: None. + of instance_norm. If it is set to None or one attribute of ParamAttr, instance_norm + will create ParamAttr as weight_attr, the name of scale can be set in ParamAttr. + If the Initializer of the weight_attr is not set, the parameter is initialized + one. If it is set to False, will not create weight_attr. Default: None. bias_attr(ParamAttr|bool, optional): The parameter attribute for the bias of instance_norm. - If it is set to None or one attribute of ParamAttr, instance_norm - will create ParamAttr as bias_attr, the name of bias can be set in ParamAttr. - If the Initializer of the bias_attr is not set, the bias is initialized zero. - If it is set to False, will not create bias_attr. Default: None. + If it is set to None or one attribute of ParamAttr, instance_norm + will create ParamAttr as bias_attr, the name of bias can be set in ParamAttr. + If the Initializer of the bias_attr is not set, the bias is initialized zero. + If it is set to False, will not create bias_attr. Default: None. data_format(str, optional): Specify the input data format, may be "NC", "NCL". Default "NCL". name(str, optional): Name for the InstanceNorm, default is None. For more information, please refer to :ref:`api_guide_Name`.. @@ -202,15 +202,15 @@ Where `H` means height of feature map, `W` means width of feature map. numerical stability. Default is 1e-5. momentum(float, optional): The value used for the moving_mean and moving_var computation. Default: 0.9. weight_attr(ParamAttr|bool, optional): The parameter attribute for Parameter `scale` - of instance_norm. If it is set to None or one attribute of ParamAttr, instance_norm - will create ParamAttr as weight_attr, the name of scale can be set in ParamAttr. 
- If the Initializer of the weight_attr is not set, the parameter is initialized - one. If it is set to False, will not create weight_attr. Default: None. + of instance_norm. If it is set to None or one attribute of ParamAttr, instance_norm + will create ParamAttr as weight_attr, the name of scale can be set in ParamAttr. + If the Initializer of the weight_attr is not set, the parameter is initialized + one. If it is set to False, will not create weight_attr. Default: None. bias_attr(ParamAttr|bool, optional): The parameter attribute for the bias of instance_norm. - If it is set to None or one attribute of ParamAttr, instance_norm - will create ParamAttr as bias_attr, the name of bias can be set in ParamAttr. - If the Initializer of the bias_attr is not set, the bias is initialized zero. - If it is set to False, will not create bias_attr. Default: None. + If it is set to None or one attribute of ParamAttr, instance_norm + will create ParamAttr as bias_attr, the name of bias can be set in ParamAttr. + If the Initializer of the bias_attr is not set, the bias is initialized zero. + ` If it is set to False, will not create bias_attr. Default: None. data_format(str, optional): Specify the input data format, could be "NCHW". Default: NCHW. name(str, optional): Name for the InstanceNorm, default is None. For more information, please refer to :ref:`api_guide_Name`.. @@ -226,13 +226,13 @@ Where `H` means height of feature map, `W` means width of feature map. .. code-block:: python - import paddle + import paddle - x = paddle.rand((2, 2, 2, 3)) - instance_norm = paddle.nn.InstanceNorm2D(2) - instance_norm_out = instance_norm(x) + x = paddle.rand((2, 2, 2, 3)) + instance_norm = paddle.nn.InstanceNorm2D(2) + instance_norm_out = instance_norm(x) - print(instance_norm_out) + print(instance_norm_out) """ def _check_input_dim(self, input): @@ -268,15 +268,15 @@ Where `H` means height of feature map, `W` means width of feature map. numerical stability. Default is 1e-5. momentum(float, optional): The value used for the moving_mean and moving_var computation. Default: 0.9. weight_attr(ParamAttr|bool, optional): The parameter attribute for Parameter `scale` - of instance_norm. If it is set to None or one attribute of ParamAttr, instance_norm - will create ParamAttr as weight_attr, the name of scale can be set in ParamAttr. - If the Initializer of the weight_attr is not set, the parameter is initialized - one. If it is set to False, will not create weight_attr. Default: None. + of instance_norm. If it is set to None or one attribute of ParamAttr, instance_norm + will create ParamAttr as weight_attr, the name of scale can be set in ParamAttr. + If the Initializer of the weight_attr is not set, the parameter is initialized + one. If it is set to False, will not create weight_attr. Default: None. bias_attr(ParamAttr|bool, optional): The parameter attribute for the bias of instance_norm. - If it is set to None or one attribute of ParamAttr, instance_norm - will create ParamAttr as bias_attr, the name of bias can be set in ParamAttr. - If the Initializer of the bias_attr is not set, the bias is initialized zero. - If it is set to False, will not create bias_attr. Default: None. + If it is set to None or one attribute of ParamAttr, instance_norm + will create ParamAttr as bias_attr, the name of bias can be set in ParamAttr. + If the Initializer of the bias_attr is not set, the bias is initialized zero. + If it is set to False, will not create bias_attr. Default: None. 
data_format(str, optional): Specify the input data format, could be "NCDHW". Default: NCDHW. name(str, optional): Name for the InstanceNorm, default is None. For more information, please refer to :ref:`api_guide_Name`.. @@ -292,13 +292,13 @@ Where `H` means height of feature map, `W` means width of feature map. .. code-block:: python - import paddle + import paddle - x = paddle.rand((2, 2, 2, 2, 3)) - instance_norm = paddle.nn.InstanceNorm3D(2) - instance_norm_out = instance_norm(x) + x = paddle.rand((2, 2, 2, 2, 3)) + instance_norm = paddle.nn.InstanceNorm3D(2) + instance_norm_out = instance_norm(x) - print(instance_norm_out.numpy) + print(instance_norm_out.numpy) """ def _check_input_dim(self, input): @@ -318,13 +318,13 @@ class GroupNorm(Layer): num_groups(int): The number of groups that divided from channels. num_channels(int): The number of channels of input. epsilon(float, optional): The small value added to the variance to prevent - division by zero. Default: 1e-05. + division by zero. Default: 1e-05. weight_attr(ParamAttr|bool, optional): The parameter attribute for the learnable - scale :math:`g`. If it is set to False, no scale will be added to the output units. - If it is set to None, the bias is initialized one. Default: None. + scale :math:`g`. If it is set to False, no scale will be added to the output units. + If it is set to None, the bias is initialized one. Default: None. bias_attr(ParamAttr|bool, optional): The parameter attribute for the learnable - bias :math:`b`. If it is set to False, no bias will be added to the output units. - If it is set to None, the bias is initialized zero. Default: None. + bias :math:`b`. If it is set to False, no bias will be added to the output units. + If it is set to None, the bias is initialized zero. Default: None. data_format(str, optional): Specify the input data format. Only NCHW is supported. Default: NCHW. name(str, optional): Name for the GroupNorm, default is None. For more information, please refer to :ref:`api_guide_Name`.. @@ -338,17 +338,17 @@ class GroupNorm(Layer): Examples: .. code-block:: python - import paddle - import numpy as np + import paddle + import numpy as np - paddle.disable_static() - np.random.seed(123) - x_data = np.random.random(size=(2, 6, 2, 2)).astype('float32') - x = paddle.to_tensor(x_data) - group_norm = paddle.nn.GroupNorm(num_channels=6, num_groups=6) - group_norm_out = group_norm(x) + paddle.disable_static() + np.random.seed(123) + x_data = np.random.random(size=(2, 6, 2, 2)).astype('float32') + x = paddle.to_tensor(x_data) + group_norm = paddle.nn.GroupNorm(num_channels=6, num_groups=6) + group_norm_out = group_norm(x) - print(group_norm_out.numpy()) + print(group_norm_out.numpy()) """ def __init__(self, diff --git a/python/paddle/optimizer/adagrad.py b/python/paddle/optimizer/adagrad.py index 99de4243e52..180af110ac4 100644 --- a/python/paddle/optimizer/adagrad.py +++ b/python/paddle/optimizer/adagrad.py @@ -45,19 +45,19 @@ class Adagrad(Optimizer): It can be a float value or a ``Variable`` with a float type. epsilon (float, optional): A small float value for numerical stability. The default value is 1e-06. - parameters (list|tuple, optional): List/Tuple of ``Tensor`` to update to minimize ``loss``. \ - This parameter is required in dygraph mode. And you can specify different options for \ - different parameter groups such as the learning rate, weight decay, etc, \ - then the parameters are list of dict. Note that the learning_rate in paramter groups \ - represents the scale of base learning_rate. 
\ - The default value is None in static mode, at this time all parameters will be updated. - weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \ - It canbe a float value as coeff of L2 regularization or \ - :ref:`api_paddle_regularizer_L1Decay`, :ref:`api_paddle_regularizer_L2Decay`. - If a parameter has set regularizer using :ref:`api_paddle_fluid_param_attr_aramAttr` already, \ - the regularization setting here in optimizer will be ignored for this parameter. \ - Otherwise, the regularization setting here in optimizer will take effect. \ - Default None, meaning there is no regularization. + parameters (list|tuple, optional): List/Tuple of ``Tensor`` to update to minimize ``loss``. + This parameter is required in dygraph mode. And you can specify different options for + different parameter groups such as the learning rate, weight decay, etc, + then the parameters are list of dict. Note that the learning_rate in paramter groups + represents the scale of base learning_rate. + The default value is None in static mode, at this time all parameters will be updated. + weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. + It canbe a float value as coeff of L2 regularization or + :ref:`api_paddle_regularizer_L1Decay`, :ref:`api_paddle_regularizer_L2Decay`. + If a parameter has set regularizer using :ref:`api_paddle_fluid_param_attr_aramAttr` already, + the regularization setting here in optimizer will be ignored for this parameter. + Otherwise, the regularization setting here in optimizer will take effect. + Default None, meaning there is no regularization. grad_clip (GradientClipBase, optional): Gradient cliping strategy, it's an instance of some derived class of ``GradientClipBase`` . There are three cliping strategies, ClipGradByGlobalNorm, ClipGradByNorm and ClipGradByValue. Default None, diff --git a/python/paddle/optimizer/adam.py b/python/paddle/optimizer/adam.py index 26d082690b7..4f8122121b6 100644 --- a/python/paddle/optimizer/adam.py +++ b/python/paddle/optimizer/adam.py @@ -67,19 +67,19 @@ class Adam(Optimizer): epsilon (float|Tensor, optional): A small float value for numerical stability. It should be a float number or a Tensor with shape [1] and data type as float32. The default value is 1e-08. - parameters (list|tuple, optional): List/Tuple of ``Tensor`` to update to minimize ``loss``. \ - This parameter is required in dygraph mode. And you can specify different options for \ - different parameter groups such as the learning rate, weight decay, etc, \ - then the parameters are list of dict. Note that the learning_rate in paramter groups \ - represents the scale of base learning_rate. \ - The default value is None in static mode, at this time all parameters will be updated. - weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \ - It canbe a float value as coeff of L2 regularization or \ - :ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`. - If a parameter has set regularizer using :ref:`api_fluid_ParamAttr` already, \ - the regularization setting here in optimizer will be ignored for this parameter. \ - Otherwise, the regularization setting here in optimizer will take effect. \ - Default None, meaning there is no regularization. + parameters (list|tuple, optional): List/Tuple of ``Tensor`` to update to minimize ``loss``. + This parameter is required in dygraph mode. 
And you can specify different options for + different parameter groups such as the learning rate, weight decay, etc, + then the parameters are list of dict. Note that the learning_rate in paramter groups + represents the scale of base learning_rate. + The default value is None in static mode, at this time all parameters will be updated. + weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. + It canbe a float value as coeff of L2 regularization or + :ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`. + If a parameter has set regularizer using :ref:`api_fluid_ParamAttr` already, + the regularization setting here in optimizer will be ignored for this parameter. + Otherwise, the regularization setting here in optimizer will take effect. + Default None, meaning there is no regularization. grad_clip (GradientClipBase, optional): Gradient cliping strategy, it's an instance of some derived class of ``GradientClipBase`` . There are three cliping strategies ( :ref:`api_fluid_clip_GradientClipByGlobalNorm` , :ref:`api_fluid_clip_GradientClipByNorm` , diff --git a/python/paddle/optimizer/adamax.py b/python/paddle/optimizer/adamax.py index 03f766e646d..86b4ed97d1c 100644 --- a/python/paddle/optimizer/adamax.py +++ b/python/paddle/optimizer/adamax.py @@ -57,19 +57,19 @@ class Adamax(Optimizer): The default value is 0.999. epsilon (float, optional): A small float value for numerical stability. The default value is 1e-08. - parameters (list|tuple, optional): List/Tuple of ``Tensor`` to update to minimize ``loss``. \ - This parameter is required in dygraph mode. And you can specify different options for \ - different parameter groups such as the learning rate, weight decay, etc, \ - then the parameters are list of dict. Note that the learning_rate in paramter groups \ - represents the scale of base learning_rate. \ - The default value is None in static mode, at this time all parameters will be updated. - weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \ - It canbe a float value as coeff of L2 regularization or \ - :ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`. - If a parameter has set regularizer using :ref:`api_fluid_ParamAttr` already, \ - the regularization setting here in optimizer will be ignored for this parameter. \ - Otherwise, the regularization setting here in optimizer will take effect. \ - Default None, meaning there is no regularization. + parameters (list|tuple, optional): List/Tuple of ``Tensor`` to update to minimize ``loss``. + This parameter is required in dygraph mode. And you can specify different options for + different parameter groups such as the learning rate, weight decay, etc, + then the parameters are list of dict. Note that the learning_rate in paramter groups + represents the scale of base learning_rate. + The default value is None in static mode, at this time all parameters will be updated. + weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. + It canbe a float value as coeff of L2 regularization or + :ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`. + If a parameter has set regularizer using :ref:`api_fluid_ParamAttr` already, + the regularization setting here in optimizer will be ignored for this parameter. + Otherwise, the regularization setting here in optimizer will take effect. + Default None, meaning there is no regularization. 
grad_clip (GradientClipBase, optional): Gradient cliping strategy, it's an instance of some derived class of ``GradientClipBase`` . There are three cliping strategies ( :ref:`api_fluid_clip_GradientClipByGlobalNorm` , :ref:`api_fluid_clip_GradientClipByNorm` , diff --git a/python/paddle/optimizer/adamw.py b/python/paddle/optimizer/adamw.py index 4c13b8f7897..1c0dbb3134c 100644 --- a/python/paddle/optimizer/adamw.py +++ b/python/paddle/optimizer/adamw.py @@ -54,12 +54,12 @@ class AdamW(Optimizer): Args: learning_rate (float|LRScheduler, optional): The learning rate used to update ``Parameter``. It can be a float value or a LRScheduler. The default value is 0.001. - parameters (list|tuple, optional): List/Tuple of ``Tensor`` names to update to minimize ``loss``. \ - This parameter is required in dygraph mode. And you can specify different options for \ - different parameter groups such as the learning rate, weight decay, etc, \ - then the parameters are list of dict. Note that the learning_rate in paramter groups \ - represents the scale of base learning_rate. \ - The default value is None in static mode, at this time all parameters will be updated. + parameters (list|tuple, optional): List/Tuple of ``Tensor`` names to update to minimize ``loss``. + This parameter is required in dygraph mode. And you can specify different options for + different parameter groups such as the learning rate, weight decay, etc, + then the parameters are list of dict. Note that the learning_rate in paramter groups + represents the scale of base learning_rate. + The default value is None in static mode, at this time all parameters will be updated. beta1 (float|Tensor, optional): The exponential decay rate for the 1st moment estimates. It should be a float number or a Tensor with shape [1] and data type as float32. The default value is 0.9. diff --git a/python/paddle/tensor/attribute.py b/python/paddle/tensor/attribute.py index d480f24a68b..79b7e2dc9d9 100644 --- a/python/paddle/tensor/attribute.py +++ b/python/paddle/tensor/attribute.py @@ -63,12 +63,6 @@ def rank(input): def shape(input): """ - :alias_main: paddle.shape - :alias: paddle.shape,paddle.tensor.shape,paddle.tensor.attribute.shape - :old_api: paddle.fluid.layers.shape - - **Shape Layer** - Get the shape of the input. .. code-block:: text diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py index e0076c8964d..8f72efbfffe 100644 --- a/python/paddle/tensor/manipulation.py +++ b/python/paddle/tensor/manipulation.py @@ -416,12 +416,6 @@ def transpose(x, perm, name=None): def unstack(x, axis=0, num=None): """ - :alias_main: paddle.unstack - :alias: paddle.unstack,paddle.tensor.unstack,paddle.tensor.manipulation.unstack - :old_api: paddle.fluid.layers.unstack - - **UnStack Layer** - This layer unstacks input Tensor :code:`x` into several Tensors along :code:`axis`. If :code:`axis` < 0, it would be replaced with :code:`axis+rank(x)`. @@ -1693,12 +1687,12 @@ def stack(x, axis=0, name=None): # [[3., 4.]], # [[5., 6.]]] - out = paddle.stack([x1, x2, x3], axis=-2) - print(out.shape) # [1, 3, 2] - print(out) - # [[[1., 2.], - # [3., 4.], - # [5., 6.]]] + out = paddle.stack([x1, x2, x3], axis=-2) + print(out.shape) # [1, 3, 2] + print(out) + # [[[1., 2.], + # [3., 4.], + # [5., 6.]]] """ axis = 0 if axis is None else axis @@ -2663,7 +2657,7 @@ def scatter(x, index, updates, overwrite=True, name=None): overwrite (bool): The mode that updating the output when there are same indices. 
If True, use the overwrite mode to update the output of the same index, - if False, use the accumulate mode to update the output of the same index.Default value is True. + if False, use the accumulate mode to update the output of the same index.Default value is True. name(str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name` . diff --git a/python/paddle/text/datasets/conll05.py b/python/paddle/text/datasets/conll05.py index 119ce9fea51..862b0d0a4cb 100644 --- a/python/paddle/text/datasets/conll05.py +++ b/python/paddle/text/datasets/conll05.py @@ -302,10 +302,10 @@ class Conll05st(Dataset): .. code-block:: python - from paddle.text.datasets import Conll05st + from paddle.text.datasets import Conll05st - conll05st = Conll05st() - word_dict, predicate_dict, label_dict = conll05st.get_dict() + conll05st = Conll05st() + word_dict, predicate_dict, label_dict = conll05st.get_dict() """ return self.word_dict, self.predicate_dict, self.label_dict @@ -317,9 +317,9 @@ class Conll05st(Dataset): .. code-block:: python - from paddle.text.datasets import Conll05st + from paddle.text.datasets import Conll05st - conll05st = Conll05st() - emb_file = conll05st.get_embedding() + conll05st = Conll05st() + emb_file = conll05st.get_embedding() """ return self.emb_file -- GitLab
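For context on the W191 (indentation contains tabs) and E101 (indentation contains mixed spaces and tabs) codes named in the subject line, the sketch below shows a minimal, dependency-free way to flag lines whose indentation contains a tab character. It is illustrative only, not the lint tooling the project actually uses; the default target directory python/paddle is taken from the paths touched by this patch, and any other path or behaviour here is an assumption.

    import sys
    from pathlib import Path

    def find_tab_indentation(root: str) -> int:
        """Print lines whose leading whitespace contains a tab (roughly what W191/E101 flag)."""
        hits = 0
        for path in Path(root).rglob("*.py"):
            text = path.read_text(encoding="utf-8", errors="replace")
            for lineno, line in enumerate(text.splitlines(), start=1):
                # Leading whitespace is everything before the first non-space/tab character.
                indent = line[: len(line) - len(line.lstrip(" \t"))]
                if "\t" in indent:
                    print(f"{path}:{lineno}: indentation contains tab")
                    hits += 1
        return hits

    if __name__ == "__main__":
        # Default to the tree this patch touches; a different root can be passed on the command line.
        root = sys.argv[1] if len(sys.argv) > 1 else "python/paddle"
        sys.exit(1 if find_tab_indentation(root) else 0)

A run that prints no lines and exits 0 corresponds to a tree where these tab-indentation warnings no longer fire, which is the state this patch moves the listed files toward.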