Unverified commit ed2bb051 authored by Nyakku Shigure, committed by GitHub

[CodeStyle][W191][E101] remove tabs in python files (#46288)

Parent 330b1a0a
......@@ -1493,7 +1493,7 @@ class BoxPSDataset(InMemoryDataset):
filelist = ["a.txt", "b.txt"]
dataset.set_filelist(filelist)
dataset.load_into_memory()
"""
"""
self._prepare_to_run()
self.boxps.load_into_memory()
......
......@@ -684,7 +684,7 @@ class FusedTransformerEncoderLayer(Layer):
.. code-block:: python
# required: gpu
import paddle
from paddle.incubate.nn import FusedTransformerEncoderLayer
......
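For reference, a minimal usage sketch of the fused layer touched above, assuming the constructor takes (d_model, nhead, dim_feedforward) like the non-fused TransformerEncoderLayer; the shapes are illustrative only.

.. code-block:: python

    # required: gpu
    import paddle
    from paddle.incubate.nn import FusedTransformerEncoderLayer

    # encoder input: [batch_size, src_len, d_model]
    enc_input = paddle.rand((2, 4, 128))
    # self-attention mask: [batch_size, n_head, src_len, src_len]
    attn_mask = paddle.rand((2, 2, 4, 4))
    encoder_layer = FusedTransformerEncoderLayer(128, 2, 512)
    enc_output = encoder_layer(enc_input, attn_mask)  # expected shape: [2, 4, 128]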
......@@ -511,7 +511,7 @@ def coalesce(x):
#[[0, 1], [1, 2]]
print(sp_x.values())
#[3.0, 3.0]
"""
"""
return _C_ops.sparse_coalesce(x)
......
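A hedged sketch of the coalesce call in this hunk; it assumes the COO constructor and coalesce both live under paddle.incubate.sparse in this release (the module path has moved between versions).

.. code-block:: python

    import paddle
    from paddle.incubate import sparse

    indices = [[0, 0, 1], [1, 1, 2]]
    values = [1.0, 2.0, 3.0]
    sp_x = sparse.sparse_coo_tensor(indices, values)
    sp_x = sparse.coalesce(sp_x)  # merges the two entries that share index (0, 1)
    print(sp_x.indices())  # [[0, 1], [1, 2]]
    print(sp_x.values())   # [3.0, 3.0]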
......@@ -347,23 +347,23 @@ def interpolate(x,
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
input_data = paddle.randn(shape=(2,3,6,10)).astype(paddle.float32)
output_1 = F.interpolate(x=input_data, size=[12,12])
print(output_1.shape)
# [2L, 3L, 12L, 12L]
# given scale
output_2 = F.interpolate(x=input_data, scale_factor=[2,1])
print(output_2.shape)
# [2L, 3L, 12L, 10L]
# bilinear interp
output_3 = F.interpolate(x=input_data, scale_factor=[2,1], mode="bilinear")
print(output_3.shape)
# [2L, 3L, 12L, 10L]
"""
data_format = data_format.upper()
resample = mode.upper()
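As a small companion to the example above, a sketch that selects mode and align_corners explicitly; the shape comment is an expectation based on the requested size, not verified output.

.. code-block:: python

    import paddle
    import paddle.nn.functional as F

    x = paddle.randn(shape=(2, 3, 6, 10)).astype(paddle.float32)
    # bilinear resize to an explicit output size, without corner alignment
    y = F.interpolate(x, size=[12, 20], mode="bilinear", align_corners=False)
    print(y.shape)  # expected: [2, 3, 12, 20]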
......@@ -818,17 +818,17 @@ def upsample(x,
or 5-D Tensor of the shape (num_batches, channels, out_d, out_h, out_w) or (num_batches, out_d, out_h, out_w, channels).
Examples:
.. code-block:: python
import paddle
import paddle.nn as nn
input_data = paddle.randn(shape=(2,3,6,10)).astype(paddle.float32)
upsample_out = paddle.nn.Upsample(size=[12,12])
output = upsample_out(x=input_data)
print(output.shape)
# [2L, 3L, 12L, 12L]
"""
return interpolate(x, size, scale_factor, mode, align_corners, align_mode,
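A companion sketch for the layer form, using scale_factor instead of size; the printed shape is an expectation from the 2x upscaling of each spatial dimension.

.. code-block:: python

    import paddle

    x = paddle.randn(shape=(2, 3, 6, 10)).astype(paddle.float32)
    upsample = paddle.nn.Upsample(scale_factor=[2, 2], mode="nearest")
    y = upsample(x)
    print(y.shape)  # expected: [2, 3, 12, 20]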
......@@ -842,30 +842,30 @@ def bilinear(x1, x2, weight, bias=None, name=None):
See :ref:`api_nn_Bilinear` for details and output shape.
Parameters:
x1 (Tensor): the first input tensor, its data type should be float32, float64.
x2 (Tensor): the second input tensor, its data type should be float32, float64.
weight (Parameter): The learnable weights of this layer, shape is [out_features, in1_features, in2_features].
bias (Parameter, optional): The learnable bias (Bias) of this layer, shape is [1, out_features]. If it is set to None, no bias will be added to the output units. The default value is None.
name (str, optional): Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name`. Default: None.
Returns:
Tensor: A 2-D Tensor of shape [batch_size, out_features].
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
x1 = paddle.randn((5, 5)).astype(paddle.float32)
x2 = paddle.randn((5, 4)).astype(paddle.float32)
w = paddle.randn((1000, 5, 4)).astype(paddle.float32)
b = paddle.randn((1, 1000)).astype(paddle.float32)
result = F.bilinear(x1, x2, w, b)
print(result.shape)
# [5, 1000]
"""
if in_dygraph_mode():
......@@ -1008,38 +1008,38 @@ def dropout(x,
.. code-block:: python
import paddle
x = paddle.to_tensor([[1,2,3], [4,5,6]]).astype(paddle.float32)
y_train = paddle.nn.functional.dropout(x, 0.5)
y_test = paddle.nn.functional.dropout(x, 0.5, training=False)
y_0 = paddle.nn.functional.dropout(x, axis=0)
y_1 = paddle.nn.functional.dropout(x, axis=1)
y_01 = paddle.nn.functional.dropout(x, axis=[0,1])
print(x)
# Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
# [[1., 2., 3.],
# [4., 5., 6.]])
print(y_train)
# Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
# [[2. , 0. , 6. ],
# [8. , 0. , 12.]])
print(y_test)
# Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
# [[1., 2., 3.],
# [4., 5., 6.]])
print(y_0)
# Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
# [[0. , 0. , 0. ],
# [8. , 10., 12.]])
print(y_1)
# Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
# [[2. , 0. , 6. ],
# [8. , 0. , 12.]])
print(y_01)
# Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
# [[0. , 0. , 0. ],
# [8. , 0. , 12.]])
"""
if not isinstance(p, (float, int, Variable)):
......@@ -1239,14 +1239,14 @@ def dropout3d(x, p=0.5, training=True, data_format='NCDHW', name=None):
Examples:
.. code-block:: python
import paddle
x = paddle.randn(shape=(2, 3, 4, 5, 6)).astype(paddle.float32)
y_train = paddle.nn.functional.dropout3d(x) #train
y_test = paddle.nn.functional.dropout3d(x, training=False) #test
print(x[0,0,:,:,:])
print(y_train[0,0,:,:,:]) # may all 0
print(y_test[0,0,:,:,:])
"""
......@@ -1287,19 +1287,19 @@ def alpha_dropout(x, p=0.5, training=True, name=None):
Examples:
.. code-block:: python
import paddle
x = paddle.to_tensor([[-1, 1], [-1, 1]]).astype(paddle.float32)
y_train = paddle.nn.functional.alpha_dropout(x, 0.5)
y_test = paddle.nn.functional.alpha_dropout(x, 0.5, training=False)
print(y_train)
# Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
# [[-0.10721093, -0.77919382],
# [-0.10721093, 1.66559887]]) (randomly)
print(y_test)
# Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
# [[-1., 1.],
# [-1., 1.]])
"""
if not isinstance(p, (float, int)):
raise TypeError("p argument should be a float or int")
......
......@@ -472,10 +472,9 @@ def edit_distance(input,
NOTE: This API is different from fluid.metrics.EditDistance
Returns:
Tuple:
distance(Tensor): edit distance result, its data type is float32, and its shape is (batch_size, 1).
sequence_num(Tensor): sequence number, its data type is float32, and its shape is (1,).
Examples:
.. code-block:: python
......@@ -2959,29 +2958,29 @@ def multi_label_soft_margin_loss(input,
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
Shape:
input: N-D Tensor, the shape is [N, \*], N is batch size and `\*` means number of classes, available dtype is float32, float64. The sum operation operates over all the elements.
label: N-D Tensor, same shape as the input.
weight: N-D Tensor, the shape is [N, 1].
output: scalar. If :attr:`reduction` is ``'none'``, then same shape as the input.
Returns:
Tensor, The tensor variable storing the multi_label_soft_margin_loss of input and label.
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
input = paddle.to_tensor([[1, -2, 3], [0, -1, 2], [1, 0, 1]], dtype=paddle.float32)
# label elements in {1., -1.}
label = paddle.to_tensor([[-1, 1, -1], [1, 1, 1], [1, -1, 1]], dtype=paddle.float32)
loss = F.multi_label_soft_margin_loss(input, label, reduction='none')
print(loss)
# Tensor([3.49625897, 0.71111226, 0.43989015])
loss = F.multi_label_soft_margin_loss(input, label, reduction='mean')
print(loss)
# Tensor([1.54908717])
"""
if reduction not in ['sum', 'mean', 'none']:
raise ValueError(
......@@ -3266,8 +3265,8 @@ def triplet_margin_with_distance_loss(input,
distance_function (callable, optional): Quantifies the distance between two tensors. If not specified, the 2-norm is used.
margin (float, optional): A nonnegative margin representing the minimum difference
between the positive and negative distances required for the loss to be 0. Default value is :math:`1`.
swap (bool, optional): The distance swap changes the negative distance to the swap distance (distance between positive samples
and negative samples) if the swap distance is smaller than the negative distance. Default: ``False``.
......
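To make the margin and swap descriptions above concrete, a hedged sketch of the functional call; the tensor values are made up and the default 2-norm distance is assumed.

.. code-block:: python

    import paddle
    import paddle.nn.functional as F

    input = paddle.to_tensor([[1.0, 5.0, 3.0], [0.0, 3.0, 2.0]], dtype=paddle.float32)
    positive = paddle.to_tensor([[5.0, 1.0, 2.0], [3.0, 2.0, 1.0]], dtype=paddle.float32)
    negative = paddle.to_tensor([[2.0, 1.0, -3.0], [1.0, 1.0, -1.0]], dtype=paddle.float32)
    # per-sample hinge on d(input, positive) - d(input, negative) + margin
    loss = F.triplet_margin_with_distance_loss(input, positive, negative,
                                               margin=1.0, reduction='none')
    print(loss.shape)  # expected: [2]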
......@@ -1219,7 +1219,7 @@ class MultiLabelSoftMarginLoss(Layer):
:math:`y` and :math:`x` must have the same size.
Parameters:
weight (Tensor, optional): a manual rescaling weight given to each class.
If given, has to be a Tensor of size C and the data type is float32, float64.
Default is ``'None'`` .
reduction (str, optional): Indicate how to average the loss by batch_size,
......@@ -1482,7 +1482,7 @@ class TripletMarginWithDistanceLoss(Layer):
where the default `distance_function`
.. math::
d(x_i, y_i) = \left\lVert {\bf x}_i - {\bf y}_i \right\rVert_2
or user can define their own distance function. `margin` is a nonnegative margin representing the minimum difference
between the positive and negative distances that is required for the loss to be 0. If `swap` is true, it will compare distance of (input, negative) with
......@@ -1510,15 +1510,15 @@ class TripletMarginWithDistanceLoss(Layer):
Shapes:
input (Tensor): Input tensor, the data type is float32 or float64.
The shape is [N, \*], N is batch size and `\*` means any number of additional dimensions, available dtype is float32, float64.
positive (Tensor): Positive tensor, the data type is float32 or float64.
The shape is the same as the shape of input.
negative (Tensor): Negative tensor, the data type is float32 or float64.
The shape is the same as the shape of input.
output (Tensor): The tensor variable storing the triplet_margin_with_distance_loss of input and positive and negative.
Return:
A callable object of TripletMarginWithDistanceLoss
......
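A hedged sketch of the layer form, assuming the constructor mirrors the functional arguments (distance_function, margin, swap, reduction).

.. code-block:: python

    import paddle

    triplet_loss = paddle.nn.TripletMarginWithDistanceLoss(margin=1.0, swap=False, reduction='mean')
    input = paddle.rand((4, 8))
    positive = paddle.rand((4, 8))
    negative = paddle.rand((4, 8))
    loss = triplet_loss(input, positive, negative)  # scalar when reduction='mean'
    print(loss)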
......@@ -134,15 +134,15 @@ Where `H` means height of feature map, `W` means width of feature map.
numerical stability. Default is 1e-5.
momentum(float, optional): The value used for the moving_mean and moving_var computation. Default: 0.9.
weight_attr(ParamAttr|bool, optional): The parameter attribute for Parameter `scale`
of instance_norm. If it is set to None or one attribute of ParamAttr, instance_norm
will create ParamAttr as weight_attr, the name of scale can be set in ParamAttr.
If the Initializer of the weight_attr is not set, the parameter is initialized
to one. If it is set to False, will not create weight_attr. Default: None.
bias_attr(ParamAttr|bool, optional): The parameter attribute for the bias of instance_norm.
If it is set to None or one attribute of ParamAttr, instance_norm
will create ParamAttr as bias_attr, the name of bias can be set in ParamAttr.
If the Initializer of the bias_attr is not set, the bias is initialized to zero.
If it is set to False, will not create bias_attr. Default: None.
data_format(str, optional): Specify the input data format, may be "NC", "NCL". Default "NCL".
name(str, optional): Name for the InstanceNorm, default is None. For more information, please refer to :ref:`api_guide_Name`.
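Since this hunk only shows parameter text, a brief sketch of the 1D layer on NCL input; the channel count passed to the constructor is chosen to match the example input.

.. code-block:: python

    import paddle

    x = paddle.rand((2, 3, 5))  # [N, C, L]
    instance_norm = paddle.nn.InstanceNorm1D(3)
    out = instance_norm(x)
    print(out.shape)  # [2, 3, 5]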
......@@ -202,15 +202,15 @@ Where `H` means height of feature map, `W` means width of feature map.
numerical stability. Default is 1e-5.
momentum(float, optional): The value used for the moving_mean and moving_var computation. Default: 0.9.
weight_attr(ParamAttr|bool, optional): The parameter attribute for Parameter `scale`
of instance_norm. If it is set to None or one attribute of ParamAttr, instance_norm
will create ParamAttr as weight_attr, the name of scale can be set in ParamAttr.
If the Initializer of the weight_attr is not set, the parameter is initialized
to one. If it is set to False, will not create weight_attr. Default: None.
bias_attr(ParamAttr|bool, optional): The parameter attribute for the bias of instance_norm.
If it is set to None or one attribute of ParamAttr, instance_norm
will create ParamAttr as bias_attr, the name of bias can be set in ParamAttr.
If the Initializer of the bias_attr is not set, the bias is initialized to zero.
If it is set to False, will not create bias_attr. Default: None.
data_format(str, optional): Specify the input data format, could be "NCHW". Default: NCHW.
name(str, optional): Name for the InstanceNorm, default is None. For more information, please refer to :ref:`api_guide_Name`.
......@@ -226,13 +226,13 @@ Where `H` means height of feature map, `W` means width of feature map.
.. code-block:: python
import paddle
x = paddle.rand((2, 2, 2, 3))
instance_norm = paddle.nn.InstanceNorm2D(2)
instance_norm_out = instance_norm(x)
print(instance_norm_out)
"""
def _check_input_dim(self, input):
......@@ -268,15 +268,15 @@ Where `H` means height of feature map, `W` means width of feature map.
numerical stability. Default is 1e-5.
momentum(float, optional): The value used for the moving_mean and moving_var computation. Default: 0.9.
weight_attr(ParamAttr|bool, optional): The parameter attribute for Parameter `scale`
of instance_norm. If it is set to None or one attribute of ParamAttr, instance_norm
will create ParamAttr as weight_attr, the name of scale can be set in ParamAttr.
If the Initializer of the weight_attr is not set, the parameter is initialized
to one. If it is set to False, will not create weight_attr. Default: None.
bias_attr(ParamAttr|bool, optional): The parameter attribute for the bias of instance_norm.
If it is set to None or one attribute of ParamAttr, instance_norm
will create ParamAttr as bias_attr, the name of bias can be set in ParamAttr.
If the Initializer of the bias_attr is not set, the bias is initialized to zero.
If it is set to False, will not create bias_attr. Default: None.
data_format(str, optional): Specify the input data format, could be "NCDHW". Default: NCDHW.
name(str, optional): Name for the InstanceNorm, default is None. For more information, please refer to :ref:`api_guide_Name`.
......@@ -292,13 +292,13 @@ Where `H` means height of feature map, `W` means width of feature map.
.. code-block:: python
import paddle
x = paddle.rand((2, 2, 2, 2, 3))
instance_norm = paddle.nn.InstanceNorm3D(2)
instance_norm_out = instance_norm(x)
print(instance_norm_out.numpy())
"""
def _check_input_dim(self, input):
......@@ -318,13 +318,13 @@ class GroupNorm(Layer):
num_groups(int): The number of groups that divided from channels.
num_channels(int): The number of channels of input.
epsilon(float, optional): The small value added to the variance to prevent
division by zero. Default: 1e-05.
weight_attr(ParamAttr|bool, optional): The parameter attribute for the learnable
scale :math:`g`. If it is set to False, no scale will be added to the output units.
If it is set to None, the scale is initialized to one. Default: None.
bias_attr(ParamAttr|bool, optional): The parameter attribute for the learnable
bias :math:`b`. If it is set to False, no bias will be added to the output units.
If it is set to None, the bias is initialized to zero. Default: None.
data_format(str, optional): Specify the input data format. Only NCHW is supported. Default: NCHW.
name(str, optional): Name for the GroupNorm, default is None. For more information, please refer to :ref:`api_guide_Name`.
......@@ -338,17 +338,17 @@ class GroupNorm(Layer):
Examples:
.. code-block:: python
import paddle
import numpy as np
paddle.disable_static()
np.random.seed(123)
x_data = np.random.random(size=(2, 6, 2, 2)).astype('float32')
x = paddle.to_tensor(x_data)
group_norm = paddle.nn.GroupNorm(num_channels=6, num_groups=6)
group_norm_out = group_norm(x)
print(group_norm_out.numpy())
"""
def __init__(self,
......
......@@ -45,19 +45,19 @@ class Adagrad(Optimizer):
It can be a float value or a ``Variable`` with a float type.
epsilon (float, optional): A small float value for numerical stability.
The default value is 1e-06.
parameters (list|tuple, optional): List/Tuple of ``Tensor`` to update to minimize ``loss``.
This parameter is required in dygraph mode. You can specify different options for
different parameter groups, such as the learning rate and weight decay; in that case,
the parameters are a list of dict. Note that the learning_rate in parameter groups
represents the scale of the base learning_rate.
The default value is None in static mode, in which case all parameters will be updated.
weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization.
It can be a float value as the coeff of L2 regularization or
:ref:`api_paddle_regularizer_L1Decay`, :ref:`api_paddle_regularizer_L2Decay`.
If a parameter has already set a regularizer using :ref:`api_paddle_fluid_param_attr_aramAttr`,
the regularization setting here in the optimizer will be ignored for this parameter.
Otherwise, the regularization setting here in the optimizer will take effect.
Default None, meaning there is no regularization.
grad_clip (GradientClipBase, optional): Gradient clipping strategy, it's an instance of
some derived class of ``GradientClipBase``. There are three clipping strategies:
ClipGradByGlobalNorm, ClipGradByNorm and ClipGradByValue. Default None,
......
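A hedged usage sketch for the parameters and weight_decay arguments described above, with a float weight_decay acting as the L2 coefficient.

.. code-block:: python

    import paddle

    linear = paddle.nn.Linear(10, 10)
    out = linear(paddle.rand([10, 10], dtype="float32"))
    loss = paddle.mean(out)
    adagrad = paddle.optimizer.Adagrad(learning_rate=0.1,
                                       parameters=linear.parameters(),
                                       weight_decay=0.01)
    loss.backward()
    adagrad.step()
    adagrad.clear_grad()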
......@@ -67,19 +67,19 @@ class Adam(Optimizer):
epsilon (float|Tensor, optional): A small float value for numerical stability.
It should be a float number or a Tensor with shape [1] and data type as float32.
The default value is 1e-08.
parameters (list|tuple, optional): List/Tuple of ``Tensor`` to update to minimize ``loss``.
This parameter is required in dygraph mode. You can specify different options for
different parameter groups, such as the learning rate and weight decay; in that case,
the parameters are a list of dict. Note that the learning_rate in parameter groups
represents the scale of the base learning_rate.
The default value is None in static mode, in which case all parameters will be updated.
weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization.
It can be a float value as the coeff of L2 regularization or
:ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`.
If a parameter has already set a regularizer using :ref:`api_fluid_ParamAttr`,
the regularization setting here in the optimizer will be ignored for this parameter.
Otherwise, the regularization setting here in the optimizer will take effect.
Default None, meaning there is no regularization.
grad_clip (GradientClipBase, optional): Gradient clipping strategy, it's an instance of
some derived class of ``GradientClipBase``. There are three clipping strategies
( :ref:`api_fluid_clip_GradientClipByGlobalNorm` , :ref:`api_fluid_clip_GradientClipByNorm` ,
......
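To illustrate the "parameters as a list of dict" option described above, a hedged sketch with two parameter groups; the per-group learning_rate is the scale factor mentioned in the text.

.. code-block:: python

    import paddle

    linear_1 = paddle.nn.Linear(10, 10)
    linear_2 = paddle.nn.Linear(10, 10)
    adam = paddle.optimizer.Adam(
        learning_rate=0.1,
        parameters=[
            {'params': linear_1.parameters()},
            {'params': linear_2.parameters(), 'weight_decay': 0.001, 'learning_rate': 0.1},
        ],
        weight_decay=0.01)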
......@@ -57,19 +57,19 @@ class Adamax(Optimizer):
The default value is 0.999.
epsilon (float, optional): A small float value for numerical stability.
The default value is 1e-08.
parameters (list|tuple, optional): List/Tuple of ``Tensor`` to update to minimize ``loss``.
This parameter is required in dygraph mode. You can specify different options for
different parameter groups, such as the learning rate and weight decay; in that case,
the parameters are a list of dict. Note that the learning_rate in parameter groups
represents the scale of the base learning_rate.
The default value is None in static mode, in which case all parameters will be updated.
weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization.
It can be a float value as the coeff of L2 regularization or
:ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`.
If a parameter has already set a regularizer using :ref:`api_fluid_ParamAttr`,
the regularization setting here in the optimizer will be ignored for this parameter.
Otherwise, the regularization setting here in the optimizer will take effect.
Default None, meaning there is no regularization.
grad_clip (GradientClipBase, optional): Gradient clipping strategy, it's an instance of
some derived class of ``GradientClipBase``. There are three clipping strategies
( :ref:`api_fluid_clip_GradientClipByGlobalNorm` , :ref:`api_fluid_clip_GradientClipByNorm` ,
......
......@@ -54,12 +54,12 @@ class AdamW(Optimizer):
Args:
learning_rate (float|LRScheduler, optional): The learning rate used to update ``Parameter``.
It can be a float value or a LRScheduler. The default value is 0.001.
parameters (list|tuple, optional): List/Tuple of ``Tensor`` names to update to minimize ``loss``.
This parameter is required in dygraph mode. You can specify different options for
different parameter groups, such as the learning rate and weight decay; in that case,
the parameters are a list of dict. Note that the learning_rate in parameter groups
represents the scale of the base learning_rate.
The default value is None in static mode, in which case all parameters will be updated.
beta1 (float|Tensor, optional): The exponential decay rate for the 1st moment estimates.
It should be a float number or a Tensor with shape [1] and data type as float32.
The default value is 0.9.
......
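A hedged sketch of AdamW with a decoupled weight_decay coefficient and the grad_clip strategy named in the surrounding optimizer docstrings.

.. code-block:: python

    import paddle

    linear = paddle.nn.Linear(10, 10)
    clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0)
    opt = paddle.optimizer.AdamW(learning_rate=0.001,
                                 parameters=linear.parameters(),
                                 weight_decay=0.01,
                                 grad_clip=clip)
    loss = paddle.mean(linear(paddle.rand([4, 10])))
    loss.backward()
    opt.step()
    opt.clear_grad()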
......@@ -63,12 +63,6 @@ def rank(input):
def shape(input):
"""
:alias_main: paddle.shape
:alias: paddle.shape,paddle.tensor.shape,paddle.tensor.attribute.shape
:old_api: paddle.fluid.layers.shape
**Shape Layer**
Get the shape of the input.
.. code-block:: text
......
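A short sketch of the shape layer described above; whether the result prints as a Tensor or a plain list depends on the execution mode, so the comment is only indicative.

.. code-block:: python

    import paddle

    x = paddle.ones((3, 100, 100), dtype="float32")
    print(paddle.shape(x))  # indicative: a Tensor holding [3, 100, 100]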
......@@ -416,12 +416,6 @@ def transpose(x, perm, name=None):
def unstack(x, axis=0, num=None):
"""
:alias_main: paddle.unstack
:alias: paddle.unstack,paddle.tensor.unstack,paddle.tensor.manipulation.unstack
:old_api: paddle.fluid.layers.unstack
**UnStack Layer**
This layer unstacks input Tensor :code:`x` into several Tensors along :code:`axis`.
If :code:`axis` < 0, it would be replaced with :code:`axis+rank(x)`.
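A minimal sketch of the unstack behaviour described above, assuming the public alias paddle.unstack.

.. code-block:: python

    import paddle

    x = paddle.ones(shape=(2, 3, 5), dtype="float32")
    ys = paddle.unstack(x, axis=1)  # a list of 3 tensors, each with shape [2, 5]
    print(len(ys), ys[0].shape)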
......@@ -1693,12 +1687,12 @@ def stack(x, axis=0, name=None):
# [[3., 4.]],
# [[5., 6.]]]
out = paddle.stack([x1, x2, x3], axis=-2)
print(out.shape) # [1, 3, 2]
print(out)
# [[[1., 2.],
# [3., 4.],
# [5., 6.]]]
"""
axis = 0 if axis is None else axis
......@@ -2663,7 +2657,7 @@ def scatter(x, index, updates, overwrite=True, name=None):
overwrite (bool): The mode that updating the output when there are same indices.
If True, use the overwrite mode to update the output of the same index,
if False, use the accumulate mode to update the output of the same index. Default value is True.
name(str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name` .
......
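To show the overwrite flag described above, a hedged sketch contrasting overwrite and accumulate modes for a repeated index; the values are made up.

.. code-block:: python

    import paddle

    x = paddle.to_tensor([[1., 1.], [2., 2.], [3., 3.]])
    index = paddle.to_tensor([2, 1, 0, 1], dtype='int64')
    updates = paddle.to_tensor([[1., 1.], [2., 2.], [3., 3.], [4., 4.]])

    # overwrite=True: the later update for index 1 replaces the earlier one
    out_overwrite = paddle.scatter(x, index, updates, overwrite=True)
    # overwrite=False: the updates for index 1 are accumulated instead
    out_accumulate = paddle.scatter(x, index, updates, overwrite=False)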
......@@ -302,10 +302,10 @@ class Conll05st(Dataset):
.. code-block:: python
from paddle.text.datasets import Conll05st
conll05st = Conll05st()
word_dict, predicate_dict, label_dict = conll05st.get_dict()
"""
return self.word_dict, self.predicate_dict, self.label_dict
......@@ -317,9 +317,9 @@ class Conll05st(Dataset):
.. code-block:: python
from paddle.text.datasets import Conll05st
conll05st = Conll05st()
emb_file = conll05st.get_embedding()
"""
return self.emb_file