From 81f3f6b515549e23ef49892bd011b405b28d3575 Mon Sep 17 00:00:00 2001
From: Ainavo <57820731+Ainavo@users.noreply.github.com>
Date: Mon, 20 Mar 2023 17:55:36 +0800
Subject: [PATCH] [CodeStyle][UP008] remove super call with parameters (#51812)

* remove super call with parameters

* fix bug
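UP008 (pyupgrade / ruff "super-call-with-parameters") flags the Python 2-era
two-argument form `super(Class, self)`, which is redundant on Python 3:
inside a method body, zero-argument `super()` resolves the same
class/instance pair through the implicit `__class__` cell. A minimal
before/after sketch (the class name `MyNet` is illustrative, not taken
from this diff):

    import paddle

    class MyNet(paddle.nn.Layer):
        def __init__(self):
            # before (UP008 violation):
            #     super(MyNet, self).__init__()
            # after (equivalent zero-argument form on Python 3):
            super().__init__()

Constructor arguments other than the class/instance pair are preserved,
e.g. `super(EncoderCell, self).__init__(input_size, hidden_size)` in
test_rnn_decode_api.py simply becomes
`super().__init__(input_size, hidden_size)`.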
---
 paddle/fluid/imperative/README.md                  |  6 +++---
 pyproject.toml                                     |  1 +
 python/paddle/device/__init__.py                   |  2 +-
 .../distributed/fleet/recompute/recompute.py       |  2 +-
 python/paddle/distributed/fleet/utils/__init__.py  |  2 +-
 .../fleet/dygraph_group_sharded_stage3.py          |  6 +++---
 .../fleet/dygraph_group_sharded_stage3_eager.py    |  2 +-
 .../fleet/dygraph_save_for_auto_infer.py           |  6 +++---
 .../fleet/fused_attention_pass_with_mp.py          |  2 +-
 .../fleet/test_dygraph_recompute_for_eager.py      |  2 +-
 .../test_dist_fuse_gemm_epilogue_pass.py           |  2 +-
 .../test_backward_without_params.py                |  2 +-
 .../tests/unittests/dygraph_to_static/test_cinn.py |  2 +-
 .../unittests/dygraph_to_static/test_cinn_prim.py  |  2 +-
 .../dygraph_to_static/test_cinn_prim_gelu.py       |  2 +-
 .../dygraph_to_static/test_cinn_prim_layer_norm.py |  2 +-
 .../dygraph_to_static/test_cinn_prim_mean.py       |  2 +-
 .../dygraph_to_static/test_convert_call.py         |  2 +-
 .../dygraph_to_static/test_program_translator.py   |  2 +-
 .../dygraph_to_static/test_unuseful_inputs.py      |  4 ++--
 .../tests/unittests/ipu/test_modelruntime_ipu.py   |  2 +-
 .../ir/inference/test_trt_inference_predictor.py   |  4 ++--
 .../ir/inference/test_trt_support_nhwc_pass.py     |  2 +-
 .../composite_ops/test_composite_batch_norm.py     |  2 +-
 .../composite_ops/test_composite_layer_norm.py     |  2 +-
 .../prim/composite_ops/test_composite_softmax.py   |  2 +-
 .../fluid/tests/unittests/prim/model/bert.py       | 14 +++++++-------
 .../prim/prim/flags/test_prim_flags_case.py        |  2 +-
 .../prim/prim/vjp/static/test_comp_add_grad.py     |  2 +-
 .../prim/vjp/static/test_comp_add_tanh_grad.py     |  2 +-
 .../prim/prim/vjp/static/test_comp_cast_grad.py    |  2 +-
 .../prim/prim/vjp/static/test_comp_div_grad.py     |  2 +-
 .../prim/prim/vjp/static/test_comp_gather_grad.py  |  2 +-
 .../prim/prim/vjp/static/test_comp_reshape_grad.py |  2 +-
 .../prim/prim/vjp/static/test_comp_sqrt_grad.py    |  2 +-
 .../prim/prim/vjp/static/test_comp_sub_grad.py     |  2 +-
 .../prim/prim/vjp/static/test_comp_tanh_grad.py    |  2 +-
 .../prim/vjp/static/test_comp_transpose_grad.py    |  2 +-
 .../paddle/fluid/tests/unittests/prim_op_test.py   |  2 +-
 .../fluid/tests/unittests/test_activation_op.py    |  2 +-
 .../tests/unittests/test_fused_attention_pass.py   |  2 +-
 .../paddle/fluid/tests/unittests/test_lbfgs_v2.py  |  2 +-
 .../fluid/tests/unittests/test_rnn_decode_api.py   |  8 ++++----
 .../parallel_dygraph_dataparallel_with_pylayer.py  |  2 +-
 .../xpu/parallel_dygraph_gradient_check.py         |  2 +-
 ...arallel_dygraph_gradient_check_in_eager_mode.py |  2 +-
 .../tests/unittests/xpu/test_recompute_op_xpu.py   |  2 +-
 python/paddle/incubate/optimizer/lbfgs.py          |  2 +-
 python/paddle/nn/quant/format.py                   | 10 +++++-----
 python/paddle/nn/quant/qat/conv.py                 |  2 +-
 python/paddle/nn/quant/qat/linear.py               |  2 +-
 python/paddle/nn/quant/stub.py                     |  6 +++---
 python/paddle/quantization/base_observer.py        |  2 +-
 python/paddle/quantization/base_quanter.py         |  2 +-
 python/paddle/quantization/config.py               |  8 ++++----
 python/paddle/quantization/factory.py              |  2 +-
 python/paddle/quantization/observers/abs_max.py    |  4 ++--
 python/paddle/quantization/ptq.py                  |  2 +-
 python/paddle/quantization/qat.py                  |  2 +-
 python/paddle/quantization/quanters/abs_max.py     |  4 ++--
 python/paddle/quantization/wrapper.py              |  2 +-
 .../tests/quantization/test_customized_quanter.py  |  2 +-
 python/paddle/tests/quantization/test_ptq.py       |  2 +-
 python/paddle/tests/quantization/test_qat.py       |  2 +-
 python/paddle/tests/quantization/test_quant.py     |  2 +-
 python/paddle/tests/quantization/test_stub.py      |  2 +-
 python/paddle/utils/install_check.py               |  2 +-
 python/paddle/vision/models/_utils.py              |  2 +-
 68 files changed, 96 insertions(+), 95 deletions(-)

diff --git a/paddle/fluid/imperative/README.md b/paddle/fluid/imperative/README.md
index 27c3f75bcc5..c5dfe8fe7fd 100644
--- a/paddle/fluid/imperative/README.md
+++ b/paddle/fluid/imperative/README.md
@@ -139,7 +139,7 @@ TODO
 import paddle
 class MyLayer(fluid.imperative.Layer):
     def __init__(self):
-        super(MyLayer, self).__init__()
+        super().__init__()
 
     def forward(self, inputs):
         x = fluid.layers.relu(inputs)
@@ -150,7 +150,7 @@
 
 class MyPyLayer(fluid.imperative.PyLayer):
     def __init__(self):
-        super(MyPyLayer, self).__init__()
+        super().__init__()
 
     @staticmethod
     def forward(inputs):
@@ -172,7 +172,7 @@ with fluid.imperative.guard():
 
 class MLP(fluid.Layer):
     def __init__(self, input_size):
-        super(MLP, self).__init__()
+        super().__init__()
         self._linear1 = Linear(input_size,
                                3,
                                fluid.ParamAttr(
diff --git a/pyproject.toml b/pyproject.toml
index 23578ea6c4d..ac23bdc97fb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -37,6 +37,7 @@ select = [
     "UP003",
     "UP004",
     "UP007",
+    "UP008",
     "UP010",
     "UP011",
     "UP013",
diff --git a/python/paddle/device/__init__.py b/python/paddle/device/__init__.py
index 1a25f516f7d..b05c5be8d45 100644
--- a/python/paddle/device/__init__.py
+++ b/python/paddle/device/__init__.py
@@ -840,7 +840,7 @@ class Stream:
 
     def __eq__(self, o):
         if isinstance(o, Stream):
-            return super(Stream, self).__eq__(o)
+            return super().__eq__(o)
         return False
 
     def __hash__(self):
diff --git a/python/paddle/distributed/fleet/recompute/recompute.py b/python/paddle/distributed/fleet/recompute/recompute.py
index 4a09a74fbdc..d9b355ac2c0 100755
--- a/python/paddle/distributed/fleet/recompute/recompute.py
+++ b/python/paddle/distributed/fleet/recompute/recompute.py
@@ -379,7 +379,7 @@ def recompute(function, *args, **kwargs):
                 def __init__(self, input_size=10,
                              recompute_blocks=[1, 3],
                              recompute_kwargs={}):
-                    super(Naive_fc_net, self).__init__()
+                    super().__init__()
                     self.recompute_blocks = recompute_blocks
                     self.recompute_kwargs = recompute_kwargs
                     self.runfunc0 = get_fc_block(0, input_size, is_last=False)
diff --git a/python/paddle/distributed/fleet/utils/__init__.py b/python/paddle/distributed/fleet/utils/__init__.py
index ef205bb8b5f..5de15187a9f 100644
--- a/python/paddle/distributed/fleet/utils/__init__.py
+++ b/python/paddle/distributed/fleet/utils/__init__.py
@@ -79,7 +79,7 @@ def recompute(function, *args, **kwargs):
                 def __init__(self, input_size=10,
                              recompute_blocks=[1, 3],
                              recompute_kwargs={}):
-                    super(Naive_fc_net, self).__init__()
+                    super().__init__()
                     self.recompute_blocks = recompute_blocks
                     self.recompute_kwargs = recompute_kwargs
                     self.runfunc0 = get_fc_block(0, input_size, is_last=False)
diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_group_sharded_stage3.py b/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_group_sharded_stage3.py
index 104d084121f..e73a79724c3 100644
--- a/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_group_sharded_stage3.py
+++ b/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_group_sharded_stage3.py
@@ -61,7 +61,7 @@ class MLP(paddle.nn.Layer):
 
 class Encoder(paddle.nn.Layer):
     def __init__(self, encoder):
-        super(Encoder, self).__init__()
+        super().__init__()
         self.first_stage = paddle.nn.Linear(1024, 1024)
         self.encoder = encoder
 
@@ -73,7 +73,7 @@ class Encoder(paddle.nn.Layer):
 
 class Decoder(paddle.nn.Layer):
     def __init__(self, decoder):
-        super(Decoder, self).__init__()
+        super().__init__()
         self.decoder = decoder
         self.final_stage = paddle.nn.Linear(1024, 1024)
         self.group_norm = paddle.nn.GroupNorm(64, 1024)
@@ -87,7 +87,7 @@
 
 class SpecialModel(paddle.nn.Layer):
     def __init__(self):
-        super(SpecialModel, self).__init__()
+        super().__init__()
         self.shared = paddle.nn.Linear(1024, 1024, bias_attr=False)
         self.encoder = Encoder(self.shared)
         self.decoder = Decoder(self.shared)
diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_group_sharded_stage3_eager.py b/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_group_sharded_stage3_eager.py
index afc4f61e019..dd9ae4dea47 100644
--- a/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_group_sharded_stage3_eager.py
+++ b/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_group_sharded_stage3_eager.py
@@ -25,7 +25,7 @@ np.random.seed(2022)
 
 class Model(nn.Layer):
     def __init__(self):
-        super(Model, self).__init__()
+        super().__init__()
         self.first_stage = nn.Linear(4096, 4096, bias_attr=False)
         self.center_stage = nn.Linear(4096, 4096)
         self.center_stage.weight.stop_gradient = True
diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_save_for_auto_infer.py b/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_save_for_auto_infer.py
index 58157fcfeff..c74e2b7adaa 100644
--- a/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_save_for_auto_infer.py
+++ b/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_save_for_auto_infer.py
@@ -77,7 +77,7 @@ class MLP_pipe(PipelineLayer):
             ),
             LayerDesc(Linear, in_features=linear_size, out_features=10),
         ]
-        super(MLP_pipe, self).__init__(
+        super().__init__(
             desc,
             num_stages=2,
             loss_fn=paddle.nn.CrossEntropyLoss(),
@@ -93,7 +93,7 @@ class MLP_Hybrid(paddle.nn.Layer):
         param_attr=None,
         bias_attr=None,
     ):
-        super(MLP_Hybrid, self).__init__()
+        super().__init__()
         self.embedding = VocabParallelEmbedding(embedding_size, linear_size)
         self._linear1 = RowParallelLinear(
             linear_size, linear_size, has_bias=True, input_is_parallel=True
@@ -128,7 +128,7 @@ class MLP(paddle.nn.Layer):
         param_attr=None,
         bias_attr=None,
     ):
-        super(MLP, self).__init__()
+        super().__init__()
         self.embedding = paddle.nn.Embedding(embedding_size, linear_size)
         self._linear1 = Linear(linear_size, linear_size)
         self._linear2 = Linear(linear_size, linear_size)
diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/fused_attention_pass_with_mp.py b/python/paddle/fluid/tests/unittests/collective/fleet/fused_attention_pass_with_mp.py
index b3dc61ce9e5..1ff58c47b0e 100644
--- a/python/paddle/fluid/tests/unittests/collective/fleet/fused_attention_pass_with_mp.py
+++ b/python/paddle/fluid/tests/unittests/collective/fleet/fused_attention_pass_with_mp.py
@@ -35,7 +35,7 @@ class MultiHeadAttentionWithMP(paddle.nn.Layer):
         pre_ln=True,
         attn_dropout=True,
     ):
-        super(MultiHeadAttentionWithMP, self).__init__()
+        super().__init__()
         self.embed_dim = embed_dim
         self.kdim = embed_dim
         self.vdim = embed_dim
diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_dygraph_recompute_for_eager.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_dygraph_recompute_for_eager.py
index f496b4e4f09..5328308fd5a 100755
--- a/python/paddle/fluid/tests/unittests/collective/fleet/test_dygraph_recompute_for_eager.py
+++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_dygraph_recompute_for_eager.py
@@ -23,7 +23,7 @@ from paddle.distributed.fleet.utils import recompute
 
 class Model(paddle.nn.Layer):
     def __init__(self, block_idx, input_size, is_last=False):
-        super(Model, self).__init__()
+        super().__init__()
         block_name = "block_" + str(block_idx)
         self.block = paddle.nn.Sequential(
             (
diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/test_dist_fuse_gemm_epilogue_pass.py b/python/paddle/fluid/tests/unittests/distributed_passes/test_dist_fuse_gemm_epilogue_pass.py
index 897a40abf42..939edf3d308 100644
--- a/python/paddle/fluid/tests/unittests/distributed_passes/test_dist_fuse_gemm_epilogue_pass.py
+++ b/python/paddle/fluid/tests/unittests/distributed_passes/test_dist_fuse_gemm_epilogue_pass.py
@@ -37,7 +37,7 @@ def verify_op_count(op_types, op_name, target_count):
 
 class MultiFCLayer(nn.Layer):
     def __init__(self, hidden, Activation):
-        super(MultiFCLayer, self).__init__()
+        super().__init__()
         self.linear1 = paddle.nn.Linear(hidden, 4 * hidden)
         self.linear2 = paddle.nn.Linear(4 * hidden, hidden)
         self.linear3 = paddle.nn.Linear(hidden, hidden)
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_backward_without_params.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_backward_without_params.py
index b1924d84db5..179f7ab1f7f 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_backward_without_params.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_backward_without_params.py
@@ -21,7 +21,7 @@ import paddle
 
 class Net(paddle.nn.Layer):
     def __init__(self):
-        super(Net, self).__init__()
+        super().__init__()
 
     @paddle.jit.to_static
     def forward(self, x):
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cinn.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cinn.py
index 1bd66156aa5..0ef5186dab2 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cinn.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cinn.py
@@ -21,7 +21,7 @@ import paddle
 
 class Net(paddle.nn.Layer):
     def __init__(self):
-        super(Net, self).__init__()
+        super().__init__()
         self.relu = paddle.nn.functional.relu
         self.fc = paddle.nn.Linear(4, 4)
 
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cinn_prim.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cinn_prim.py
index a86cf18ade1..6ace7696c38 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cinn_prim.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cinn_prim.py
@@ -29,7 +29,7 @@ def apply_to_static(net, use_cinn):
 
 class PrimeNet(paddle.nn.Layer):
     def __init__(self):
-        super(PrimeNet, self).__init__()
+        super().__init__()
         self.fc = paddle.nn.Linear(4, 4)
 
     def forward(self, x):
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cinn_prim_gelu.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cinn_prim_gelu.py
index a4492f1bfdf..0f764c0745d 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cinn_prim_gelu.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cinn_prim_gelu.py
@@ -42,7 +42,7 @@ def generate_data(shape, dtype="float32"):
 
 class PrimeNet(paddle.nn.Layer):
     def __init__(self, approximate):
-        super(PrimeNet, self).__init__()
+        super().__init__()
         self.fc = paddle.nn.Linear(4, 4)
         self.approximate = approximate
 
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cinn_prim_layer_norm.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cinn_prim_layer_norm.py
index 2de94fdcbb1..571805ba449 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cinn_prim_layer_norm.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cinn_prim_layer_norm.py
@@ -42,7 +42,7 @@ def apply_to_static(net, use_cinn):
 
 class PrimeNet(paddle.nn.Layer):
     def __init__(self):
-        super(PrimeNet, self).__init__()
+        super().__init__()
         self.fc = paddle.nn.Linear(64, 64)
 
     def forward(self, x, w, b):
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cinn_prim_mean.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cinn_prim_mean.py
index ae2de19c872..d1d56fca374 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cinn_prim_mean.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cinn_prim_mean.py
@@ -45,7 +45,7 @@ class PrimeNet(
     paddle.nn.Layer,
 ):
     def __init__(self):
-        super(PrimeNet, self).__init__()
+        super().__init__()
         self.fc = paddle.nn.Linear(4, 4)
 
     def forward(self, x):
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_convert_call.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_convert_call.py
index 03a19c328bb..3669986174f 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_convert_call.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_convert_call.py
@@ -218,7 +218,7 @@ class TestStaticMethod(TestRecursiveCall2):
 
 class NotToStaticHelper(paddle.nn.Layer):
     def __init__(self):
-        super(NotToStaticHelper, self).__init__()
+        super().__init__()
 
     def sum(self, x):
         if x.shape[0] > 1:
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_program_translator.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_program_translator.py
index 3866cfe04d2..e6856801c21 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_program_translator.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_program_translator.py
@@ -335,7 +335,7 @@ obj = Obj()
 
 class Net2:
     def __init__(self):
-        super(Net2, self).__init__()
+        super().__init__()
         self.layer1 = paddle.nn.Linear(10, 10)
 
     def forward(self, data):
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_unuseful_inputs.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_unuseful_inputs.py
index 5cafba4e040..41a2a7b3098 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_unuseful_inputs.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_unuseful_inputs.py
@@ -33,7 +33,7 @@ def apply_to_static(support_to_static, model, image_shape=None):
 
 class Layer0(nn.Layer):
     def __init__(self, level):
-        super(Layer0, self).__init__()
+        super().__init__()
         self._linear1 = nn.Linear(10, 5)
         self._linear2 = nn.Linear(10, 5)
         self.layer1 = Layer1(level)
@@ -51,7 +51,7 @@ class Layer0(nn.Layer):
 
 class Layer1(nn.Layer):
     def __init__(self, level):
-        super(Layer1, self).__init__()
+        super().__init__()
         self.level = level
         self._linear = nn.Linear(5, 2)
 
diff --git a/python/paddle/fluid/tests/unittests/ipu/test_modelruntime_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_modelruntime_ipu.py
index b4e27a6cc47..46b03297cd1 100644
--- a/python/paddle/fluid/tests/unittests/ipu/test_modelruntime_ipu.py
+++ b/python/paddle/fluid/tests/unittests/ipu/test_modelruntime_ipu.py
@@ -23,7 +23,7 @@ import paddle
 
 class SimpleLayer(paddle.nn.Layer):
     def __init__(self):
-        super(SimpleLayer, self).__init__()
+        super().__init__()
         self.conv = paddle.nn.Conv2D(
             in_channels=3, out_channels=1, kernel_size=2, stride=1
         )
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_inference_predictor.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_inference_predictor.py
index d12f900e516..1d097b42ae7 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_inference_predictor.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_inference_predictor.py
@@ -55,7 +55,7 @@ def getdtype(dtype="float32"):
 
 class BackendPaddle:
     def __init__(self):
-        super(BackendPaddle, self).__init__()
+        super().__init__()
         self.h2d_time = []
         self.compute_time = []
         self.d2h_time = []
@@ -341,7 +341,7 @@ class ConvBNLayer(paddle.nn.Layer):
 
 class Test(nn.Layer):
     def __init__(self):
-        super(Test, self).__init__()
+        super().__init__()
         self.conv = ConvBNLayer(
             num_channels=3, num_filters=64, filter_size=3, stride=2, act='relu'
         )
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_support_nhwc_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_support_nhwc_pass.py
index 179b191ec38..a34ef16a3e0 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_support_nhwc_pass.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_support_nhwc_pass.py
@@ -27,7 +27,7 @@ paddle.enable_static()
 
 class SimpleNet(nn.Layer):
     def __init__(self):
-        super(SimpleNet, self).__init__()
+        super().__init__()
         self.conv1 = nn.Conv2D(
             in_channels=4,
             out_channels=4,
diff --git a/python/paddle/fluid/tests/unittests/prim/composite_ops/test_composite_batch_norm.py b/python/paddle/fluid/tests/unittests/prim/composite_ops/test_composite_batch_norm.py
index ed6dcd6a682..70e1fa87177 100644
--- a/python/paddle/fluid/tests/unittests/prim/composite_ops/test_composite_batch_norm.py
+++ b/python/paddle/fluid/tests/unittests/prim/composite_ops/test_composite_batch_norm.py
@@ -350,7 +350,7 @@ def apply_to_static(net, use_cinn):
 
 class PrimeNet(paddle.nn.Layer):
     def __init__(self):
-        super(PrimeNet, self).__init__()
+        super().__init__()
         self.conv = nn.Conv2D(4, 2, (3, 3), bias_attr=False)
         self.bn = BatchNorm(2, act="relu")
         self.run_mean = zeros([2])
diff --git a/python/paddle/fluid/tests/unittests/prim/composite_ops/test_composite_layer_norm.py b/python/paddle/fluid/tests/unittests/prim/composite_ops/test_composite_layer_norm.py
index e9ded156500..558baf54795 100644
--- a/python/paddle/fluid/tests/unittests/prim/composite_ops/test_composite_layer_norm.py
+++ b/python/paddle/fluid/tests/unittests/prim/composite_ops/test_composite_layer_norm.py
@@ -212,7 +212,7 @@ def apply_to_static(net, use_cinn):
 
 class PrimeNet(paddle.nn.Layer):
     def __init__(self, n_shape):
-        super(PrimeNet, self).__init__()
+        super().__init__()
         self.ln = LayerNorm(n_shape)
 
     def forward(self, x):
diff --git a/python/paddle/fluid/tests/unittests/prim/composite_ops/test_composite_softmax.py b/python/paddle/fluid/tests/unittests/prim/composite_ops/test_composite_softmax.py
index d0f8b2bb455..728027cbf34 100644
--- a/python/paddle/fluid/tests/unittests/prim/composite_ops/test_composite_softmax.py
+++ b/python/paddle/fluid/tests/unittests/prim/composite_ops/test_composite_softmax.py
@@ -137,7 +137,7 @@ def apply_to_static(net, use_cinn):
 
 class PrimeNet(paddle.nn.Layer):
     def __init__(self):
-        super(PrimeNet, self).__init__()
+        super().__init__()
         self.sf = F.softmax
 
     def forward(self, x, current_axis):
diff --git a/python/paddle/fluid/tests/unittests/prim/model/bert.py b/python/paddle/fluid/tests/unittests/prim/model/bert.py
index 689fb30e763..f6c6499dbcb 100644
--- a/python/paddle/fluid/tests/unittests/prim/model/bert.py
+++ b/python/paddle/fluid/tests/unittests/prim/model/bert.py
@@ -87,7 +87,7 @@ class BertConfig:
 
 class BertLMPredictionHead(nn.Layer):
     def __init__(self, config: BertConfig, embedding_weights=None):
-        super(BertLMPredictionHead, self).__init__()
+        super().__init__()
         self.transform = nn.Linear(config.hidden_size, config.hidden_size)
         self.activation = getattr(nn.functional, config.hidden_act)
 
@@ -131,7 +131,7 @@ class BertLMPredictionHead(nn.Layer):
 
 class BertPretrainingHeads(nn.Layer):
     def __init__(self, config: BertConfig, embedding_weights=None):
-        super(BertPretrainingHeads, self).__init__()
+        super().__init__()
         self.predictions = BertLMPredictionHead(config, embedding_weights)
         self.seq_relationship = nn.Linear(config.hidden_size, 2)
 
@@ -143,7 +143,7 @@ class BertPretrainingHeads(nn.Layer):
 
 class BertEmbeddings(nn.Layer):
     def __init__(self, config: BertConfig):
-        super(BertEmbeddings, self).__init__()
+        super().__init__()
         self.word_embeddings = nn.Embedding(
             config.vocab_size,
             config.hidden_size
@@ -190,7 +190,7 @@ class BertEmbeddings(nn.Layer):
 
 class BertPooler(nn.Layer):
     def __init__(self, config: BertConfig):
-        super(BertPooler, self).__init__()
+        super().__init__()
         self.dense = nn.Linear(config.hidden_size, config.hidden_size)
         self.activation = nn.Tanh()
 
@@ -208,7 +208,7 @@ class BertPooler(nn.Layer):
 
 class BertModel(nn.Layer):
     def __init__(self, config: BertConfig, to_static, enable_cinn):
-        super(BertModel, self).__init__()
+        super().__init__()
         self.config = config
         self.pad_token_id = config.pad_token_id
         self.initializer_range = config.initializer_range
@@ -372,7 +372,7 @@ class BertModel(nn.Layer):
 
 class Bert(nn.Layer):
     def __init__(self, to_static, enable_cinn):
-        super(Bert, self).__init__()
+        super().__init__()
         config = BertConfig()
         self.bert = BertModel(config, to_static, enable_cinn)
         self.cls = BertPretrainingHeads(
@@ -434,7 +434,7 @@ class Bert(nn.Layer):
 
 class BertPretrainingCriterion(paddle.nn.Layer):
     def __init__(self, vocab_size=VOCAB_SIZE):
-        super(BertPretrainingCriterion, self).__init__()
+        super().__init__()
         # CrossEntropyLoss is expensive since the inner reshape (copy)
         self.loss_fn = paddle.nn.loss.CrossEntropyLoss(ignore_index=-1)
         self.vocab_size = vocab_size
diff --git a/python/paddle/fluid/tests/unittests/prim/prim/flags/test_prim_flags_case.py b/python/paddle/fluid/tests/unittests/prim/prim/flags/test_prim_flags_case.py
index b2e2ad05ea4..30fe2af5621 100644
--- a/python/paddle/fluid/tests/unittests/prim/prim/flags/test_prim_flags_case.py
+++ b/python/paddle/fluid/tests/unittests/prim/prim/flags/test_prim_flags_case.py
@@ -28,7 +28,7 @@ def apply_to_static(net, use_cinn):
 
 class PrimeNet(paddle.nn.Layer):
     def __init__(self):
-        super(PrimeNet, self).__init__()
+        super().__init__()
 
     def forward(self, x):
         out = F.softmax(x)
diff --git a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_add_grad.py b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_add_grad.py
index 50ef9f6f130..9da9e7131d1 100644
--- a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_add_grad.py
+++ b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_add_grad.py
@@ -29,7 +29,7 @@ def apply_to_static(net, use_cinn):
 
 class PrimeNet(paddle.nn.Layer):
     def __init__(self):
-        super(PrimeNet, self).__init__()
+        super().__init__()
         self.fc = paddle.nn.Linear(4, 4)
 
     def forward(self, x, y):
diff --git a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_add_tanh_grad.py b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_add_tanh_grad.py
index b037cc73bfd..520aef634b6 100644
--- a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_add_tanh_grad.py
+++ b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_add_tanh_grad.py
@@ -29,7 +29,7 @@ def apply_to_static(net, use_cinn):
 
 class PrimeNet(paddle.nn.Layer):
     def __init__(self):
-        super(PrimeNet, self).__init__()
+        super().__init__()
         self.fc = paddle.nn.Linear(4, 4)
 
     def forward(self, x, y):
diff --git a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_cast_grad.py b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_cast_grad.py
index 22913b284dd..850ad822201 100644
--- a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_cast_grad.py
+++ b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_cast_grad.py
@@ -30,7 +30,7 @@ def apply_to_static(net, use_cinn):
 
 class PrimeNet(paddle.nn.Layer):
     def __init__(self):
-        super(PrimeNet, self).__init__()
+        super().__init__()
         self.fc = paddle.nn.Linear(4, 4)
 
     def forward(self, x):
diff --git a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_div_grad.py b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_div_grad.py
index 606b55b5a95..844d30894de 100644
--- a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_div_grad.py
+++ b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_div_grad.py
@@ -29,7 +29,7 @@ def apply_to_static(net, use_cinn):
 
 class PrimeNet(paddle.nn.Layer):
     def __init__(self):
-        super(PrimeNet, self).__init__()
+        super().__init__()
         self.fc = paddle.nn.Linear(4, 4)
 
     def forward(self, x, y):
diff --git a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_gather_grad.py b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_gather_grad.py
index 1f89b024e81..77693dd1071 100644
--- a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_gather_grad.py
+++ b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_gather_grad.py
@@ -31,7 +31,7 @@ def apply_to_static(net, use_cinn):
 
 class PrimeNet(paddle.nn.Layer):
     def __init__(self):
-        super(PrimeNet, self).__init__()
+        super().__init__()
         self.fc = paddle.nn.Linear(4, 4)
 
     def forward(self, x, index, axis):
diff --git a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_reshape_grad.py b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_reshape_grad.py
index d89e764da48..4523e4af4bf 100644
--- a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_reshape_grad.py
+++ b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_reshape_grad.py
@@ -29,7 +29,7 @@ def apply_to_static(net, use_cinn):
 
 class PrimeNet(paddle.nn.Layer):
     def __init__(self):
-        super(PrimeNet, self).__init__()
+        super().__init__()
         self.fc = paddle.nn.Linear(4, 4)
 
     def forward(self, x):
diff --git a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_sqrt_grad.py b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_sqrt_grad.py
index 8e623100dd0..a3b854fcc29 100644
--- a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_sqrt_grad.py
+++ b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_sqrt_grad.py
@@ -34,7 +34,7 @@ def apply_to_static(net, use_cinn):
 
 class PrimeNet(paddle.nn.Layer):
     def __init__(self):
-        super(PrimeNet, self).__init__()
+        super().__init__()
         self.fc = paddle.nn.Linear(4, 4)
 
     def forward(self, x):
diff --git a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_sub_grad.py b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_sub_grad.py
index 107ffbd062f..87bd6ff0b36 100644
--- a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_sub_grad.py
+++ b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_sub_grad.py
@@ -29,7 +29,7 @@ def apply_to_static(net, use_cinn):
 
 class PrimeNet(paddle.nn.Layer):
     def __init__(self):
-        super(PrimeNet, self).__init__()
+        super().__init__()
         self.fc = paddle.nn.Linear(4, 4)
 
     def forward(self, x, y):
diff --git a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_tanh_grad.py b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_tanh_grad.py
index d28f84a685b..869a36e8066 100644
--- a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_tanh_grad.py
+++ b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_tanh_grad.py
@@ -34,7 +34,7 @@ def apply_to_static(net, use_cinn):
 
 class PrimeNet(paddle.nn.Layer):
     def __init__(self):
-        super(PrimeNet, self).__init__()
+        super().__init__()
         self.fc = paddle.nn.Linear(4, 4)
 
     def forward(self, x):
diff --git a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_transpose_grad.py b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_transpose_grad.py
index 948b1e33849..7be0be582fd 100644
--- a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_transpose_grad.py
+++ b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_transpose_grad.py
@@ -29,7 +29,7 @@ def apply_to_static(net, use_cinn):
 
 class PrimeNet(paddle.nn.Layer):
     def __init__(self):
-        super(PrimeNet, self).__init__()
+        super().__init__()
 
     def forward(self, x):
         out = paddle.transpose(x, [0, 2, 1])
diff --git a/python/paddle/fluid/tests/unittests/prim_op_test.py b/python/paddle/fluid/tests/unittests/prim_op_test.py
index 980fdc5f7a5..758fcdc1b49 100644
--- a/python/paddle/fluid/tests/unittests/prim_op_test.py
+++ b/python/paddle/fluid/tests/unittests/prim_op_test.py
@@ -222,7 +222,7 @@ def apply_to_static(net, use_cinn):
 
 class PrimNet(paddle.nn.Layer):
     def __init__(self, python_api):
-        super(PrimNet, self).__init__()
+        super().__init__()
         self.python_api = python_api
 
     def forward(self, args):
diff --git a/python/paddle/fluid/tests/unittests/test_activation_op.py b/python/paddle/fluid/tests/unittests/test_activation_op.py
index 154e3c33f12..08a4afba343 100644
--- a/python/paddle/fluid/tests/unittests/test_activation_op.py
+++ b/python/paddle/fluid/tests/unittests/test_activation_op.py
@@ -3005,7 +3005,7 @@ class TestPow_ZeroDim(TestPow):
         self.shape = []
 
     def setUp(self):
-        super(TestPow_ZeroDim, self).setUp()
+        super().setUp()
         self.enable_cinn = False
 
 
diff --git a/python/paddle/fluid/tests/unittests/test_fused_attention_pass.py b/python/paddle/fluid/tests/unittests/test_fused_attention_pass.py
index f27daa2d0c1..2a8ce9b1299 100644
--- a/python/paddle/fluid/tests/unittests/test_fused_attention_pass.py
+++ b/python/paddle/fluid/tests/unittests/test_fused_attention_pass.py
@@ -33,7 +33,7 @@ class MultiHeadAttention(paddle.nn.Layer):
         pre_ln=True,
         attn_dropout=True,
     ):
-        super(MultiHeadAttention, self).__init__()
+        super().__init__()
         self.embed_dim = embed_dim
         self.kdim = embed_dim
         self.vdim = embed_dim
diff --git a/python/paddle/fluid/tests/unittests/test_lbfgs_v2.py b/python/paddle/fluid/tests/unittests/test_lbfgs_v2.py
index c7f8138318d..fa64c480ed9 100644
--- a/python/paddle/fluid/tests/unittests/test_lbfgs_v2.py
+++ b/python/paddle/fluid/tests/unittests/test_lbfgs_v2.py
@@ -31,7 +31,7 @@ np.random.seed(123)
 
 class Net(paddle.nn.Layer):
     def __init__(self, np_w, func):
-        super(Net, self).__init__()
+        super().__init__()
         self.func = func
         w = paddle.to_tensor(np_w)
         self.w = paddle.create_parameter(
diff --git a/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py b/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py
index 9322b258e7d..2337364efa2 100644
--- a/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py
+++ b/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py
@@ -407,7 +407,7 @@ class EncoderCell(SimpleRNNCell):
         dropout_prob=0.0,
         init_scale=0.1,
     ):
-        super(EncoderCell, self).__init__(input_size, hidden_size)
+        super().__init__(input_size, hidden_size)
         self.dropout_prob = dropout_prob
         # use add_sublayer to add multi-layers
         self.lstm_cells = []
@@ -453,7 +453,7 @@ class Encoder(Layer):
         dropout_prob=0.0,
         init_scale=0.1,
     ):
-        super(Encoder, self).__init__()
+        super().__init__()
         self.embedder = Embedding(vocab_size, embed_dim)
         self.stack_lstm = RNN(
             EncoderCell(
@@ -484,7 +484,7 @@ class Decoder(Layer):
         dropout_prob=0.0,
         init_scale=0.1,
     ):
-        super(Decoder, self).__init__()
+        super().__init__()
         self.embedder = Embedding(vocab_size, embed_dim)
         self.stack_lstm = RNN(
             DecoderCell(
@@ -603,7 +603,7 @@ class BaseModel(Layer):
         dropout_prob=0.0,
         init_scale=0.1,
     ):
-        super(BaseModel, self).__init__()
+        super().__init__()
         self.hidden_size = hidden_size
         self.word_embedding = Embedding(vocab_size, embed_dim)
         self.encoder = Encoder(
diff --git a/python/paddle/fluid/tests/unittests/xpu/parallel_dygraph_dataparallel_with_pylayer.py b/python/paddle/fluid/tests/unittests/xpu/parallel_dygraph_dataparallel_with_pylayer.py
index 070770fbcaf..2171cb24292 100644
--- a/python/paddle/fluid/tests/unittests/xpu/parallel_dygraph_dataparallel_with_pylayer.py
+++ b/python/paddle/fluid/tests/unittests/xpu/parallel_dygraph_dataparallel_with_pylayer.py
@@ -44,7 +44,7 @@ class cus_tanh(PyLayer):
 
 class SimpleNet(paddle.nn.Layer):
     def __init__(self, train_id, model_id):
-        super(SimpleNet, self).__init__()
+        super().__init__()
         self.w = self.create_parameter(shape=[in_dim, batch], dtype="float32")
         self.linear = paddle.nn.Linear(in_dim, out_dim)
         self.tanh = paddle.tanh
diff --git a/python/paddle/fluid/tests/unittests/xpu/parallel_dygraph_gradient_check.py b/python/paddle/fluid/tests/unittests/xpu/parallel_dygraph_gradient_check.py
index 56d428a7781..8c996fa6a91 100644
--- a/python/paddle/fluid/tests/unittests/xpu/parallel_dygraph_gradient_check.py
+++ b/python/paddle/fluid/tests/unittests/xpu/parallel_dygraph_gradient_check.py
@@ -30,7 +30,7 @@ out_dim = 20
 
 class SimpleNet(paddle.nn.Layer):
     def __init__(self, train_id):
-        super(SimpleNet, self).__init__()
+        super().__init__()
         self.w1 = self.create_parameter(
             shape=[in_dim, out_dim], dtype="float32"
         )
diff --git a/python/paddle/fluid/tests/unittests/xpu/parallel_dygraph_gradient_check_in_eager_mode.py b/python/paddle/fluid/tests/unittests/xpu/parallel_dygraph_gradient_check_in_eager_mode.py
index 9ee5e860438..f5ecd5cf4fc 100644
--- a/python/paddle/fluid/tests/unittests/xpu/parallel_dygraph_gradient_check_in_eager_mode.py
+++ b/python/paddle/fluid/tests/unittests/xpu/parallel_dygraph_gradient_check_in_eager_mode.py
@@ -30,7 +30,7 @@ out_dim = 20
 
 class SimpleNet(paddle.nn.Layer):
     def __init__(self, train_id):
-        super(SimpleNet, self).__init__()
+        super().__init__()
         self.w1 = self.create_parameter(
             shape=[in_dim, out_dim], dtype="float32"
         )
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_recompute_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_recompute_op_xpu.py
index 3ec67eb76e1..b29f72ff03b 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_recompute_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_recompute_op_xpu.py
@@ -52,7 +52,7 @@ class Naive_fc_net(paddle.nn.Layer):
     def __init__(
         self, input_size=10, recompute_blocks=[1, 3], recompute_kwargs={}
    ):
-        super(Naive_fc_net, self).__init__()
+        super().__init__()
         self.recompute_blocks = recompute_blocks
         self.recompute_kwargs = recompute_kwargs
         self.runfunc0 = get_fc_block(0, input_size, is_last=False)
diff --git a/python/paddle/incubate/optimizer/lbfgs.py b/python/paddle/incubate/optimizer/lbfgs.py
index 7a1be322d87..937a3b2f9af 100644
--- a/python/paddle/incubate/optimizer/lbfgs.py
+++ b/python/paddle/incubate/optimizer/lbfgs.py
@@ -89,7 +89,7 @@ class LBFGS(Optimizer):
 
             class Net(paddle.nn.Layer):
                 def __init__(self):
-                    super(Net, self).__init__()
+                    super().__init__()
                     w = paddle.to_tensor(np_w)
                     self.w = paddle.create_parameter(shape=w.shape, dtype=w.dtype,
                         default_initializer=paddle.nn.initializer.Assign(w))
diff --git a/python/paddle/nn/quant/format.py b/python/paddle/nn/quant/format.py
index ca5b6ea7f3e..edd72ad8ce3 100644
--- a/python/paddle/nn/quant/format.py
+++ b/python/paddle/nn/quant/format.py
@@ -23,7 +23,7 @@ from paddle.nn import Layer
 
 class LinearQuanterDequanter(Layer):
     def __init__(self, quanter, dequanter):
-        super(LinearQuanterDequanter, self).__init__()
+        super().__init__()
         self._quanter = quanter
         self._dequanter = dequanter
 
@@ -46,7 +46,7 @@
 
 class LinearQuanter(Layer):
     def __init__(self, scales, zero_point=None, quant_axis=None, bit_length=8):
-        super(LinearQuanter, self).__init__()
+        super().__init__()
         self._scales = paddle.to_tensor(scales, dtype="float32")
         self._zero_point = (
             paddle.zeros([1], dtype="float32")
@@ -97,7 +97,7 @@
 
 class LinearDequanter(Layer):
     def __init__(self, scales, zero_point=None, quant_axis=None, bit_length=8):
-        super(LinearDequanter, self).__init__()
+        super().__init__()
         self._scales = paddle.to_tensor(scales, dtype="float32")
         self._zero_point = (
             paddle.zeros([1], dtype="float32")
@@ -156,7 +156,7 @@ class ConvertibleQuantedLayer(Layer, metaclass=abc.ABCMeta):
             # Given codes in ./customized_quanter.py
             class CustomizedQuantedLayer(ConvertibleQuantedLayer):
                 def __init__(self):
-                    super(CustomizedQuantedLayer, self).__init__()
+                    super().__init__()
                     self.weight_a = paddle.create_parameter(shape=[1], dtype='float32')
                     self.weight_b = paddle.create_parameter(shape=[1], dtype='float32')
                     self.quanter_for_weight_a = None
@@ -176,7 +176,7 @@ class ConvertibleQuantedLayer(Layer, metaclass=abc.ABCMeta):
     """
 
     def __init__(self):
-        super(ConvertibleQuantedLayer, self).__init__()
+        super().__init__()
         self.converted = False
 
     @abc.abstractmethod
diff --git a/python/paddle/nn/quant/qat/conv.py b/python/paddle/nn/quant/qat/conv.py
index f2ffc7b103a..1cf33a8bcb3 100644
--- a/python/paddle/nn/quant/qat/conv.py
+++ b/python/paddle/nn/quant/qat/conv.py
@@ -27,7 +27,7 @@ class QuantedConv2D(ConvertibleQuantedLayer):
     """
 
     def __init__(self, layer: Layer, q_config):
-        super(QuantedConv2D, self).__init__()
+        super().__init__()
 
         # For Conv2D
         self._groups = layer._groups
diff --git a/python/paddle/nn/quant/qat/linear.py b/python/paddle/nn/quant/qat/linear.py
index c0e015ce51c..39b177f2c24 100644
--- a/python/paddle/nn/quant/qat/linear.py
+++ b/python/paddle/nn/quant/qat/linear.py
@@ -26,7 +26,7 @@ class QuantedLinear(ConvertibleQuantedLayer):
     """
 
    def __init__(self, layer: Layer, q_config):
-        super(QuantedLinear, self).__init__()
+        super().__init__()
         # For Linear
         self.weight = layer.weight
         self.bias = layer.bias
diff --git a/python/paddle/nn/quant/stub.py b/python/paddle/nn/quant/stub.py
index 74deb8aa75d..ab977524153 100644
--- a/python/paddle/nn/quant/stub.py
+++ b/python/paddle/nn/quant/stub.py
@@ -36,7 +36,7 @@ class Stub(Layer):
             quanter = FakeQuanterWithAbsMaxObserver(moving_rate=0.9)
             class Model(paddle.nn.Layer):
                 def __init__(self, num_classes=10):
-                    super(Model, self).__init__()
+                    super().__init__()
                    self.conv = Conv2D(3, 6, 3, stride=1, padding=1)
                     self.quant = Stub(quanter)
                 def forward(self, inputs):
@@ -51,7 +51,7 @@
     """
 
     def __init__(self, observer=None):
-        super(Stub, self).__init__()
+        super().__init__()
         self._observer = observer
 
     def forward(self, input):
@@ -71,7 +71,7 @@ class QuanterStub(Layer):
     """
 
    def __init__(self, layer: Stub, q_config):
-        super(QuanterStub, self).__init__()
+        super().__init__()
         self._observer = None
         if layer._observer is not None:
             self._observer = layer._observer._instance(layer)
diff --git a/python/paddle/quantization/base_observer.py b/python/paddle/quantization/base_observer.py
index ede6873ef50..76d60de045d 100644
--- a/python/paddle/quantization/base_observer.py
+++ b/python/paddle/quantization/base_observer.py
@@ -25,7 +25,7 @@ class BaseObserver(BaseQuanter, metaclass=abc.ABCMeta):
     """
 
     def __init__(self):
-        super(BaseObserver, self).__init__()
+        super().__init__()
 
     @abc.abstractmethod
     def cal_thresholds(self):
diff --git a/python/paddle/quantization/base_quanter.py b/python/paddle/quantization/base_quanter.py
index 4aa4598351b..c73627e1002 100644
--- a/python/paddle/quantization/base_quanter.py
+++ b/python/paddle/quantization/base_quanter.py
@@ -29,7 +29,7 @@ class BaseQuanter(Layer, metaclass=abc.ABCMeta):
     """
 
     def __init__(self):
-        super(BaseQuanter, self).__init__()
+        super().__init__()
 
     @abc.abstractmethod
     def forward(self, input):
diff --git a/python/paddle/quantization/config.py b/python/paddle/quantization/config.py
index fa3e8cc8237..cc58b40ac2a 100644
--- a/python/paddle/quantization/config.py
+++ b/python/paddle/quantization/config.py
@@ -118,7 +118,7 @@ class QuantConfig:
 
             class Model(paddle.nn.Layer):
                 def __init__(self):
-                    super(Model, self).__init__()
+                    super().__init__()
                     self.fc = Linear(576, 120)
             model = Model()
             quanter = FakeQuanterWithAbsMaxObserver(moving_rate=0.9)
@@ -162,7 +162,7 @@ class QuantConfig:
 
             class Model(paddle.nn.Layer):
                 def __init__(self):
-                    super(Model, self).__init__()
+                    super().__init__()
                     self.fc = Linear(576, 120)
             model = Model()
             quanter = FakeQuanterWithAbsMaxObserver(moving_rate=0.9)
@@ -207,7 +207,7 @@ class QuantConfig:
 
             class Model(paddle.nn.Layer):
                 def __init__(self):
-                    super(Model, self).__init__()
+                    super().__init__()
                     self.fc = Linear(576, 120)
             model = Model()
             quanter = FakeQuanterWithAbsMaxObserver(moving_rate=0.9)
@@ -388,7 +388,7 @@ class QuantConfig:
 
             class Model(paddle.nn.Layer):
                 def __init__(self):
-                    super(Model, self).__init__()
+                    super().__init__()
                     self.fc = Sequential(Linear(576, 120),Linear(576, 120))
             model = Model()
             quanter = FakeQuanterWithAbsMaxObserver(moving_rate=0.9)
diff --git a/python/paddle/quantization/factory.py b/python/paddle/quantization/factory.py
index a57a2e95e31..d7ad2c355ba 100644
--- a/python/paddle/quantization/factory.py
+++ b/python/paddle/quantization/factory.py
@@ -56,7 +56,7 @@ class QuanterFactory(ClassWithArguments):
     """
 
     def __init__(self, *args, **kwargs):
-        super(QuanterFactory, self).__init__(*args, **kwargs)
+        super().__init__(*args, **kwargs)
         self.partial_class = None
 
     def _instance(self, layer: Layer) -> BaseQuanter:
diff --git a/python/paddle/quantization/observers/abs_max.py b/python/paddle/quantization/observers/abs_max.py
index 4c29dd907a8..47ef545e751 100644
--- a/python/paddle/quantization/observers/abs_max.py
+++ b/python/paddle/quantization/observers/abs_max.py
@@ -39,7 +39,7 @@ class AbsmaxObserver(ObserverFactory):
     """
 
     def __init__(self, quant_bits=8):
-        super(AbsmaxObserver, self).__init__(quant_bits=quant_bits)
+        super().__init__(quant_bits=quant_bits)
 
     def _get_class(self):
         return AbsmaxObserverLayer
@@ -53,7 +53,7 @@
     INIT_ABS_MAX = 1e-7
 
     def __init__(self, layer, quant_bits=8):
-        super(AbsmaxObserverLayer, self).__init__()
+        super().__init__()
         self._quant_bits = quant_bits
         self.abs_max_val = paddle.to_tensor(AbsmaxObserverLayer.INIT_ABS_MAX)
 
diff --git a/python/paddle/quantization/ptq.py b/python/paddle/quantization/ptq.py
index a9204397b71..48db8d8d92f 100644
--- a/python/paddle/quantization/ptq.py
+++ b/python/paddle/quantization/ptq.py
@@ -27,7 +27,7 @@ class PTQ(Quantization):
     """
 
     def __init__(self, config: QuantConfig):
-        super(PTQ, self).__init__(config)
+        super().__init__(config)
 
     def _is_parallel_training(self):
         try:
diff --git a/python/paddle/quantization/qat.py b/python/paddle/quantization/qat.py
index e7a28a3b3a9..cff2304acbe 100644
--- a/python/paddle/quantization/qat.py
+++ b/python/paddle/quantization/qat.py
@@ -36,7 +36,7 @@ class QAT(Quantization):
     """
 
    def __init__(self, config: QuantConfig):
-        super(QAT, self).__init__(config)
+        super().__init__(config)
 
     def quantize(self, model: Layer, inplace=False):
         r"""
diff --git a/python/paddle/quantization/quanters/abs_max.py b/python/paddle/quantization/quanters/abs_max.py
index 14344459eba..abb4cb84002 100644
--- a/python/paddle/quantization/quanters/abs_max.py
+++ b/python/paddle/quantization/quanters/abs_max.py
@@ -82,7 +82,7 @@ class FakeQuanterWithAbsMaxObserver(QuanterFactory):
         dtype='float32',
         name=None,
     ):
-        super(FakeQuanterWithAbsMaxObserver, self).__init__(
+        super().__init__(
             name=name,
             moving_rate=moving_rate,
             bit_length=bit_length,
@@ -102,7 +102,7 @@
         bit_length=8,
         dtype='float32',
     ):
-        super(FakeQuanterWithAbsMaxObserverLayer, self).__init__()
+        super().__init__()
         self._moving_rate = moving_rate
         self._bit_length = bit_length
         scale_prefix = (
diff --git a/python/paddle/quantization/wrapper.py b/python/paddle/quantization/wrapper.py
index 96178d28210..cef847a5a1b 100644
--- a/python/paddle/quantization/wrapper.py
+++ b/python/paddle/quantization/wrapper.py
@@ -34,7 +34,7 @@ class ObserveWrapper(Layer):
         observed: Layer,
         observe_input=True,
     ):
-        super(ObserveWrapper, self).__init__()
+        super().__init__()
         self._observer = observer
         self._observed = observed
         self._observe_input = observe_input
diff --git a/python/paddle/tests/quantization/test_customized_quanter.py b/python/paddle/tests/quantization/test_customized_quanter.py
index 04ad5b2f284..2c7d3f33250 100644
--- a/python/paddle/tests/quantization/test_customized_quanter.py
+++ b/python/paddle/tests/quantization/test_customized_quanter.py
@@ -28,7 +28,7 @@ linear_quant_axis = 1
 @quanter("CustomizedQuanter")
 class CustomizedQuanterLayer(BaseQuanter):
     def __init__(self, layer, bit_length=8, kwargs1=None):
-        super(CustomizedQuanterLayer, self).__init__()
+        super().__init__()
         self._layer = layer
         self._bit_length = bit_length
         self._kwargs1 = kwargs1
diff --git a/python/paddle/tests/quantization/test_ptq.py b/python/paddle/tests/quantization/test_ptq.py
index f5237fdd87d..afac5edf0b0 100644
--- a/python/paddle/tests/quantization/test_ptq.py
+++ b/python/paddle/tests/quantization/test_ptq.py
@@ -29,7 +29,7 @@ from paddle.quantization.observers.abs_max import AbsmaxObserverLayer
 
 class LeNetDygraph(paddle.nn.Layer):
     def __init__(self, num_classes=10):
-        super(LeNetDygraph, self).__init__()
+        super().__init__()
         self.num_classes = num_classes
         self.features = Sequential(
             Conv2D(1, 6, 3, stride=1, padding=1),
diff --git a/python/paddle/tests/quantization/test_qat.py b/python/paddle/tests/quantization/test_qat.py
index 920e6b2bde2..a94c4025f84 100644
--- a/python/paddle/tests/quantization/test_qat.py
+++ b/python/paddle/tests/quantization/test_qat.py
@@ -41,7 +41,7 @@ class RandomDataset(Dataset):
 
 class Model(paddle.nn.Layer):
     def __init__(self, num_classes=10):
-        super(Model, self).__init__()
+        super().__init__()
         self.num_classes = num_classes
         self.features = Sequential(
             Conv2D(3, 6, 3, stride=1, padding=1),
diff --git a/python/paddle/tests/quantization/test_quant.py b/python/paddle/tests/quantization/test_quant.py
index d5c49c2353b..3842a67f571 100644
--- a/python/paddle/tests/quantization/test_quant.py
+++ b/python/paddle/tests/quantization/test_quant.py
@@ -24,7 +24,7 @@ from paddle.quantization.quanters import FakeQuanterWithAbsMaxObserver
 
 class LeNetDygraph(paddle.nn.Layer):
     def __init__(self, num_classes=10):
-        super(LeNetDygraph, self).__init__()
+        super().__init__()
         self.num_classes = num_classes
         self.features = Sequential(
             Conv2D(3, 6, 3, stride=1, padding=1),
diff --git a/python/paddle/tests/quantization/test_stub.py b/python/paddle/tests/quantization/test_stub.py
index 6f0337d1ac4..987428c14b8 100644
--- a/python/paddle/tests/quantization/test_stub.py
+++ b/python/paddle/tests/quantization/test_stub.py
@@ -28,7 +28,7 @@ quanter = FakeQuanterWithAbsMaxObserver(moving_rate=0.9)
 
 class Model(paddle.nn.Layer):
     def __init__(self, num_classes=10):
-        super(Model, self).__init__()
+        super().__init__()
         self.quant_in = Stub()
         self.conv = Conv2D(3, 6, 3, stride=1, padding=1)
         self.quant = Stub(quanter)
diff --git a/python/paddle/utils/install_check.py b/python/paddle/utils/install_check.py
index ad88aaaa577..064d70ed4f8 100644
--- a/python/paddle/utils/install_check.py
+++ b/python/paddle/utils/install_check.py
@@ -186,7 +186,7 @@ def train_for_run_parallel():
        """
 
        def __init__(self):
-            super(LinearNet, self).__init__()
+            super().__init__()
            self._linear1 = paddle.nn.Linear(10, 10)
            self._linear2 = paddle.nn.Linear(10, 1)
 
diff --git a/python/paddle/vision/models/_utils.py b/python/paddle/vision/models/_utils.py
index a5567008017..08b103b3a68 100644
--- a/python/paddle/vision/models/_utils.py
+++ b/python/paddle/vision/models/_utils.py
@@ -87,7 +87,7 @@ class IntermediateLayerGetter(nn.LayerDict):
             if not return_layers:
                 break
 
-        super(IntermediateLayerGetter, self).__init__(layers)
+        super().__init__(layers)
         self.return_layers = orig_return_layers
 
     def forward(self, x):
-- 
GitLab