diff --git a/paddle/fluid/imperative/README.md b/paddle/fluid/imperative/README.md
index 27c3f75bcc558db36fc0275ff507ee9c63418273..c5dfe8fe7fd6a0b42ae9d5009f45f5fbec38d6e3 100644
--- a/paddle/fluid/imperative/README.md
+++ b/paddle/fluid/imperative/README.md
@@ -139,7 +139,7 @@ TODO
 import paddle
 
 class MyLayer(fluid.imperative.Layer):
     def __init__(self):
-        super(MyLayer, self).__init__()
+        super().__init__()
 
     def forward(self, inputs):
         x = fluid.layers.relu(inputs)
@@ -150,7 +150,7 @@ class MyPyLayer(fluid.imperative.PyLayer):
     def __init__(self):
-        super(MyPyLayer, self).__init__()
+        super().__init__()
 
     @staticmethod
     def forward(inputs):
@@ -172,7 +172,7 @@ with fluid.imperative.guard():
 class MLP(fluid.Layer):
     def __init__(self, input_size):
-        super(MLP, self).__init__()
+        super().__init__()
         self._linear1 = Linear(input_size, 3, fluid.ParamAttr(
diff --git a/pyproject.toml b/pyproject.toml
index 23578ea6c4d24b8e1a840736aee0052299a97fe1..ac23bdc97fb63d87037be53d73f055be8970022a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -37,6 +37,7 @@ select = [
     "UP003",
     "UP004",
     "UP007",
+    "UP008",
     "UP010",
     "UP011",
     "UP013",
diff --git a/python/paddle/device/__init__.py b/python/paddle/device/__init__.py
index 1a25f516f7d506b950bd0d22aa0c19c0a62548da..b05c5be8d4568fd761da24c4ea7118b8a2464355 100644
--- a/python/paddle/device/__init__.py
+++ b/python/paddle/device/__init__.py
@@ -840,7 +840,7 @@ class Stream:
 
     def __eq__(self, o):
         if isinstance(o, Stream):
-            return super(Stream, self).__eq__(o)
+            return super().__eq__(o)
         return False
 
     def __hash__(self):
diff --git a/python/paddle/distributed/fleet/recompute/recompute.py b/python/paddle/distributed/fleet/recompute/recompute.py
index 4a09a74fbdc6221b9b6cd0b1a45630f8a2211044..d9b355ac2c0a8dd058c0c4609169cc4e2e92576b 100755
--- a/python/paddle/distributed/fleet/recompute/recompute.py
+++ b/python/paddle/distributed/fleet/recompute/recompute.py
@@ -379,7 +379,7 @@ def recompute(function, *args, **kwargs):
             def __init__(self, input_size=10,
                          recompute_blocks=[1, 3],
                          recompute_kwargs={}):
-                super(Naive_fc_net, self).__init__()
+                super().__init__()
                 self.recompute_blocks = recompute_blocks
                 self.recompute_kwargs = recompute_kwargs
                 self.runfunc0 = get_fc_block(0, input_size, is_last=False)
diff --git a/python/paddle/distributed/fleet/utils/__init__.py b/python/paddle/distributed/fleet/utils/__init__.py
index ef205bb8b5ffee01058425c740849bbf1c18e9de..5de15187a9fbc58f12d7d5f3674e235e9da7e03d 100644
--- a/python/paddle/distributed/fleet/utils/__init__.py
+++ b/python/paddle/distributed/fleet/utils/__init__.py
@@ -79,7 +79,7 @@ def recompute(function, *args, **kwargs):
             def __init__(self, input_size=10,
                          recompute_blocks=[1, 3],
                          recompute_kwargs={}):
-                super(Naive_fc_net, self).__init__()
+                super().__init__()
                 self.recompute_blocks = recompute_blocks
                 self.recompute_kwargs = recompute_kwargs
                 self.runfunc0 = get_fc_block(0, input_size, is_last=False)
diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_group_sharded_stage3.py b/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_group_sharded_stage3.py
index 104d084121f552e0a905d116f1811e479b65d33b..e73a79724c38ed6240a0f1d5ca10c1d7fd87aba2 100644
--- a/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_group_sharded_stage3.py
+++ b/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_group_sharded_stage3.py
@@ -61,7 +61,7 @@ class MLP(paddle.nn.Layer):
 
 class Encoder(paddle.nn.Layer):
     def __init__(self, encoder):
-        super(Encoder, self).__init__()
+        super().__init__()
         self.first_stage = paddle.nn.Linear(1024, 1024)
         self.encoder = encoder
@@ -73,7 +73,7 @@ class Encoder(paddle.nn.Layer):
 
 class Decoder(paddle.nn.Layer):
     def __init__(self, decoder):
-        super(Decoder, self).__init__()
+        super().__init__()
         self.decoder = decoder
         self.final_stage = paddle.nn.Linear(1024, 1024)
         self.group_norm = paddle.nn.GroupNorm(64, 1024)
@@ -87,7 +87,7 @@ class SpecialModel(paddle.nn.Layer):
     def __init__(self):
-        super(SpecialModel, self).__init__()
+        super().__init__()
         self.shared = paddle.nn.Linear(1024, 1024, bias_attr=False)
         self.encoder = Encoder(self.shared)
         self.decoder = Decoder(self.shared)
diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_group_sharded_stage3_eager.py b/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_group_sharded_stage3_eager.py
index afc4f61e01947797ce8f57b792fd862290ac2a7c..dd9ae4dea47db94ef0253f8d5a475dd7dfd36fab 100644
--- a/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_group_sharded_stage3_eager.py
+++ b/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_group_sharded_stage3_eager.py
@@ -25,7 +25,7 @@ np.random.seed(2022)
 
 class Model(nn.Layer):
     def __init__(self):
-        super(Model, self).__init__()
+        super().__init__()
         self.first_stage = nn.Linear(4096, 4096, bias_attr=False)
         self.center_stage = nn.Linear(4096, 4096)
         self.center_stage.weight.stop_gradient = True
diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_save_for_auto_infer.py b/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_save_for_auto_infer.py
index 58157fcfefff7f55b3985497537826cb0fecedd3..c74e2b7adaa2216e4393ae14d70bf92cf2e566cd 100644
--- a/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_save_for_auto_infer.py
+++ b/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_save_for_auto_infer.py
@@ -77,7 +77,7 @@ class MLP_pipe(PipelineLayer):
             ),
             LayerDesc(Linear, in_features=linear_size, out_features=10),
         ]
-        super(MLP_pipe, self).__init__(
+        super().__init__(
             desc,
             num_stages=2,
             loss_fn=paddle.nn.CrossEntropyLoss(),
@@ -93,7 +93,7 @@ class MLP_Hybrid(paddle.nn.Layer):
         param_attr=None,
         bias_attr=None,
     ):
-        super(MLP_Hybrid, self).__init__()
+        super().__init__()
         self.embedding = VocabParallelEmbedding(embedding_size, linear_size)
         self._linear1 = RowParallelLinear(
             linear_size, linear_size, has_bias=True, input_is_parallel=True
@@ -128,7 +128,7 @@ class MLP(paddle.nn.Layer):
         param_attr=None,
         bias_attr=None,
     ):
-        super(MLP, self).__init__()
+        super().__init__()
         self.embedding = paddle.nn.Embedding(embedding_size, linear_size)
         self._linear1 = Linear(linear_size, linear_size)
         self._linear2 = Linear(linear_size, linear_size)
diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/fused_attention_pass_with_mp.py b/python/paddle/fluid/tests/unittests/collective/fleet/fused_attention_pass_with_mp.py
index b3dc61ce9e5aeeb1fce1a3b190fc7a5f6ff87018..1ff58c47b0ec12c5d4cd01398564351c92f1cd28 100644
--- a/python/paddle/fluid/tests/unittests/collective/fleet/fused_attention_pass_with_mp.py
+++ b/python/paddle/fluid/tests/unittests/collective/fleet/fused_attention_pass_with_mp.py
@@ -35,7 +35,7 @@ class MultiHeadAttentionWithMP(paddle.nn.Layer):
         pre_ln=True,
         attn_dropout=True,
     ):
-        super(MultiHeadAttentionWithMP, self).__init__()
+        super().__init__()
         self.embed_dim = embed_dim
         self.kdim = embed_dim
         self.vdim = embed_dim
diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_dygraph_recompute_for_eager.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_dygraph_recompute_for_eager.py
index f496b4e4f092a4d19bdf451a4f2b8fb5c90ed6d0..5328308fd5a1a786a218bc96277c2815ff450394 100755
--- a/python/paddle/fluid/tests/unittests/collective/fleet/test_dygraph_recompute_for_eager.py
+++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_dygraph_recompute_for_eager.py
@@ -23,7 +23,7 @@ from paddle.distributed.fleet.utils import recompute
 
 class Model(paddle.nn.Layer):
     def __init__(self, block_idx, input_size, is_last=False):
-        super(Model, self).__init__()
+        super().__init__()
         block_name = "block_" + str(block_idx)
         self.block = paddle.nn.Sequential(
             (
diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/test_dist_fuse_gemm_epilogue_pass.py b/python/paddle/fluid/tests/unittests/distributed_passes/test_dist_fuse_gemm_epilogue_pass.py
index 897a40abf425fe3d2b4b0af09edf2ffa86c080f7..939edf3d308a816a75ca82b75d3fb80b90b6edce 100644
--- a/python/paddle/fluid/tests/unittests/distributed_passes/test_dist_fuse_gemm_epilogue_pass.py
+++ b/python/paddle/fluid/tests/unittests/distributed_passes/test_dist_fuse_gemm_epilogue_pass.py
@@ -37,7 +37,7 @@ def verify_op_count(op_types, op_name, target_count):
 
 class MultiFCLayer(nn.Layer):
     def __init__(self, hidden, Activation):
-        super(MultiFCLayer, self).__init__()
+        super().__init__()
         self.linear1 = paddle.nn.Linear(hidden, 4 * hidden)
         self.linear2 = paddle.nn.Linear(4 * hidden, hidden)
         self.linear3 = paddle.nn.Linear(hidden, hidden)
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_backward_without_params.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_backward_without_params.py
index b1924d84db5898cf68f8aea899f1b74094ff1b67..179f7ab1f7fd502b4be119906ac227958912b047 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_backward_without_params.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_backward_without_params.py
@@ -21,7 +21,7 @@ import paddle
 
 class Net(paddle.nn.Layer):
     def __init__(self):
-        super(Net, self).__init__()
+        super().__init__()
 
     @paddle.jit.to_static
     def forward(self, x):
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cinn.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cinn.py
index 1bd66156aa5f5ab6c1a1253fc6315c132ad69e36..0ef5186dab2d0bc481ef3a227f8460bbc9f1cfa5 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cinn.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cinn.py
@@ -21,7 +21,7 @@ import paddle
 
 class Net(paddle.nn.Layer):
     def __init__(self):
-        super(Net, self).__init__()
+        super().__init__()
         self.relu = paddle.nn.functional.relu
         self.fc = paddle.nn.Linear(4, 4)
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cinn_prim.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cinn_prim.py
index a86cf18ade135ce03727d66a40e0f3f3fdac8ebb..6ace7696c383a37d0cebf3c89b89b92ce1331bc2 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cinn_prim.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cinn_prim.py
@@ -29,7 +29,7 @@ def apply_to_static(net, use_cinn):
 
 class PrimeNet(paddle.nn.Layer):
     def __init__(self):
-        super(PrimeNet, self).__init__()
+        super().__init__()
         self.fc = paddle.nn.Linear(4, 4)
 
     def forward(self, x):
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cinn_prim_gelu.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cinn_prim_gelu.py
index a4492f1bfdf6a271383ef1629bf8484d7ac1f5ac..0f764c0745dacb94931ecdb5e724a3439703894c 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cinn_prim_gelu.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cinn_prim_gelu.py
@@ -42,7 +42,7 @@ def generate_data(shape, dtype="float32"):
 
 class PrimeNet(paddle.nn.Layer):
     def __init__(self, approximate):
-        super(PrimeNet, self).__init__()
+        super().__init__()
         self.fc = paddle.nn.Linear(4, 4)
         self.approximate = approximate
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cinn_prim_layer_norm.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cinn_prim_layer_norm.py
index 2de94fdcbb193d1a48f48ff641c423edd8ea7721..571805ba449b3e7e92591082f3538edf330d60e7 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cinn_prim_layer_norm.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cinn_prim_layer_norm.py
@@ -42,7 +42,7 @@ def apply_to_static(net, use_cinn):
 
 class PrimeNet(paddle.nn.Layer):
     def __init__(self):
-        super(PrimeNet, self).__init__()
+        super().__init__()
         self.fc = paddle.nn.Linear(64, 64)
 
     def forward(self, x, w, b):
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cinn_prim_mean.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cinn_prim_mean.py
index ae2de19c8721d495bf014821c33b9d350460c4b4..d1d56fca374c099e6df50bc3a10914e5c1d83fea 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cinn_prim_mean.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cinn_prim_mean.py
@@ -45,7 +45,7 @@ class PrimeNet(
     paddle.nn.Layer,
 ):
     def __init__(self):
-        super(PrimeNet, self).__init__()
+        super().__init__()
         self.fc = paddle.nn.Linear(4, 4)
 
     def forward(self, x):
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_convert_call.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_convert_call.py
index 03a19c328bb08a2700c6a164143492bf918b894a..3669986174fb15ad812c722823d6130645443696 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_convert_call.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_convert_call.py
@@ -218,7 +218,7 @@ class TestStaticMethod(TestRecursiveCall2):
 
 class NotToStaticHelper(paddle.nn.Layer):
     def __init__(self):
-        super(NotToStaticHelper, self).__init__()
+        super().__init__()
 
     def sum(self, x):
         if x.shape[0] > 1:
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_program_translator.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_program_translator.py
index 3866cfe04d2aeee1e5e31c936b18645d7f415cec..e6856801c21bdb7e89aec0673ae5adc7c38346ef 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_program_translator.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_program_translator.py
@@ -335,7 +335,7 @@ obj = Obj()
 
 class Net2:
     def __init__(self):
-        super(Net2, self).__init__()
+        super().__init__()
         self.layer1 = paddle.nn.Linear(10, 10)
 
     def forward(self, data):
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_unuseful_inputs.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_unuseful_inputs.py
index 5cafba4e0401b9882d97b9b7385d312e1fc03951..41a2a7b30986af03bb259d70811d4bb99c01a211 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_unuseful_inputs.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_unuseful_inputs.py
@@ -33,7 +33,7 @@ def apply_to_static(support_to_static, model, image_shape=None):
 
 class Layer0(nn.Layer):
     def __init__(self, level):
-        super(Layer0, self).__init__()
+        super().__init__()
         self._linear1 = nn.Linear(10, 5)
         self._linear2 = nn.Linear(10, 5)
         self.layer1 = Layer1(level)
@@ -51,7 +51,7 @@ class Layer0(nn.Layer):
 
 class Layer1(nn.Layer):
     def __init__(self, level):
-        super(Layer1, self).__init__()
+        super().__init__()
         self.level = level
         self._linear = nn.Linear(5, 2)
diff --git a/python/paddle/fluid/tests/unittests/ipu/test_modelruntime_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_modelruntime_ipu.py
index b4e27a6cc47cf26e72e7f7f4682e21873f4f61e3..46b03297cd1d2ae2d307513ebfc5cdf8e5147ebf 100644
--- a/python/paddle/fluid/tests/unittests/ipu/test_modelruntime_ipu.py
+++ b/python/paddle/fluid/tests/unittests/ipu/test_modelruntime_ipu.py
@@ -23,7 +23,7 @@ import paddle
 
 class SimpleLayer(paddle.nn.Layer):
     def __init__(self):
-        super(SimpleLayer, self).__init__()
+        super().__init__()
         self.conv = paddle.nn.Conv2D(
             in_channels=3, out_channels=1, kernel_size=2, stride=1
         )
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_inference_predictor.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_inference_predictor.py
index d12f900e51651aa89f742220dc40cd06007efef5..1d097b42ae702191d934d3c5f945dbbba4bf9900 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_inference_predictor.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_inference_predictor.py
@@ -55,7 +55,7 @@ def getdtype(dtype="float32"):
 
 class BackendPaddle:
     def __init__(self):
-        super(BackendPaddle, self).__init__()
+        super().__init__()
         self.h2d_time = []
         self.compute_time = []
         self.d2h_time = []
@@ -341,7 +341,7 @@ class ConvBNLayer(paddle.nn.Layer):
 
 class Test(nn.Layer):
     def __init__(self):
-        super(Test, self).__init__()
+        super().__init__()
         self.conv = ConvBNLayer(
             num_channels=3, num_filters=64, filter_size=3, stride=2, act='relu'
         )
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_support_nhwc_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_support_nhwc_pass.py
index 179b191ec38328fb887a7960501f6c4b0fa7913b..a34ef16a3e03fa25595bb44fabc8ee3a14f94252 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_support_nhwc_pass.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_support_nhwc_pass.py
@@ -27,7 +27,7 @@ paddle.enable_static()
 
 class SimpleNet(nn.Layer):
     def __init__(self):
-        super(SimpleNet, self).__init__()
+        super().__init__()
         self.conv1 = nn.Conv2D(
             in_channels=4,
             out_channels=4,
diff --git a/python/paddle/fluid/tests/unittests/prim/composite_ops/test_composite_batch_norm.py b/python/paddle/fluid/tests/unittests/prim/composite_ops/test_composite_batch_norm.py
index ed6dcd6a6823e173ac0950da3c045bc9d47f0c3e..70e1fa87177025ab68681d2722812f5220eff572 100644
--- a/python/paddle/fluid/tests/unittests/prim/composite_ops/test_composite_batch_norm.py
+++ b/python/paddle/fluid/tests/unittests/prim/composite_ops/test_composite_batch_norm.py
@@ -350,7 +350,7 @@ def apply_to_static(net, use_cinn):
 
 class PrimeNet(paddle.nn.Layer):
     def __init__(self):
-        super(PrimeNet, self).__init__()
+        super().__init__()
         self.conv = nn.Conv2D(4, 2, (3, 3), bias_attr=False)
         self.bn = BatchNorm(2, act="relu")
         self.run_mean = zeros([2])
diff --git a/python/paddle/fluid/tests/unittests/prim/composite_ops/test_composite_layer_norm.py b/python/paddle/fluid/tests/unittests/prim/composite_ops/test_composite_layer_norm.py
index e9ded156500c3940de32e4ef7c044e38ba9983ad..558baf54795bfa88e44ad5276233863fe484df3b 100644
--- a/python/paddle/fluid/tests/unittests/prim/composite_ops/test_composite_layer_norm.py
+++ b/python/paddle/fluid/tests/unittests/prim/composite_ops/test_composite_layer_norm.py
@@ -212,7 +212,7 @@ def apply_to_static(net, use_cinn):
 
 class PrimeNet(paddle.nn.Layer):
     def __init__(self, n_shape):
-        super(PrimeNet, self).__init__()
+        super().__init__()
         self.ln = LayerNorm(n_shape)
 
     def forward(self, x):
diff --git a/python/paddle/fluid/tests/unittests/prim/composite_ops/test_composite_softmax.py b/python/paddle/fluid/tests/unittests/prim/composite_ops/test_composite_softmax.py
index d0f8b2bb455797c17cfc0029f65aa29956d0fbd4..728027cbf34027dcdc6237e1a73c5d07d24b8159 100644
--- a/python/paddle/fluid/tests/unittests/prim/composite_ops/test_composite_softmax.py
+++ b/python/paddle/fluid/tests/unittests/prim/composite_ops/test_composite_softmax.py
@@ -137,7 +137,7 @@ def apply_to_static(net, use_cinn):
 
 class PrimeNet(paddle.nn.Layer):
     def __init__(self):
-        super(PrimeNet, self).__init__()
+        super().__init__()
         self.sf = F.softmax
 
     def forward(self, x, current_axis):
diff --git a/python/paddle/fluid/tests/unittests/prim/model/bert.py b/python/paddle/fluid/tests/unittests/prim/model/bert.py
index 689fb30e7633302536f633c9d5b1a5bcd2331caa..f6c6499dbcb48e4433d3c3da7a1ad51a195ef001 100644
--- a/python/paddle/fluid/tests/unittests/prim/model/bert.py
+++ b/python/paddle/fluid/tests/unittests/prim/model/bert.py
@@ -87,7 +87,7 @@ class BertConfig:
 
 class BertLMPredictionHead(nn.Layer):
     def __init__(self, config: BertConfig, embedding_weights=None):
-        super(BertLMPredictionHead, self).__init__()
+        super().__init__()
         self.transform = nn.Linear(config.hidden_size, config.hidden_size)
         self.activation = getattr(nn.functional, config.hidden_act)
@@ -131,7 +131,7 @@ class BertLMPredictionHead(nn.Layer):
 
 class BertPretrainingHeads(nn.Layer):
     def __init__(self, config: BertConfig, embedding_weights=None):
-        super(BertPretrainingHeads, self).__init__()
+        super().__init__()
         self.predictions = BertLMPredictionHead(config, embedding_weights)
         self.seq_relationship = nn.Linear(config.hidden_size, 2)
@@ -143,7 +143,7 @@ class BertPretrainingHeads(nn.Layer):
 
 class BertEmbeddings(nn.Layer):
     def __init__(self, config: BertConfig):
-        super(BertEmbeddings, self).__init__()
+        super().__init__()
         self.word_embeddings = nn.Embedding(
             config.vocab_size, config.hidden_size
         )
@@ -190,7 +190,7 @@ class BertEmbeddings(nn.Layer):
 
 class BertPooler(nn.Layer):
     def __init__(self, config: BertConfig):
-        super(BertPooler, self).__init__()
+        super().__init__()
         self.dense = nn.Linear(config.hidden_size, config.hidden_size)
         self.activation = nn.Tanh()
@@ -208,7 +208,7 @@ class BertPooler(nn.Layer):
 
 class BertModel(nn.Layer):
     def __init__(self, config: BertConfig, to_static, enable_cinn):
-        super(BertModel, self).__init__()
+        super().__init__()
         self.config = config
         self.pad_token_id = config.pad_token_id
         self.initializer_range = config.initializer_range
@@ -372,7 +372,7 @@ class BertModel(nn.Layer):
 
 class Bert(nn.Layer):
     def __init__(self, to_static, enable_cinn):
-        super(Bert, self).__init__()
+        super().__init__()
         config = BertConfig()
         self.bert = BertModel(config, to_static, enable_cinn)
         self.cls = BertPretrainingHeads(
@@ -434,7 +434,7 @@ class Bert(nn.Layer):
 
 class BertPretrainingCriterion(paddle.nn.Layer):
     def __init__(self, vocab_size=VOCAB_SIZE):
-        super(BertPretrainingCriterion, self).__init__()
+        super().__init__()
         # CrossEntropyLoss is expensive since the inner reshape (copy)
         self.loss_fn = paddle.nn.loss.CrossEntropyLoss(ignore_index=-1)
         self.vocab_size = vocab_size
diff --git a/python/paddle/fluid/tests/unittests/prim/prim/flags/test_prim_flags_case.py b/python/paddle/fluid/tests/unittests/prim/prim/flags/test_prim_flags_case.py
index b2e2ad05ea439edab86df50c0609be7498e46c67..30fe2af5621f9a3e546efbece92cf9d3af9f9e60 100644
--- a/python/paddle/fluid/tests/unittests/prim/prim/flags/test_prim_flags_case.py
+++ b/python/paddle/fluid/tests/unittests/prim/prim/flags/test_prim_flags_case.py
@@ -28,7 +28,7 @@ def apply_to_static(net, use_cinn):
 
 class PrimeNet(paddle.nn.Layer):
     def __init__(self):
-        super(PrimeNet, self).__init__()
+        super().__init__()
 
     def forward(self, x):
         out = F.softmax(x)
diff --git a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_add_grad.py b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_add_grad.py
index 50ef9f6f13036ac114b2c9e6a7e4be7bb82b24e6..9da9e7131d1a917ec0d4e43677c49e229ad05552 100644
--- a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_add_grad.py
+++ b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_add_grad.py
@@ -29,7 +29,7 @@ def apply_to_static(net, use_cinn):
 
 class PrimeNet(paddle.nn.Layer):
     def __init__(self):
-        super(PrimeNet, self).__init__()
+        super().__init__()
         self.fc = paddle.nn.Linear(4, 4)
 
     def forward(self, x, y):
diff --git a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_add_tanh_grad.py b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_add_tanh_grad.py
index b037cc73bfd545c9e21862d183d9f2759333d0b5..520aef634b6fa9333dc68f6011e0a1133b9b1bf5 100644
--- a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_add_tanh_grad.py
+++ b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_add_tanh_grad.py
@@ -29,7 +29,7 @@ def apply_to_static(net, use_cinn):
 
 class PrimeNet(paddle.nn.Layer):
     def __init__(self):
-        super(PrimeNet, self).__init__()
+        super().__init__()
         self.fc = paddle.nn.Linear(4, 4)
 
     def forward(self, x, y):
diff --git a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_cast_grad.py b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_cast_grad.py
index 22913b284ddf8d24d29c7872f5a15a28833befe9..850ad82220165aaf5794adf01cffa50944f3e780 100644
--- a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_cast_grad.py
+++ b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_cast_grad.py
@@ -30,7 +30,7 @@ def apply_to_static(net, use_cinn):
 
 class PrimeNet(paddle.nn.Layer):
     def __init__(self):
-        super(PrimeNet, self).__init__()
+        super().__init__()
         self.fc = paddle.nn.Linear(4, 4)
 
     def forward(self, x):
diff --git a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_div_grad.py b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_div_grad.py
index 606b55b5a95c06fc097c03e0fe8964e38faadce6..844d30894de1879c70ec068cf3fae307884bca89 100644
--- a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_div_grad.py
+++ b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_div_grad.py
@@ -29,7 +29,7 @@ def apply_to_static(net, use_cinn):
 
 class PrimeNet(paddle.nn.Layer):
     def __init__(self):
-        super(PrimeNet, self).__init__()
+        super().__init__()
         self.fc = paddle.nn.Linear(4, 4)
 
     def forward(self, x, y):
diff --git a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_gather_grad.py b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_gather_grad.py
index 1f89b024e819b4757a20ee283ce0573f9aa39e94..77693dd1071da83260b9d0eb212e754efa4e31e0 100644
--- a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_gather_grad.py
+++ b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_gather_grad.py
@@ -31,7 +31,7 @@ def apply_to_static(net, use_cinn):
 
 class PrimeNet(paddle.nn.Layer):
     def __init__(self):
-        super(PrimeNet, self).__init__()
+        super().__init__()
         self.fc = paddle.nn.Linear(4, 4)
 
     def forward(self, x, index, axis):
diff --git a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_reshape_grad.py b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_reshape_grad.py
index d89e764da48d79b5b84c54486eeddb95a53ba8bf..4523e4af4bf44c1dfa8a621498766f8a49dd685f 100644
--- a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_reshape_grad.py
+++ b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_reshape_grad.py
@@ -29,7 +29,7 @@ def apply_to_static(net, use_cinn):
 
 class PrimeNet(paddle.nn.Layer):
     def __init__(self):
-        super(PrimeNet, self).__init__()
+        super().__init__()
         self.fc = paddle.nn.Linear(4, 4)
 
     def forward(self, x):
diff --git a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_sqrt_grad.py b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_sqrt_grad.py
index 8e623100dd09cb86b7aae0562035536944598b9f..a3b854fcc296b0b2062fd64165eae2ca6304ed73 100644
--- a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_sqrt_grad.py
+++ b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_sqrt_grad.py
@@ -34,7 +34,7 @@ def apply_to_static(net, use_cinn):
 
 class PrimeNet(paddle.nn.Layer):
     def __init__(self):
-        super(PrimeNet, self).__init__()
+        super().__init__()
         self.fc = paddle.nn.Linear(4, 4)
 
     def forward(self, x):
diff --git a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_sub_grad.py b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_sub_grad.py
index 107ffbd062f3b101d67994896b04ba48eb512343..87bd6ff0b360274d9755f91c113e00d4e8c48c25 100644
--- a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_sub_grad.py
+++ b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_sub_grad.py
@@ -29,7 +29,7 @@ def apply_to_static(net, use_cinn):
 
 class PrimeNet(paddle.nn.Layer):
     def __init__(self):
-        super(PrimeNet, self).__init__()
+        super().__init__()
         self.fc = paddle.nn.Linear(4, 4)
 
     def forward(self, x, y):
diff --git a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_tanh_grad.py b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_tanh_grad.py
index d28f84a685b0d0d83d085f1f76edfe003b6bd2fb..869a36e806695c34b791db427022b700cfae848e 100644
--- a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_tanh_grad.py
+++ b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_tanh_grad.py
@@ -34,7 +34,7 @@ def apply_to_static(net, use_cinn):
 
 class PrimeNet(paddle.nn.Layer):
     def __init__(self):
-        super(PrimeNet, self).__init__()
+        super().__init__()
         self.fc = paddle.nn.Linear(4, 4)
 
     def forward(self, x):
diff --git a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_transpose_grad.py b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_transpose_grad.py
index 948b1e33849085e164619c80bd743fbe29bca0ec..7be0be582fd3b8900351f725de75742dee957a8d 100644
--- a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_transpose_grad.py
+++ b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_transpose_grad.py
@@ -29,7 +29,7 @@ def apply_to_static(net, use_cinn):
 
 class PrimeNet(paddle.nn.Layer):
     def __init__(self):
-        super(PrimeNet, self).__init__()
+        super().__init__()
 
     def forward(self, x):
         out = paddle.transpose(x, [0, 2, 1])
diff --git a/python/paddle/fluid/tests/unittests/prim_op_test.py b/python/paddle/fluid/tests/unittests/prim_op_test.py
index 980fdc5f7a5ad69a3682351c606e5b9c6630a4bc..758fcdc1b49976dbeae47d7143d3c50999a47035 100644
--- a/python/paddle/fluid/tests/unittests/prim_op_test.py
+++ b/python/paddle/fluid/tests/unittests/prim_op_test.py
@@ -222,7 +222,7 @@ def apply_to_static(net, use_cinn):
 
 class PrimNet(paddle.nn.Layer):
     def __init__(self, python_api):
-        super(PrimNet, self).__init__()
+        super().__init__()
         self.python_api = python_api
 
     def forward(self, args):
diff --git a/python/paddle/fluid/tests/unittests/test_activation_op.py b/python/paddle/fluid/tests/unittests/test_activation_op.py
index 154e3c33f12365aab5395798447a372e1a2c48a3..08a4afba3437c0fda30cbc28218a196f4a8c26ca 100644
--- a/python/paddle/fluid/tests/unittests/test_activation_op.py
+++ b/python/paddle/fluid/tests/unittests/test_activation_op.py
@@ -3005,7 +3005,7 @@ class TestPow_ZeroDim(TestPow):
         self.shape = []
 
     def setUp(self):
-        super(TestPow_ZeroDim, self).setUp()
+        super().setUp()
         self.enable_cinn = False
 
diff --git a/python/paddle/fluid/tests/unittests/test_fused_attention_pass.py b/python/paddle/fluid/tests/unittests/test_fused_attention_pass.py
index f27daa2d0c119e023471d2029a4bccc5f5742c71..2a8ce9b1299acb86872ea41769e3d5c68ee414bc 100644
--- a/python/paddle/fluid/tests/unittests/test_fused_attention_pass.py
+++ b/python/paddle/fluid/tests/unittests/test_fused_attention_pass.py
@@ -33,7 +33,7 @@ class MultiHeadAttention(paddle.nn.Layer):
         pre_ln=True,
         attn_dropout=True,
     ):
-        super(MultiHeadAttention, self).__init__()
+        super().__init__()
         self.embed_dim = embed_dim
         self.kdim = embed_dim
         self.vdim = embed_dim
diff --git a/python/paddle/fluid/tests/unittests/test_lbfgs_v2.py b/python/paddle/fluid/tests/unittests/test_lbfgs_v2.py
index c7f8138318d9fd4f3eecce36a76e38043d963ba1..fa64c480ed970a4757a6930ed385f771d04c8332 100644
--- a/python/paddle/fluid/tests/unittests/test_lbfgs_v2.py
+++ b/python/paddle/fluid/tests/unittests/test_lbfgs_v2.py
@@ -31,7 +31,7 @@ np.random.seed(123)
 
 class Net(paddle.nn.Layer):
     def __init__(self, np_w, func):
-        super(Net, self).__init__()
+        super().__init__()
         self.func = func
         w = paddle.to_tensor(np_w)
         self.w = paddle.create_parameter(
diff --git a/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py b/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py
index 9322b258e7ddd71b23452cc7d0828b22c25752ba..2337364efa2a4690572d09b125c71f4117c8a571 100644
--- a/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py
+++ b/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py
@@ -407,7 +407,7 @@ class EncoderCell(SimpleRNNCell):
         dropout_prob=0.0,
         init_scale=0.1,
     ):
-        super(EncoderCell, self).__init__(input_size, hidden_size)
+        super().__init__(input_size, hidden_size)
         self.dropout_prob = dropout_prob
         # use add_sublayer to add multi-layers
         self.lstm_cells = []
@@ -453,7 +453,7 @@ class Encoder(Layer):
         dropout_prob=0.0,
         init_scale=0.1,
     ):
-        super(Encoder, self).__init__()
+        super().__init__()
         self.embedder = Embedding(vocab_size, embed_dim)
         self.stack_lstm = RNN(
             EncoderCell(
@@ -484,7 +484,7 @@ class Decoder(Layer):
         dropout_prob=0.0,
         init_scale=0.1,
     ):
-        super(Decoder, self).__init__()
+        super().__init__()
         self.embedder = Embedding(vocab_size, embed_dim)
         self.stack_lstm = RNN(
             DecoderCell(
@@ -603,7 +603,7 @@ class BaseModel(Layer):
         dropout_prob=0.0,
         init_scale=0.1,
     ):
-        super(BaseModel, self).__init__()
+        super().__init__()
         self.hidden_size = hidden_size
         self.word_embedding = Embedding(vocab_size, embed_dim)
         self.encoder = Encoder(
diff --git a/python/paddle/fluid/tests/unittests/xpu/parallel_dygraph_dataparallel_with_pylayer.py b/python/paddle/fluid/tests/unittests/xpu/parallel_dygraph_dataparallel_with_pylayer.py
index 070770fbcafd4bb5f7934433d87650f3e6d88418..2171cb2429246af230e67521b8d74753236afb68 100644
--- a/python/paddle/fluid/tests/unittests/xpu/parallel_dygraph_dataparallel_with_pylayer.py
+++ b/python/paddle/fluid/tests/unittests/xpu/parallel_dygraph_dataparallel_with_pylayer.py
@@ -44,7 +44,7 @@ class cus_tanh(PyLayer):
 
 class SimpleNet(paddle.nn.Layer):
     def __init__(self, train_id, model_id):
-        super(SimpleNet, self).__init__()
+        super().__init__()
         self.w = self.create_parameter(shape=[in_dim, batch], dtype="float32")
         self.linear = paddle.nn.Linear(in_dim, out_dim)
         self.tanh = paddle.tanh
diff --git a/python/paddle/fluid/tests/unittests/xpu/parallel_dygraph_gradient_check.py b/python/paddle/fluid/tests/unittests/xpu/parallel_dygraph_gradient_check.py
index 56d428a7781afeec20913d8cde33775fd2d2c5a1..8c996fa6a91344613ec6ad9196ffaae0d2f9104e 100644
--- a/python/paddle/fluid/tests/unittests/xpu/parallel_dygraph_gradient_check.py
+++ b/python/paddle/fluid/tests/unittests/xpu/parallel_dygraph_gradient_check.py
@@ -30,7 +30,7 @@ out_dim = 20
 
 class SimpleNet(paddle.nn.Layer):
     def __init__(self, train_id):
-        super(SimpleNet, self).__init__()
+        super().__init__()
         self.w1 = self.create_parameter(
             shape=[in_dim, out_dim], dtype="float32"
         )
diff --git a/python/paddle/fluid/tests/unittests/xpu/parallel_dygraph_gradient_check_in_eager_mode.py b/python/paddle/fluid/tests/unittests/xpu/parallel_dygraph_gradient_check_in_eager_mode.py
index 9ee5e8604388448ac4b76fc1d9f03375bb0caafb..f5ecd5cf4fc11b689533a45396fd06980a18b9ba 100644
--- a/python/paddle/fluid/tests/unittests/xpu/parallel_dygraph_gradient_check_in_eager_mode.py
+++ b/python/paddle/fluid/tests/unittests/xpu/parallel_dygraph_gradient_check_in_eager_mode.py
@@ -30,7 +30,7 @@ out_dim = 20
 
 class SimpleNet(paddle.nn.Layer):
     def __init__(self, train_id):
-        super(SimpleNet, self).__init__()
+        super().__init__()
         self.w1 = self.create_parameter(
             shape=[in_dim, out_dim], dtype="float32"
         )
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_recompute_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_recompute_op_xpu.py
index 3ec67eb76e13f435160fe6b63a8f596b41b1aec6..b29f72ff03b38ef2765414bd04d7ffd941eacd1b 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_recompute_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_recompute_op_xpu.py
@@ -52,7 +52,7 @@ class Naive_fc_net(paddle.nn.Layer):
     def __init__(
         self, input_size=10, recompute_blocks=[1, 3], recompute_kwargs={}
    ):
-        super(Naive_fc_net, self).__init__()
+        super().__init__()
         self.recompute_blocks = recompute_blocks
         self.recompute_kwargs = recompute_kwargs
         self.runfunc0 = get_fc_block(0, input_size, is_last=False)
diff --git a/python/paddle/incubate/optimizer/lbfgs.py b/python/paddle/incubate/optimizer/lbfgs.py
index 7a1be322d8705453e0c1da2b3292730d93a6f198..937a3b2f9af3f778d724c09b97ad1228cb24d263 100644
--- a/python/paddle/incubate/optimizer/lbfgs.py
+++ b/python/paddle/incubate/optimizer/lbfgs.py
@@ -89,7 +89,7 @@ class LBFGS(Optimizer):
             class Net(paddle.nn.Layer):
                 def __init__(self):
-                    super(Net, self).__init__()
+                    super().__init__()
                     w = paddle.to_tensor(np_w)
                     self.w = paddle.create_parameter(shape=w.shape, dtype=w.dtype,
                             default_initializer=paddle.nn.initializer.Assign(w))
diff --git a/python/paddle/nn/quant/format.py b/python/paddle/nn/quant/format.py
index ca5b6ea7f3e69077c636e69c97c92d36ab9e5a90..edd72ad8ce30cc843801dd2c143b20f2a757a827 100644
--- a/python/paddle/nn/quant/format.py
+++ b/python/paddle/nn/quant/format.py
@@ -23,7 +23,7 @@ from paddle.nn import Layer
 
 class LinearQuanterDequanter(Layer):
     def __init__(self, quanter, dequanter):
-        super(LinearQuanterDequanter, self).__init__()
+        super().__init__()
         self._quanter = quanter
         self._dequanter = dequanter
@@ -46,7 +46,7 @@ class LinearQuanterDequanter(Layer):
 
 class LinearQuanter(Layer):
     def __init__(self, scales, zero_point=None, quant_axis=None, bit_length=8):
-        super(LinearQuanter, self).__init__()
+        super().__init__()
         self._scales = paddle.to_tensor(scales, dtype="float32")
         self._zero_point = (
             paddle.zeros([1], dtype="float32")
@@ -97,7 +97,7 @@ class LinearQuanter(Layer):
 
 class LinearDequanter(Layer):
     def __init__(self, scales, zero_point=None, quant_axis=None, bit_length=8):
-        super(LinearDequanter, self).__init__()
+        super().__init__()
         self._scales = paddle.to_tensor(scales, dtype="float32")
         self._zero_point = (
             paddle.zeros([1], dtype="float32")
@@ -156,7 +156,7 @@ class ConvertibleQuantedLayer(Layer, metaclass=abc.ABCMeta):
         # Given codes in ./customized_quanter.py
         class CustomizedQuantedLayer(ConvertibleQuantedLayer):
             def __init__(self):
-                super(CustomizedQuantedLayer, self).__init__()
+                super().__init__()
                 self.weight_a = paddle.create_parameter(shape=[1], dtype='float32')
                 self.weight_b = paddle.create_parameter(shape=[1], dtype='float32')
                 self.quanter_for_weight_a = None
@@ -176,7 +176,7 @@ class ConvertibleQuantedLayer(Layer, metaclass=abc.ABCMeta):
     """
 
     def __init__(self):
-        super(ConvertibleQuantedLayer, self).__init__()
+        super().__init__()
         self.converted = False
 
     @abc.abstractmethod
diff --git a/python/paddle/nn/quant/qat/conv.py b/python/paddle/nn/quant/qat/conv.py
index f2ffc7b103ad721f9a510557d29a67d66450aed6..1cf33a8bcb344b6aebf9a2078f258551256f3d90 100644
--- a/python/paddle/nn/quant/qat/conv.py
+++ b/python/paddle/nn/quant/qat/conv.py
@@ -27,7 +27,7 @@ class QuantedConv2D(ConvertibleQuantedLayer):
     """
 
    def __init__(self, layer: Layer, q_config):
-        super(QuantedConv2D, self).__init__()
+        super().__init__()
 
         # For Conv2D
         self._groups = layer._groups
diff --git a/python/paddle/nn/quant/qat/linear.py b/python/paddle/nn/quant/qat/linear.py
index c0e015ce51c8602954eb79228b754221f7ce04c1..39b177f2c249566c7ac1db49c34406c704581f12 100644
--- a/python/paddle/nn/quant/qat/linear.py
+++ b/python/paddle/nn/quant/qat/linear.py
@@ -26,7 +26,7 @@ class QuantedLinear(ConvertibleQuantedLayer):
     """
 
    def __init__(self, layer: Layer, q_config):
-        super(QuantedLinear, self).__init__()
+        super().__init__()
         # For Linear
         self.weight = layer.weight
         self.bias = layer.bias
diff --git a/python/paddle/nn/quant/stub.py b/python/paddle/nn/quant/stub.py
index 74deb8aa75d10a7188c7bee9aa40586b4b53a747..ab977524153e7d7e48d392b2b9ff95cace95a3c7 100644
--- a/python/paddle/nn/quant/stub.py
+++ b/python/paddle/nn/quant/stub.py
@@ -36,7 +36,7 @@ class Stub(Layer):
             quanter = FakeQuanterWithAbsMaxObserver(moving_rate=0.9)
             class Model(paddle.nn.Layer):
                 def __init__(self, num_classes=10):
-                    super(Model, self).__init__()
+                    super().__init__()
                     self.conv = Conv2D(3, 6, 3, stride=1, padding=1)
                     self.quant = Stub(quanter)
                 def forward(self, inputs):
@@ -51,7 +51,7 @@ class Stub(Layer):
     """
 
    def __init__(self, observer=None):
-        super(Stub, self).__init__()
+        super().__init__()
         self._observer = observer
 
     def forward(self, input):
@@ -71,7 +71,7 @@ class QuanterStub(Layer):
     """
 
    def __init__(self, layer: Stub, q_config):
-        super(QuanterStub, self).__init__()
+        super().__init__()
         self._observer = None
         if layer._observer is not None:
             self._observer = layer._observer._instance(layer)
diff --git a/python/paddle/quantization/base_observer.py b/python/paddle/quantization/base_observer.py
index ede6873ef50bc0043f3b2cf1d8912379cb4c5535..76d60de045da62e8c80b459715c3db432928f91b 100644
--- a/python/paddle/quantization/base_observer.py
+++ b/python/paddle/quantization/base_observer.py
@@ -25,7 +25,7 @@ class BaseObserver(BaseQuanter, metaclass=abc.ABCMeta):
     """
 
    def __init__(self):
-        super(BaseObserver, self).__init__()
+        super().__init__()
 
     @abc.abstractmethod
     def cal_thresholds(self):
diff --git a/python/paddle/quantization/base_quanter.py b/python/paddle/quantization/base_quanter.py
index 4aa4598351bf3c4eda48f8b13d06fea1a6dd8421..c73627e1002bef8ca8a62ba35e1626bb64ebd6af 100644
--- a/python/paddle/quantization/base_quanter.py
+++ b/python/paddle/quantization/base_quanter.py
@@ -29,7 +29,7 @@ class BaseQuanter(Layer, metaclass=abc.ABCMeta):
     """
 
    def __init__(self):
-        super(BaseQuanter, self).__init__()
+        super().__init__()
 
     @abc.abstractmethod
     def forward(self, input):
diff --git a/python/paddle/quantization/config.py b/python/paddle/quantization/config.py
index fa3e8cc8237415207038e6efb8b511e1b3ca309c..cc58b40ac2a3a1e15983056dfa05c68d9f31162c 100644
--- a/python/paddle/quantization/config.py
+++ b/python/paddle/quantization/config.py
@@ -118,7 +118,7 @@ class QuantConfig:
             class Model(paddle.nn.Layer):
                 def __init__(self):
-                    super(Model, self).__init__()
+                    super().__init__()
                     self.fc = Linear(576, 120)
             model = Model()
             quanter = FakeQuanterWithAbsMaxObserver(moving_rate=0.9)
@@ -162,7 +162,7 @@ class QuantConfig:
             class Model(paddle.nn.Layer):
                 def __init__(self):
-                    super(Model, self).__init__()
+                    super().__init__()
                     self.fc = Linear(576, 120)
             model = Model()
             quanter = FakeQuanterWithAbsMaxObserver(moving_rate=0.9)
@@ -207,7 +207,7 @@ class QuantConfig:
             class Model(paddle.nn.Layer):
                 def __init__(self):
-                    super(Model, self).__init__()
+                    super().__init__()
                     self.fc = Linear(576, 120)
             model = Model()
             quanter = FakeQuanterWithAbsMaxObserver(moving_rate=0.9)
@@ -388,7 +388,7 @@ class QuantConfig:
             class Model(paddle.nn.Layer):
                 def __init__(self):
-                    super(Model, self).__init__()
+                    super().__init__()
                     self.fc = Sequential(Linear(576, 120),Linear(576, 120))
             model = Model()
             quanter = FakeQuanterWithAbsMaxObserver(moving_rate=0.9)
diff --git a/python/paddle/quantization/factory.py b/python/paddle/quantization/factory.py
index a57a2e95e318368f46be8dd6194b3c1cfe6a484b..d7ad2c355ba47f48d312dbd873a8aa72f552b912 100644
--- a/python/paddle/quantization/factory.py
+++ b/python/paddle/quantization/factory.py
@@ -56,7 +56,7 @@ class QuanterFactory(ClassWithArguments):
     """
 
    def __init__(self, *args, **kwargs):
-        super(QuanterFactory, self).__init__(*args, **kwargs)
+        super().__init__(*args, **kwargs)
         self.partial_class = None
 
     def _instance(self, layer: Layer) -> BaseQuanter:
diff --git a/python/paddle/quantization/observers/abs_max.py b/python/paddle/quantization/observers/abs_max.py
index 4c29dd907a80c193ec179ab66a158ee8a0444264..47ef545e751d1ee8fd63d544ea6eddf518cdb1c1 100644
--- a/python/paddle/quantization/observers/abs_max.py
+++ b/python/paddle/quantization/observers/abs_max.py
@@ -39,7 +39,7 @@ class AbsmaxObserver(ObserverFactory):
     """
 
    def __init__(self, quant_bits=8):
-        super(AbsmaxObserver, self).__init__(quant_bits=quant_bits)
+        super().__init__(quant_bits=quant_bits)
 
     def _get_class(self):
         return AbsmaxObserverLayer
@@ -53,7 +53,7 @@ class AbsmaxObserverLayer(BaseObserver):
     INIT_ABS_MAX = 1e-7
 
    def __init__(self, layer, quant_bits=8):
-        super(AbsmaxObserverLayer, self).__init__()
+        super().__init__()
         self._quant_bits = quant_bits
         self.abs_max_val = paddle.to_tensor(AbsmaxObserverLayer.INIT_ABS_MAX)
diff --git a/python/paddle/quantization/ptq.py b/python/paddle/quantization/ptq.py
index a9204397b717111fe6064b9e466dda9e4b5ef07a..48db8d8d92f7d3eb203a39a3e44bd0e6e990cf60 100644
--- a/python/paddle/quantization/ptq.py
+++ b/python/paddle/quantization/ptq.py
@@ -27,7 +27,7 @@ class PTQ(Quantization):
     """
 
    def __init__(self, config: QuantConfig):
-        super(PTQ, self).__init__(config)
+        super().__init__(config)
 
     def _is_parallel_training(self):
         try:
diff --git a/python/paddle/quantization/qat.py b/python/paddle/quantization/qat.py
index e7a28a3b3a957102f86a6d5f4d057560eece9492..cff2304acbe09f30b240896498617b3d341e5745 100644
--- a/python/paddle/quantization/qat.py
+++ b/python/paddle/quantization/qat.py
@@ -36,7 +36,7 @@ class QAT(Quantization):
     """
 
    def __init__(self, config: QuantConfig):
-        super(QAT, self).__init__(config)
+        super().__init__(config)
 
     def quantize(self, model: Layer, inplace=False):
         r"""
diff --git a/python/paddle/quantization/quanters/abs_max.py b/python/paddle/quantization/quanters/abs_max.py
index 14344459eba963a9fc73511c05cf5fe31ccdfbf6..abb4cb84002702e09f323f1e7948e4a3b9e94204 100644
--- a/python/paddle/quantization/quanters/abs_max.py
+++ b/python/paddle/quantization/quanters/abs_max.py
@@ -82,7 +82,7 @@ class FakeQuanterWithAbsMaxObserver(QuanterFactory):
         dtype='float32',
         name=None,
     ):
-        super(FakeQuanterWithAbsMaxObserver, self).__init__(
+        super().__init__(
             name=name,
             moving_rate=moving_rate,
             bit_length=bit_length,
@@ -102,7 +102,7 @@ class FakeQuanterWithAbsMaxObserverLayer(BaseQuanter):
         bit_length=8,
         dtype='float32',
     ):
-        super(FakeQuanterWithAbsMaxObserverLayer, self).__init__()
+        super().__init__()
         self._moving_rate = moving_rate
         self._bit_length = bit_length
         scale_prefix = (
diff --git a/python/paddle/quantization/wrapper.py b/python/paddle/quantization/wrapper.py
index 96178d28210cd14ab2d917b7b9e6bfb9963dbf61..cef847a5a1b051b5154a5bb1962d537d1b711eda 100644
--- a/python/paddle/quantization/wrapper.py
+++ b/python/paddle/quantization/wrapper.py
@@ -34,7 +34,7 @@ class ObserveWrapper(Layer):
         observed: Layer,
         observe_input=True,
     ):
-        super(ObserveWrapper, self).__init__()
+        super().__init__()
         self._observer = observer
         self._observed = observed
         self._observe_input = observe_input
diff --git a/python/paddle/tests/quantization/test_customized_quanter.py b/python/paddle/tests/quantization/test_customized_quanter.py
index 04ad5b2f2849c9f55f393b1a25b7edd7ae4b826a..2c7d3f33250a02af01cef73c4651c5c58bd6891d 100644
--- a/python/paddle/tests/quantization/test_customized_quanter.py
+++ b/python/paddle/tests/quantization/test_customized_quanter.py
@@ -28,7 +28,7 @@ linear_quant_axis = 1
 
 @quanter("CustomizedQuanter")
 class CustomizedQuanterLayer(BaseQuanter):
     def __init__(self, layer, bit_length=8, kwargs1=None):
-        super(CustomizedQuanterLayer, self).__init__()
+        super().__init__()
         self._layer = layer
         self._bit_length = bit_length
         self._kwargs1 = kwargs1
diff --git a/python/paddle/tests/quantization/test_ptq.py b/python/paddle/tests/quantization/test_ptq.py
index f5237fdd87d8b123f2b1584e302ebd4923ee679b..afac5edf0b0ede7193e7e2d4c4d796571412a4c2 100644
--- a/python/paddle/tests/quantization/test_ptq.py
+++ b/python/paddle/tests/quantization/test_ptq.py
@@ -29,7 +29,7 @@ from paddle.quantization.observers.abs_max import AbsmaxObserverLayer
 
 class LeNetDygraph(paddle.nn.Layer):
     def __init__(self, num_classes=10):
-        super(LeNetDygraph, self).__init__()
+        super().__init__()
         self.num_classes = num_classes
         self.features = Sequential(
             Conv2D(1, 6, 3, stride=1, padding=1),
diff --git a/python/paddle/tests/quantization/test_qat.py b/python/paddle/tests/quantization/test_qat.py
index 920e6b2bde29101935c0efe8b83de875bbfd5896..a94c4025f8451fe27d49c5236ff279ce4ab75462 100644
--- a/python/paddle/tests/quantization/test_qat.py
+++ b/python/paddle/tests/quantization/test_qat.py
@@ -41,7 +41,7 @@ class RandomDataset(Dataset):
 
 class Model(paddle.nn.Layer):
     def __init__(self, num_classes=10):
-        super(Model, self).__init__()
+        super().__init__()
         self.num_classes = num_classes
         self.features = Sequential(
             Conv2D(3, 6, 3, stride=1, padding=1),
diff --git a/python/paddle/tests/quantization/test_quant.py b/python/paddle/tests/quantization/test_quant.py
index d5c49c2353b38c411cbe73d0fc94a9c2eb84f62c..3842a67f571fdb32667fea62cc4cdc9c311a0aa1 100644
--- a/python/paddle/tests/quantization/test_quant.py
+++ b/python/paddle/tests/quantization/test_quant.py
@@ -24,7 +24,7 @@ from paddle.quantization.quanters import FakeQuanterWithAbsMaxObserver
 
 class LeNetDygraph(paddle.nn.Layer):
     def __init__(self, num_classes=10):
-        super(LeNetDygraph, self).__init__()
+        super().__init__()
         self.num_classes = num_classes
         self.features = Sequential(
             Conv2D(3, 6, 3, stride=1, padding=1),
diff --git a/python/paddle/tests/quantization/test_stub.py b/python/paddle/tests/quantization/test_stub.py
index 6f0337d1ac48109e1976ed2ac7137a56ca0b5fb4..987428c14b814820a0fcb9954081992e258cf33a 100644
--- a/python/paddle/tests/quantization/test_stub.py
+++ b/python/paddle/tests/quantization/test_stub.py
@@ -28,7 +28,7 @@ quanter = FakeQuanterWithAbsMaxObserver(moving_rate=0.9)
 
 class Model(paddle.nn.Layer):
     def __init__(self, num_classes=10):
-        super(Model, self).__init__()
+        super().__init__()
         self.quant_in = Stub()
         self.conv = Conv2D(3, 6, 3, stride=1, padding=1)
         self.quant = Stub(quanter)
diff --git a/python/paddle/utils/install_check.py b/python/paddle/utils/install_check.py
index ad88aaaa5774065f178f573761c21d831d560adf..064d70ed4f892962fa05d828ce345ba0d29c2ee2 100644
--- a/python/paddle/utils/install_check.py
+++ b/python/paddle/utils/install_check.py
@@ -186,7 +186,7 @@ def train_for_run_parallel():
         """
 
        def __init__(self):
-            super(LinearNet, self).__init__()
+            super().__init__()
             self._linear1 = paddle.nn.Linear(10, 10)
             self._linear2 = paddle.nn.Linear(10, 1)
diff --git a/python/paddle/vision/models/_utils.py b/python/paddle/vision/models/_utils.py
index a556700801794a31a2a9ac382ec1de183f4cf149..08b103b3a68b28fd13d8e6f3780b65a098abe72b 100644
--- a/python/paddle/vision/models/_utils.py
+++ b/python/paddle/vision/models/_utils.py
@@ -87,7 +87,7 @@ class IntermediateLayerGetter(nn.LayerDict):
             if not return_layers:
                 break
 
-        super(IntermediateLayerGetter, self).__init__(layers)
+        super().__init__(layers)
         self.return_layers = orig_return_layers
 
     def forward(self, x):
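
Note: every hunk in this patch is the same mechanical rewrite, enforced by the `UP008` rule ("super-call-with-parameters", from pyupgrade) that the `pyproject.toml` hunk newly enables in Ruff. The redundant Python 2 style `super(Class, self)` call is replaced by the zero-argument form, which Python 3 resolves through the implicit `__class__` closure cell of the enclosing method. A minimal sketch of the before/after pattern (illustrative only; `SimpleDemoNet` is a made-up layer, not taken from the patch):

```python
import paddle


class SimpleDemoNet(paddle.nn.Layer):
    def __init__(self):
        # Old Python 2 compatible spelling, flagged by UP008:
        #     super(SimpleDemoNet, self).__init__()
        # Zero-argument form; identical behavior inside a method body:
        super().__init__()
        self.fc = paddle.nn.Linear(4, 4)

    def forward(self, x):
        return self.fc(x)
```

The two spellings are equivalent inside an ordinary method, so the rewrite is behavior-preserving. The zero-argument form only fails where no enclosing class scope supplies `__class__` (module-level code, plain functions, or nested scopes such as comprehensions), which does not apply to the `__init__`, `setUp`, and `__eq__` bodies touched here.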