Unverified commit 8c8cf0fd, authored by Tony Cao, committed by GitHub

[CodeStyle][E266] remove multiple '#' in comments (#47772)

* fix flake8 CodeStyle E266

* fix comments
Parent a97b3630
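For context, flake8's E266 check (implemented in pycodestyle) flags block comments that begin with more than one '#'. A minimal before/after sketch of the style this commit enforces, using banner text taken from the diff below; remaining violations can be listed with `flake8 --select=E266 <path>`:

```python
# Before: each of these lines triggers "E266 too many leading '#' for block comment"
## Utils ##
### train mode

# After: block comments start with a single '#' followed by a space
# Utils #
# train mode
```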
@@ -15,9 +15,9 @@
import yaml
import re
-########################
-### Global Variables ###
-########################
+####################
+# Global Variables #
+####################
ops_to_fill_zero_for_empty_grads = set(
    [
        "split_grad",
@@ -95,9 +95,9 @@ yaml_types_mapping = {
}
-#############################
-### File Reader Helpers ###
-#############################
+#########################
+# File Reader Helpers #
+#########################
def AssertMessage(lhs_str, rhs_str):
    return f"lhs: {lhs_str}, rhs: {rhs_str}"
@@ -127,9 +127,9 @@ def ReadBwdFile(filepath):
    return ret
-##################################
-### Generic Helper Functions ###
-##################################
+##############################
+# Generic Helper Functions #
+##############################
def FindGradName(string):
    return string + "_grad"
@@ -252,9 +252,9 @@ def GetIndent(num):
    return "".join([tab for i in range(num)])
-######################
-### Yaml Parsers ###
-######################
+##################
+# Yaml Parsers #
+##################
def ParseYamlArgs(string):
    # Example: const Tensor& x, const Tensor& y, bool transpose_x, bool transpose_y
@@ -398,9 +398,9 @@ def ParseYamlInplaceInfo(string):
    return inplace_map
-########################
-### Generator Base ###
-########################
+####################
+# Generator Base #
+####################
class FunctionGeneratorBase:
    def __init__(self, forward_api_contents, namespace):
        self.forward_api_contents = forward_api_contents
...
@@ -54,9 +54,9 @@ black_ops_list = [
]
-###########
-## Utils ##
-###########
+#########
+# Utils #
+#########
def ParseArguments():
    parser = argparse.ArgumentParser(
        description='Eager Code Generator Args Parser'
@@ -72,9 +72,9 @@ def ParseArguments():
    return args
-########################
-## Code Gen Templates ##
-########################
+######################
+# Code Gen Templates #
+######################
SET_PLAIN_TENSOR_WRAPPER_TEMPLATE = """ void SetTensorWrapper{}(const paddle::experimental::Tensor& {}) {{
    {} = egr::TensorWrapper({}, {});
}}
@@ -479,9 +479,9 @@ def IsInvokeForwardApi(api_contents, forward_api_name_list):
)
-#######################
-## Generator Helpers ##
-#######################
+#####################
+# Generator Helpers #
+#####################
def GenerateCoreOpInfoDeclaration():
    return CORE_OPS_DECLARATION_TEMPLATE
@@ -517,9 +517,9 @@ def GenerateCoreOpInfoDefinition():
    return core_ops_info_definition_str
-#####################
-## Generator Class ##
-#####################
+###################
+# Generator Class #
+###################
class DygraphFunctionGeneratorBase(FunctionGeneratorBase):
    def __init__(
        self,
@@ -1033,9 +1033,9 @@ class DygraphFunctionGeneratorBase(FunctionGeneratorBase):
        # Basic Validation Check
        self.DygraphYamlValidationCheck()
-        ##########################
-        ## Parsing Raw Contents ##
-        ##########################
+        ########################
+        # Parsing Raw Contents #
+        ########################
        # Parse forward and backward inplace_map
        self.ParseForwardInplaceInfo()
        if self.grad_api_contents is not None:
@@ -1066,9 +1066,9 @@ class DygraphFunctionGeneratorBase(FunctionGeneratorBase):
        # Forwards Validation Check
        self.ForwardsValidationCheck()
-        #############################
-        ## Process Parsed Contents ##
-        #############################
+        ###########################
+        # Process Parsed Contents #
+        ###########################
        # Initialize forward_inputs_position_map, forward_outputs_position_map
        self.DetermineForwardPositionMap(
            self.forward_inputs_list, self.forward_returns_list
@@ -1711,9 +1711,9 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
    def run(self):
        super().run()
-        #####################
-        ## Code Generation ##
-        #####################
+        ###################
+        # Code Generation #
+        ###################
        # Definition And Declaration
        self.GenerateForwardDefinitionAndDeclaration(is_inplaced=False)
@@ -2341,9 +2341,9 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase):
        self.ResetOptionalInputs()
-        #####################
-        ## Code Generation ##
-        #####################
+        ###################
+        # Code Generation #
+        ###################
        # Higher-order GradNode generation
        (
            has_higher_order_node,
@@ -2503,9 +2503,9 @@ class DygraphForwardAndNodesGenerator(GeneratorBase):
        self.GenerateCode()
-##################
-## File Writers ##
-##################
+################
+# File Writers #
+################
def GenerateNodeCCFile(filepath, node_definition_str):
    if os.path.exists(filepath):
        os.remove(filepath)
...
@@ -18,9 +18,9 @@ from codegen_utils import FunctionGeneratorBase, GeneratorBase
from codegen_utils import GetForwardFunctionName, IsVectorTensorType
from codegen_utils import GetInplacedFunctionName
-###########################
-## Global Configurations ##
-###########################
+#########################
+# Global Configurations #
+#########################
skipped_forward_api_names = set([])
@@ -58,9 +58,9 @@ def FindParsingFunctionFromAttributeType(atype):
    return atype_to_parsing_function[atype]
-##########################
-## Refactored Functions ##
-##########################
+########################
+# Refactored Functions #
+########################
PARSE_PYTHON_C_TENSORS_TEMPLATE = (
    " auto {} = {}(\"{}\", \"{}\", args, {}, {});\n"
)
@@ -234,9 +234,9 @@ NAMESPACE_WRAPPER_TEMPLATE = """namespace {} {{
"""
-#######################
-## Generator Classes ##
-#######################
+#####################
+# Generator Classes #
+#####################
class PythonCSingleFunctionGenerator(FunctionGeneratorBase):
    def __init__(self, forward_api_contents, namespace):
        # Members from Parent:
@@ -565,9 +565,9 @@ class PythonCGenerator(GeneratorBase):
        self.AttachNamespace()
-############################
-## Code Generation Helper ##
-############################
+##########################
+# Code Generation Helper #
+##########################
def ParseArguments():
    parser = argparse.ArgumentParser(
        description='Eager Code Generator Args Parser'
...
@@ -71,7 +71,7 @@ registerd_op = {  # forwards
    "equal": "EqualParser",
    "expand": "ExpandParser",
    "squeeze2": "SqueezeParser",
-    ## backwords
+    # backwords
    "matmul_grad": "MatMulGradParser",
    "mul_grad": "MulGradParser",
    "relu_grad": "ReluGradParser",
@@ -93,7 +93,7 @@ registerd_op = {  # forwards
    "gather_grad": "GatherGradParser",
    "transpose2_grad": "TransposeGradParser",
    "layer_norm_grad": "LayerNormGradParser",
-    ## opt
+    # opt
    "sgd": "SGDParser",
    # "adam": "AdamParser",
}
@@ -445,7 +445,7 @@ class MinParser(AscendParserBase):
        return [min_out], [[0]]
-## cal
+# cal
class LogParser(AscendParserBase):
    def __init__(self, graph, var2geop):
        super().__init__(graph, var2geop)
@@ -605,7 +605,7 @@ class ReduceSumParser(AscendParserBase):
        # return [increment]
-## matrix cal
+# matrix cal
class MatMulParser(AscendParserBase):
    def __init__(self, graph, var2geop):
        super().__init__(graph, var2geop)
@@ -803,7 +803,7 @@ class LayerNormParser(AscendParserBase):
        return [y, mean, variance], [[1], [2], [0]]
-## activate function
+# activate function
class ReluParser(AscendParserBase):
    def __init__(self, graph, var2geop):
        super().__init__(graph, var2geop)
@@ -843,7 +843,7 @@ class TanhParser(AscendParserBase):
        return [tanh], [[0]]
-## loss function
+# loss function
class SoftmaxWithCrossEntropyParser(AscendParserBase):
    def __init__(self, graph, var2geop):
        super().__init__(graph, var2geop)
@@ -932,7 +932,7 @@ class SoftMaxParser(AscendParserBase):
        return [softmax], [[0]]
-## general
+# general
class ShapeParser(AscendParserBase):
    def __init__(self, graph, var2geop):
        super().__init__(graph, var2geop)
@@ -1038,7 +1038,7 @@ class TruncatedNormalParser(AscendParserBase):
            .set_attr_int32("seed", 0)
        )
-        ## wirte the output of truncatedNormal from startup_program to main_program
+        # wirte the output of truncatedNormal from startup_program to main_program
        if self.op.block.var(self.op.output('Out')[0]).persistable:
            # print("%s is Persistable in truncated_normal" %
            # (self.op.output('Out')[0]))
@@ -1524,7 +1524,7 @@ class UnSqueezeParser(AscendParserBase):
        return [shape, output], [[1], [0]]
-## parallel
+# parallel
class AllGatherParser(AscendParserBase):
    def __init__(self, graph, var2geop):
        super().__init__(graph, var2geop)
@@ -1821,7 +1821,7 @@ class SqueezeParser(AscendParserBase):
# *************************** *************************#
# *************************** *************************#
# ****************************************************************#
-## grad
+# grad
class ReduceSumGradParser(AscendParserBase):
    def __init__(self, graph, var2geop):
        super().__init__(graph, var2geop)
...
@@ -71,7 +71,7 @@ class CollectiveController(Controller):
                "PADDLE_GLOBAL_RANK": "{}".format(i + rank_offset),
                "PADDLE_LOCAL_RANK": "{}".format(i),
                "PADDLE_NNODES": "{}".format(len(ips)),
-                ## compatible env
+                # compatible env
                "PADDLE_TRAINER_ENDPOINTS": ",".join(job_endpoints),
                "PADDLE_CURRENT_ENDPOINT": job_endpoints[i + rank_offset],
                "PADDLE_TRAINER_ID": "{}".format(i + rank_offset),
@@ -157,7 +157,7 @@ class CollectiveController(Controller):
                "PADDLE_GLOBAL_RANK": "{}".format(i + rank_offset),
                "PADDLE_LOCAL_RANK": "{}".format(i),
                "PADDLE_NNODES": "{}".format(self.job.replicas),
-                ## compatible env
+                # compatible env
                "PADDLE_TRAINER_ENDPOINTS": ",".join(job_endpoints),
                "PADDLE_CURRENT_ENDPOINT": endpoints[i],
                "PADDLE_TRAINER_ID": "{}".format(i + rank_offset),
...
@@ -1238,7 +1238,7 @@ class SplitTrainerOpsPass(PassBase):
            attrs=attrs,
        )
-        ## TODO add check for bp block
+        # TODO add check for bp block
        # check_op_device(program.global_block(), DEFAULT_DEVICE)
    def _apply_single_impl(self, main_program, startup_program, pass_ctx):
...
@@ -710,7 +710,7 @@ def find_heter_ops(program, default_device="cpu"):
        ):
            param_name = op.input(SPARSE_OP_TYPE_DICT[forward_op_type])[0]
            if param_name in var2idx:
-                ## insert sum op & remove sum op from var2idx and origin place
+                # insert sum op & remove sum op from var2idx and origin place
                op_list = list(block.ops)
                sum_op = op_list[var2idx[param_name]]
                sum_op_inputs = {
@@ -979,7 +979,7 @@ def find_entrance_exit_private(program, program_block_ops_list):
    block_var_detail = []
    persistables = []
    for index, block_op_list in enumerate(program_block_ops_list):
-        ## forward
+        # forward
        block_input, block_output = find_ops_list_input_output(
            program, block_op_list["forward"]
        )
@@ -999,7 +999,7 @@ def find_entrance_exit_private(program, program_block_ops_list):
            }
        }
-        ## backward
+        # backward
        bp_block_input, bp_block_output = find_ops_list_input_output(
            program, block_op_list["backward"]
        )
@@ -1115,7 +1115,7 @@ def entrance_exit_check(
def delete_block_useless_exit(
    program, program_block_ops_list, block_var_detail
):
-    ## forward
+    # forward
    for index in range(len(block_var_detail)):
        if index == len(block_var_detail) - 1:
            break
@@ -1128,7 +1128,7 @@ def delete_block_useless_exit(
    for var in need_delete_var:
        current_block_exit.remove(var)
-    ## backward
+    # backward
    for index in range(len(block_var_detail) - 1, -1, -1):
        if index - 1 < 0:
            break
...
@@ -22,7 +22,7 @@ from paddle.incubate.autograd.primrules import _jvp, _transpose
paddle.enable_static()
-############################ Test linearize rules ############################
+# --------------------- Test linearize rules ----------------------- #
class TestAddPJVPAndTranspose(unittest.TestCase):
    def setUp(self):
        self.main_program = paddle.static.Program()
...
@@ -22,7 +22,7 @@ from paddle.incubate.autograd.primrules import _orig2prim
paddle.enable_static()
-############################ Test orig2prim rules ############################
+# ----------------------- Test orig2prim rules ---------------------------- #
class TestElementWiseAddOrig2Prim(unittest.TestCase):
    def setUp(self):
        self.main_program = paddle.static.Program()
...
@@ -22,7 +22,7 @@ from paddle.incubate.autograd.primrules import _prim2orig
paddle.enable_static()
-############################ Test prim2orig rules ############################
+# ------------------------ Test prim2orig rules ---------------------------- #
class TestAddPPrim2Orig(unittest.TestCase):
    def setUp(self):
        self.main_program = paddle.static.Program()
...
@@ -789,7 +789,7 @@ class Transformer(Layer):
        )
        predict_ids = []
        parent_ids = []
-        ### initialize states of beam search ###
+        # initialize states of beam search
        log_probs = to_variable(
            np.array(
                [[0.0] + [-inf] * (beam_size - 1)] * batch_size, dtype="float32"
...
@@ -28,7 +28,7 @@ class TrtConvertReduceMeanTest(TrtLayerAutoScanTest):
            program_config.ops[i].attrs for i in range(len(program_config.ops))
        ]
-        ## dim should be in (-rank, rank), and not NONE
+        # dim should be in (-rank, rank), and not NONE
        rank = len(inputs['input_data'].shape)
        for x in attrs[0]["dim"]:
            if x >= rank or x <= -rank:
...
@@ -29,7 +29,7 @@ class TrtConvertReduceSumTest(TrtLayerAutoScanTest):
            program_config.ops[i].attrs for i in range(len(program_config.ops))
        ]
-        ## dim should be in (-rank, rank), and not NONE
+        # dim should be in (-rank, rank), and not NONE
        rank = len(inputs['input_data'].shape)
        for x in attrs[0]["dim"]:
            if x >= rank or x <= -rank:
...
@@ -123,7 +123,7 @@ class TrtConvertActivationTest(TrtLayerAutoScanTest):
            attrs, False
        ), 1e-3
-        ## for dynamic_shape
+        # for dynamic_shape
        generate_dynamic_shape(attrs)
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
...
@@ -22,26 +22,27 @@ paddle.enable_static()
class TestCustomStream(unittest.TestCase):
+    """
+    fill_constant(cpu) gaussian_random
+    | | | |
+    | | matmul_v2(s1) fill_constant
+    | | | | |
+    | | elementwise_add(s1) |
+    | | | |
+    | elementwise_sub(cpu) |
+    | | | |
+    | tanh(cpu) elementwise_add(s2)
+    | | |
+    elementwise_sub(s1) tanh(s2)
+    | |
+    elementwise_add(s2)
+    |
+    reduce_mean(s2)
+    """
    def setUp(self):
        self.steps = 3
-    ###
-    ### fill_constant(cpu) gaussian_random
-    ### | | | |
-    ### | | matmul_v2(s1) fill_constant
-    ### | | | | |
-    ### | | elementwise_add(s1) |
-    ### | | | |
-    ### | elementwise_sub(cpu) |
-    ### | | | |
-    ### | tanh(cpu) elementwise_add(s2)
-    ### | | |
-    ### elementwise_sub(s1) tanh(s2)
-    ### | |
-    ### elementwise_add(s2)
-    ### |
-    ### reduce_mean(s2)
-    ###
    def set_custom_stream(self, prog):
        op_index_for_stream1 = [2, 4, 9]
        op_index_for_stream2 = [7, 8, 10, 11]
...
@@ -369,7 +369,7 @@ class TestBatchNormUseGlobalStats(unittest.TestCase):
            self.places.append(fluid.CUDAPlace(0))
        self.init_test()
-    ### train mode
+    # train mode
    def init_test(self):
        self.use_global_stats = True
        self.trainable_statistics = False
@@ -400,21 +400,21 @@ class TestBatchNormUseGlobalStats(unittest.TestCase):
class TestBatchNormUseGlobalStatsCase1(TestBatchNormUseGlobalStats):
-    ### test mode
+    # test mode
    def init_test(self):
        self.use_global_stats = False
        self.trainable_statistics = True
class TestBatchNormUseGlobalStatsCase2(TestBatchNormUseGlobalStats):
-    ### train mode
+    # train mode
    def init_test(self):
        self.use_global_stats = False
        self.trainable_statistics = False
class TestBatchNormUseGlobalStatsCase3(TestBatchNormUseGlobalStats):
-    ### test mode
+    # test mode
    def init_test(self):
        self.use_global_stats = True
        self.trainable_statistics = True
...
@@ -20,7 +20,7 @@ from op_test import OpTest
paddle.enable_static()
-################## TEST OP: BitwiseAnd ##################
+# ----------------- TEST OP: BitwiseAnd ----------------- #
class TestBitwiseAnd(OpTest):
    def setUp(self):
        self.op_type = "bitwise_and"
@@ -124,7 +124,7 @@ class TestBitwiseAndBool(TestBitwiseAnd):
        self.outputs = {'Out': out}
-################## TEST OP: BitwiseOr ##################
+# ----------------- TEST OP: BitwiseOr ------------------ #
class TestBitwiseOr(OpTest):
    def setUp(self):
        self.op_type = "bitwise_or"
@@ -228,7 +228,7 @@ class TestBitwiseOrBool(TestBitwiseOr):
        self.outputs = {'Out': out}
-################## TEST OP: BitwiseXor ##################
+# ----------------- TEST OP: BitwiseXor ---------------- #
class TestBitwiseXor(OpTest):
    def setUp(self):
        self.op_type = "bitwise_xor"
@@ -332,7 +332,7 @@ class TestBitwiseXorBool(TestBitwiseXor):
        self.outputs = {'Out': out}
-################## TEST OP: BitwiseNot ##################
+# --------------- TEST OP: BitwiseNot ----------------- #
class TestBitwiseNot(OpTest):
    def setUp(self):
        self.op_type = "bitwise_not"
...
@@ -74,7 +74,7 @@ class TestFoldOp(OpTest):
            + 1
        )
        output = np.zeros(output_shape).astype(np.float64)
-        ############ calculate output ##############
+        # ------------- calculate output ------------- #
        for b in range(output_shape[0]):
            for c in range(self.input_channels):
                w_offset = int(c % self.kernel_sizes[1])
...
@@ -93,7 +93,7 @@ class TestFusionSeqPoolConcatOpCase4(TestFusionSeqPoolConcatOp):
        self.w = 3
-## test avg pool and sqrt
+# test avg pool and sqrt
def create_test_avg_sqrt_class(parent):
    class TestSeqPoolAvgCase(parent):
        def set_pooltype(self):
...
@@ -100,7 +100,7 @@ class TestFusionSeqPoolCVMConcatOpCase4(TestFusionSeqPoolCVMConcatOp):
        self.w = 3
-## test avg pool and sqrt
+# test avg pool and sqrt
def create_test_avg_sqrt_class(parent):
    class TestSeqPoolAvgCase(parent):
        def set_pooltype(self):
...
@@ -511,7 +511,7 @@ class TestMathOpPatchesVarBase(unittest.TestCase):
        np.testing.assert_array_equal(
            x.asinh().numpy(), paddle.asinh(x).numpy()
        )
-        ### acosh(x) = nan, need to change input
+        # acosh(x) = nan, need to change input
        t_np = np.random.uniform(1, 2, [2, 3]).astype(self.dtype)
        t = paddle.to_tensor(t_np)
        np.testing.assert_array_equal(
...
@@ -73,14 +73,14 @@ class API_Test_Nansum(unittest.TestCase):
    def test_error_api(self):
        paddle.enable_static()
-        ## input dtype error
+        # input dtype error
        def run1():
            input = fluid.data(name='input', dtype='float16', shape=[2, 3])
            output = paddle.nansum(input)
        self.assertRaises(TypeError, run1)
-        ## axis type error
+        # axis type error
        def run2():
            input = fluid.data(name='input', dtype='float16', shape=[2, 3])
            output = paddle.nansum(input, axis=1.2)
...
@@ -241,14 +241,14 @@ class TestRot90_API(unittest.TestCase):
    def test_error_api(self):
        paddle.enable_static()
-        ## dims error
+        # dims error
        def run1():
            input = fluid.data(name='input', dtype='float32', shape=[2, 3])
            output = paddle.rot90(input, k=1, axes=[0])
        self.assertRaises(ValueError, run1)
-        ## input dims error
+        # input dims error
        def run2():
            input = fluid.data(name='input', dtype='float32', shape=[2])
            output = paddle.rot90(input, k=1, axes=[0, 1])
...
@@ -29,7 +29,7 @@ class TestSparseUnary(unittest.TestCase):
        origin_x = paddle.rand([8, 16, 32], dtype='float32')
        mask = paddle.randint(0, 2, [8, 16, 32]).astype('float32')
-        ### check sparse coo with dense ###
+        # --- check sparse coo with dense --- #
        dense_x = origin_x * mask
        sp_x = self.to_sparse(dense_x, format)
...
@@ -319,7 +319,7 @@ class TestTensorScalarTypePromotionDynamic(unittest.TestCase):
        self.func_scalar_pow_tensor()
        self.func_scalar_pow_tensor()
-    ## TODO: floordiv op kernel doesn't support float
+    # TODO: floordiv op kernel doesn't support float
    def func_tensor_floordiv_scalar(self):
        # tensor(int64) // scalar(int)
        a = paddle.full([2, 2, 2], 3, dtype='int64')
...
@@ -95,9 +95,9 @@ def case_generator(op_type, Xshape, diagonal, expected):
    globals()[cls_name] = CLASS
-### NOTE: meaningful diagonal is [1 - min(H, W), max(H, W) -1]
-### test the diagonal just at the border, upper/lower the border,
-### negative/positive integer within range and a zero
+# NOTE: meaningful diagonal is [1 - min(H, W), max(H, W) -1]
+# test the diagonal just at the border, upper/lower the border,
+# negative/positive integer within range and a zero
cases = {
    'success': {
        (2, 2, 3, 4, 5): [-100, -3, -1, 0, 2, 4, 100],  # normal shape
...
@@ -76,7 +76,7 @@ class TestUnfoldOp(OpTest):
        )
        output_shape[2] = out_height * out_width
        output = np.zeros(output_shape).astype(np.float64)
-        ############ calculate output ##############
+        # ------------ calculate output -------------- #
        for i in range(output_shape[0]):
            for j in range(output_shape[1]):
                for k in range(output_shape[2]):
...
@@ -355,7 +355,7 @@ class XPUTestBatchNormOp(XPUOpTestWrapper):
        self.places = [paddle.XPUPlace(0)]
        self.init_test()
-    ### train mode
+    # train mode
    def init_test(self):
        self.use_global_stats = True
        self.trainable_statistics = False
@@ -387,13 +387,13 @@ class XPUTestBatchNormOp(XPUOpTestWrapper):
    )
    class TestBatchNormOpUseGlobalStats1(TestBatchNormOpUseGlobalStats):
-        ### test mode
+        # test mode
        def init_test(self):
            self.use_global_stats = True
            self.trainable_statistics = True
    class TestBatchNormUseGlobalStats2(TestBatchNormOpUseGlobalStats):
-        ### train mode
+        # train mode
        def init_test(self):
            self.use_global_stats = True
            self.trainable_statistics = False
...
@@ -30,7 +30,7 @@ from xpu.get_test_cover_info import (
paddle.enable_static()
-################## TEST OP: BitwiseAnd ##################
+# ----------------- TEST OP: BitwiseAnd -------------------- #
class XPUTestBitwiseAnd(XPUOpTestWrapper):
    def __init__(self):
        self.op_name = 'bitwise_and'
@@ -102,7 +102,7 @@ for stype in support_types:
    create_test_class(globals(), XPUTestBitwiseAnd, stype)
-################## TEST OP: BitwiseOr ##################
+# -------------- TEST OP: BitwiseOr ----------------- #
class XPUTestBitwiseOr(XPUOpTestWrapper):
    def __init__(self):
        self.op_name = 'bitwise_or'
@@ -174,7 +174,7 @@ for stype in support_types:
    create_test_class(globals(), XPUTestBitwiseOr, stype)
-################## TEST OP: BitwiseXor ##################
+# --------------- TEST OP: BitwiseXor ---------------- #
class XPUTestBitwiseXor(XPUOpTestWrapper):
    def __init__(self):
        self.op_name = 'bitwise_xor'
@@ -246,7 +246,7 @@ for stype in support_types:
    create_test_class(globals(), XPUTestBitwiseXor, stype)
-################## TEST OP: BitwiseNot ##################
+# ---------------- TEST OP: BitwiseNot ------------------ #
class XPUTestBitwiseNot(XPUOpTestWrapper):
    def __init__(self):
        self.op_name = 'bitwise_not'
...
@@ -191,7 +191,7 @@ for stype in support_types:
    create_test_class(globals(), XPUTestDepthwiseConv2DOp, stype)
    create_test_class(globals(), XPUTestDepthwiseConv2DOp_v2, stype)
-#### depthwise conv2d
+# depthwise conv2d
# create_test_padding_SAME_class(TestDepthwiseConv_AsyPadding)
# create_test_padding_SAME_class(TestDepthwiseConvWithDilation_AsyPadding)
@@ -203,7 +203,7 @@ for stype in support_types:
# create_test_padding_VALID_class(TestDepthwiseConvandFuse_AsyPadding)
# create_test_padding_VALID_class(TestDepthwiseConvWithDilationandFuse_AsyPadding)
-#### channel last
+# channel last
# create_test_channel_last_class(TestDepthwiseConv_AsyPadding)
# create_test_channel_last_class(TestDepthwiseConvWithDilation2_AsyPadding)
...
@@ -30,7 +30,7 @@ from xpu.get_test_cover_info import (
paddle.enable_static()
-################## TEST OP: logical_and ##################
+# -------------- TEST OP: logical_and ----------------- #
class XPUTestLogicalAnd(XPUOpTestWrapper):
    def __init__(self):
        self.op_name = 'logical_and'
@@ -86,7 +86,7 @@ for stype in support_types:
    create_test_class(globals(), XPUTestLogicalAnd, stype)
-################## TEST OP: logical_or ##################
+# --------------- TEST OP: logical_or ------------------ #
class XPUTestLogicalOr(XPUOpTestWrapper):
    def __init__(self):
        self.op_name = 'logical_or'
@@ -142,7 +142,7 @@ for stype in support_types:
    create_test_class(globals(), XPUTestLogicalOr, stype)
-################## TEST OP: logical_xor ##################
+# --------------- TEST OP: logical_xor ------------------- #
class XPUTestLogicalXor(XPUOpTestWrapper):
    def __init__(self):
        self.op_name = 'logical_xor'
@@ -198,7 +198,7 @@ for stype in support_types:
    create_test_class(globals(), XPUTestLogicalXor, stype)
-################## TEST OP: LogicalNot ##################
+# ------------- TEST OP: LogicalNot ---------------- #
class XPUTestLogicalNot(XPUOpTestWrapper):
    def __init__(self):
        self.op_name = 'logical_not'
...
@@ -96,7 +96,7 @@ def linear_jvp(op, *args, **kwargs):
    return out_dot
-## Register orig2prim lower rules
+# Register orig2prim lower rules
"""
These original ops are fully supported:
@@ -334,7 +334,7 @@ def matmul_v2_orig2prim(op, x, y):
    return matmul(x, y)
-## NOTE(lml): The second output of reshape2 Xshape, which is only used in reshape2_grad, is meanlingless in new autograd mechanism, thus we use a zero tensor instead.
+# NOTE(lml): The second output of reshape2 Xshape, which is only used in reshape2_grad, is meanlingless in new autograd mechanism, thus we use a zero tensor instead.
@REGISTER_ORIG2PRIM('reshape2')
def reshape2_orig2prim(op, shape_t, shape_tl, x):
    assert (
@@ -611,7 +611,7 @@ def size_orig2prim(op, x):
    )
-## Register prim2orig lower rules
+# Register prim2orig lower rules
@REGISTER_PRIM2ORIG('add_p')
def add_prim2orig(op, x, y):
    return paddle.add(x, y)
@@ -825,7 +825,7 @@ def cast_prim2orig(op, x):
    return paddle.cast(x, paddle.dtype(op.attr('dtype')))
-## Register linearize rules
+# Register linearize rules
@REGISTER_JVP('add_p')
def add_jvp(op, x_dot, y_dot):
    if x_dot is None:
@@ -1207,7 +1207,7 @@ def rsqrt_jvp(op, x_dot):
    return y_dot
-## Register transpose rules
+# Register transpose rules
@REGISTER_TRANSPOSE('add_p')
...
@@ -132,10 +132,10 @@ def minimize_bfgs(
        return (k < max_iters) & ~done
    def body(k, done, is_converge, num_func_calls, xk, value, g1, Hk):
-        ############# compute pk #############
+        # -------------- compute pk -------------- #
        pk = -paddle.matmul(Hk, g1)
-        ############# compute alpha by line serach #############
+        # -------------- compute alpha by line serach -------------- #
        if line_search_fn == 'strong_wolfe':
            alpha, value, g2, ls_func_calls = strong_wolfe(
                f=objective_func,
@@ -152,7 +152,7 @@ def minimize_bfgs(
            )
            num_func_calls += ls_func_calls
-        ############# update Hk #############
+        # -------------- update Hk -------------- #
        sk = alpha * pk
        yk = g2 - g1
@@ -178,7 +178,7 @@ def minimize_bfgs(
        k += 1
-        ############# check convergence #############
+        # -------------- check convergence -------------- #
        gnorm = paddle.linalg.norm(g1, p=np.inf)
        pk_norm = paddle.linalg.norm(pk, p=np.inf)
        paddle.assign(
...
@@ -173,7 +173,7 @@ def minimize_lbfgs(
    ):
        # use assign to cut off the relevance between g1 and q, or they will change together.
-        ############# compute p_k by two-loop recursion #############
+        # -------------- compute p_k by two-loop recursion -------------- #
        q = paddle.assign(g1)
        # In a array circle, the index may out of range, so must use mod.
        i = paddle.full(
@@ -208,7 +208,7 @@ def minimize_lbfgs(
        pk = -r
-        ############# compute alpha by line serach #############
+        # -------------- compute alpha by line serach -------------- #
        if line_search_fn == 'strong_wolfe':
            alpha, value, g2, ls_func_calls = strong_wolfe(
                f=objective_func,
@@ -225,7 +225,7 @@ def minimize_lbfgs(
            )
            paddle.assign(num_func_calls + ls_func_calls, num_func_calls)
-        ############# update sk_vec, yk_vec, rhok_vec #############
+        # -------------- update sk_vec, yk_vec, rhok_vec -------------- #
        sk = alpha * pk
        yk = g2 - g1
@@ -251,7 +251,7 @@ def minimize_lbfgs(
        g1 = g2
        k += 1
-        ############# check convergence #############
+        # -------------- check convergence -------------- #
        gnorm = paddle.linalg.norm(g1, p=np.inf)
        pk_norm = paddle.linalg.norm(pk, p=np.inf)
        paddle.assign(
...
@@ -1177,8 +1177,8 @@ class SyncBatchNorm(_BatchNormBase):
        # variance and variance out share the same memory
        variance_out = self._variance
-        ### train mode: use mini-batch stats, eval mode: use global stats
-        ### use_global_stats only support False in sync_batch_norm
+        # train mode: use mini-batch stats, eval mode: use global stats
+        # use_global_stats only support False in sync_batch_norm
        if in_dygraph_mode():
            sync_batch_norm_out, _, _, _, _, _ = _C_ops.sync_batch_norm_(
                x,
...
@@ -837,7 +837,7 @@ def _build_table(
    if views is None or SummaryView.DeviceView in views:
-        ###### Print Device Summary ######
+        # ----- Print Device Summary ----- #
        headers = ['Device', 'Utilization (%)']
        name_column_width = 30
        DEFAULT_COLUMN_WIDTH = 20
@@ -893,7 +893,7 @@ def _build_table(
        return ''.join(result)
    if views is None or SummaryView.OverView in views:
-        ###### Print Overview Summary ######
+        # ----- Print Overview Summary ----- #
        headers = ['Event Type', 'Calls', 'CPU Time', 'Ratio (%)']
        row_format_list = [""]
        header_sep_list = [""]
@@ -1028,7 +1028,7 @@ def _build_table(
    if views is None or SummaryView.ModelView in views:
-        ###### Print Model Summary Report ######
+        # ----- Print Model Summary Report ----- #
        model_perspective_items = (
            statistic_data.event_summary.model_perspective_items
        )
@@ -1153,7 +1153,7 @@ def _build_table(
    if views is None or SummaryView.DistributedView in views:
-        ###### Print Distribution Summary Report ######
+        # ----- Print Distribution Summary Report ----- #
        if statistic_data.distributed_summary.communication_range:
            headers = [
                'Name',
@@ -1233,7 +1233,7 @@ def _build_table(
    if views is None or SummaryView.OperatorView in views:
-        ###### Print Operator Summary Report ######
+        # ----- Print Operator Summary Report ----- #
        if statistic_data.event_summary.items:
            all_row_values = []
            name_column_width = 52
@@ -1526,7 +1526,7 @@ def _build_table(
    if views is None or SummaryView.KernelView in views:
-        ###### Print Kernel Summary Report ######
+        # ----- Print Kernel Summary Report ----- #
        if statistic_data.event_summary.kernel_items:
            all_row_values = []
            kernel_items = statistic_data.event_summary.kernel_items
@@ -1627,7 +1627,7 @@ def _build_table(
    if views is None or SummaryView.MemoryManipulationView in views:
-        ###### Print Memory Manipulation Summary Report ######
+        # ----- Print Memory Manipulation Summary Report ----- #
        if statistic_data.event_summary.memory_manipulation_items:
            all_row_values = []
            memory_manipulation_items = (
@@ -1713,7 +1713,7 @@ def _build_table(
    if views is None or SummaryView.UDFView in views:
-        ###### Print UserDefined Summary Report ######
+        # ----- Print UserDefined Summary Report ----- #
        if statistic_data.event_summary.userdefined_items:
            all_row_values = []
            gpu_total_time = (
@@ -1862,7 +1862,7 @@ def _build_table(
    if views is None or SummaryView.MemoryView in views:
-        ###### Print Memory Summary Report ######
+        # ----- Print Memory Summary Report ----- #
        if (
            statistic_data.memory_summary.allocated_items
            or statistic_data.memory_summary.reserved_items
...
@@ -559,13 +559,13 @@ def normalize_extension_kwargs(kwargs, use_cuda=False):
        kwargs['extra_link_args'] = extra_link_args
    else:
-        ########################### Linux Platform ###########################
+        # ----------------------- Linux Platform ----------------------- #
        extra_link_args = kwargs.get('extra_link_args', [])
        # On Linux, GCC support '-l:xxx.so' to specify the library name
        # without `lib` prefix.
        if OS_NAME.startswith('linux'):
            extra_link_args.append('-l:{}'.format(_get_core_name()))
-        ########################### MacOS Platform ###########################
+        # ----------------------- MacOS Platform ----------------------- #
        else:
            # See _reset_so_rpath for details.
            extra_link_args.append('-Wl,-rpath,{}'.format(_get_fluid_path()))
@@ -573,7 +573,7 @@ def normalize_extension_kwargs(kwargs, use_cuda=False):
            # liblibpaddle.dylib symbol link.
            lib_core_name = create_sym_link_if_not_exist()
            extra_link_args.append('-l{}'.format(lib_core_name))
-        ########################### -- END -- ###########################
+        # ----------------------- -- END -- ----------------------- #
        add_compile_flag(extra_compile_args, ['-w'])  # disable warning
...