diff --git a/paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.cc b/paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.cc
index 95dc32d20b094f4b029d60150a8fc2096afe74db..070f6a51e3023758742a8b0f77a74d5296e02c09 100644
--- a/paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.cc
@@ -32,8 +32,8 @@ PDNode* BuildSeqExpandConcatPattern(PDPattern* pattern) {
 
   // The following variables will be treat as inputs:
   // concat mid input, 0th input for fused op
-  // sequence_expand input, 1th input for fused op
-  // sequence_expand input, 2th input for fused op
+  // sequence_expand input, 1st input for fused op
+  // sequence_expand input, 2nd input for fused op
 
   // The following variables will be treat as outputs:
   // concat output
diff --git a/python/paddle/distributed/utils/moe_utils.py b/python/paddle/distributed/utils/moe_utils.py
index ae18938941817083ddc9c32089086d176b8493f9..fa003c1a00d7cabd9137e668437f0ecce20a4892 100644
--- a/python/paddle/distributed/utils/moe_utils.py
+++ b/python/paddle/distributed/utils/moe_utils.py
@@ -35,19 +35,19 @@ def global_scatter(
 
     local_count[0] represents taking out 2 batches from x and sending 2 batches to the 0th expert of the 0th card;
 
-    local_count[1] represents taking out 0 batches from x and sending 0 batches to the 1th expert of the 0th card;
+    local_count[1] represents taking out 0 batches from x and sending 0 batches to the 1st expert of the 0th card;
 
-    local_count[2] represents taking out 2 batches from x and sending 2 batches to the 0th expert of the 1th card;
+    local_count[2] represents taking out 2 batches from x and sending 2 batches to the 0th expert of the 1st card;
 
-    local_count[3] represents taking out 0 batches from x and sending 0 batches to the 1th expert of the 1th card;
+    local_count[3] represents taking out 0 batches from x and sending 0 batches to the 1st expert of the 1st card;
 
     Therefore, the global_count[0] of the 0th card is equal to 2, which means that 2 batches of data are received from the 0th card to the 0th expert;
 
-    the global_count[1] of the 0th card is equal to 0, which means that 0 batches of data are received from the 0th card to the 1th expert;
+    the global_count[1] of the 0th card is equal to 0, which means that 0 batches of data are received from the 0th card to the 1st expert;
 
-    the global_count[0] of the 1th card is equal to 2, which means that 2 batches of data are received from the 0th card to the 0th expert;
+    the global_count[0] of the 1st card is equal to 2, which means that 2 batches of data are received from the 0th card to the 0th expert;
 
-    the global_count[1] of the 1th card is equal to 0, which means that 0 batches of data are received from the 0th card to the 1th expert.
+    the global_count[1] of the 1st card is equal to 0, which means that 0 batches of data are received from the 0th card to the 1st expert.
 
     .. image:: https://githubraw.cdn.bcebos.com/PaddlePaddle/docs/develop/docs/api/paddle/distributed/img/global_scatter_gather.png
         :width: 800
@@ -160,11 +160,11 @@ def global_gather(
 
     The global_count[0] of the 0th card represents sending 2 data to the 0th expert of the 0th card;
 
-    The global_count[1] of the 0th card represents sending 0 data to the 1th expert of the 0th card;
+    The global_count[1] of the 0th card represents sending 0 data to the 1st expert of the 0th card;
 
-    The global_count[0] of the 1th card represents sending 2 data to the 0th expert of the 0th card;
+    The global_count[0] of the 1st card represents sending 2 data to the 0th expert of the 0th card;
 
-    The global_count[1] of the 1th card represents sending 0 data to the 1th expert of the 0th card.
+    The global_count[1] of the 1st card represents sending 0 data to the 1st expert of the 0th card.
 
     .. image:: https://githubraw.cdn.bcebos.com/PaddlePaddle/docs/develop/docs/api/paddle/distributed/img/global_scatter_gather.png
         :width: 800
diff --git a/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py b/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py
index 3504af44e0009babafca7e86c866c08bc2d239ff..f4d56d7e41ba4dc4358a4c1ea0a8aed8e8d3f945 100644
--- a/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py
+++ b/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py
@@ -342,12 +342,12 @@ class TestSparseConvert(unittest.TestCase):
         verify(dense_x)
 
         dense_x = paddle.randn(shape)
-        # set the 1th batch to zero
+        # set the batch at index 1 to zero
         dense_x[1] = 0
         verify(dense_x)
 
         dense_x = paddle.randn(shape)
-        # set the 2th batch to zero
+        # set the batch at index 2 to zero
         dense_x[2] = 0
         verify(dense_x)
 
diff --git a/tools/CrossStackProfiler/DCGMFileReader.py b/tools/CrossStackProfiler/DCGMFileReader.py
index bb248e05868ccbd20b848bbfb16c2867e3bba6db..f462ce5c9ad5eac95b883c6df593cd3b17c17ceb 100755
--- a/tools/CrossStackProfiler/DCGMFileReader.py
+++ b/tools/CrossStackProfiler/DCGMFileReader.py
@@ -136,7 +136,7 @@ class dcgmFileReader(FileReader):
                 ) and not line.strip().startswith("# Entity"):
                     continue
 
-                # skip non-needed headers (only the header in 1th line was needed)
+                # skip unneeded headers (only the header on the 1st line is needed)
                 if line.strip().startswith("# Entity"):
                     line = line.strip()[2:]