diff --git a/ppocr/modeling/heads/self_attention.py b/ppocr/modeling/heads/self_attention.py
index 51d5198f558dcb7e0351f04b3a884b71707104d4..6c27fdbe434166e9277cc8d695bce2743cbd8ec6 100644
--- a/ppocr/modeling/heads/self_attention.py
+++ b/ppocr/modeling/heads/self_attention.py
@@ -285,8 +285,7 @@ class PrePostProcessLayer(nn.Layer):
             elif cmd == "n":  # add layer normalization
                 self.functors.append(
                     self.add_sublayer(
-                        "layer_norm_%d" % len(
-                            self.sublayers(include_sublayers=False)),
+                        "layer_norm_%d" % len(self.sublayers()),
                         paddle.nn.LayerNorm(
                             normalized_shape=d_model,
                             weight_attr=fluid.ParamAttr(
@@ -320,9 +319,7 @@ class PrepareEncoder(nn.Layer):
         self.src_emb_dim = src_emb_dim
         self.src_max_len = src_max_len
         self.emb = paddle.nn.Embedding(
-            num_embeddings=self.src_max_len,
-            embedding_dim=self.src_emb_dim,
-            sparse=True)
+            num_embeddings=self.src_max_len, embedding_dim=self.src_emb_dim)
         self.dropout_rate = dropout_rate

     def forward(self, src_word, src_pos):
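
Below is a minimal, self-contained sketch (my own illustration, not part of the patch) that exercises the two call sites as they look after this change: `Layer.sublayers()` called without arguments, and `paddle.nn.Embedding` constructed without `sparse=True`. The class name, sizes, and the usage at the end are hypothetical.

```python
# Standalone sketch, assuming paddle>=2.0 is installed; sizes are made up.
import paddle
import paddle.nn as nn


class TinyLayer(nn.Layer):
    def __init__(self, d_model=64, max_len=256):
        super(TinyLayer, self).__init__()
        # Positional-embedding table built the same way as the patched
        # PrepareEncoder.emb: no sparse=True argument.
        self.emb = nn.Embedding(num_embeddings=max_len, embedding_dim=d_model)
        # Register a LayerNorm under a name derived from the current sublayer
        # count, mirroring the patched PrePostProcessLayer call.
        self.add_sublayer("layer_norm_%d" % len(self.sublayers()),
                          nn.LayerNorm(normalized_shape=d_model))

    def forward(self, src_pos):
        return self.emb(src_pos)


layer = TinyLayer()
pos = paddle.arange(0, 10, dtype="int64")  # hypothetical position indices
print(layer(pos).shape)                    # [10, 64]
```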