diff --git a/ppdet/modeling/layers.py b/ppdet/modeling/layers.py
index fd31812650c84b2a9fb9a14fa9c446ba1a11cb58..713880b9a4c3051898be6854bb077416e8c946dc 100644
--- a/ppdet/modeling/layers.py
+++ b/ppdet/modeling/layers.py
@@ -31,8 +31,6 @@ from . import ops
 from .initializer import xavier_uniform_, constant_
 
 from paddle.vision.ops import DeformConv2D
-from paddle.nn.layer import transformer
-_convert_attention_mask = transformer._convert_attention_mask
 
 
 def _to_list(l):
@@ -1195,6 +1193,27 @@ class Concat(nn.Layer):
         return 'dim={}'.format(self.dim)
 
 
+def _convert_attention_mask(attn_mask, dtype):
+    """
+    Convert the attention mask to the target dtype we expect.
+    Parameters:
+        attn_mask (Tensor, optional): A tensor used in multi-head attention
+                to prevents attention to some unwanted positions, usually the
+                paddings or the subsequent positions. It is a tensor with shape
+                broadcasted to `[batch_size, n_head, sequence_length, sequence_length]`.
+                When the data type is bool, the unwanted positions have `False`
+                values and the others have `True` values. When the data type is
+                int, the unwanted positions have 0 values and the others have 1
+                values. When the data type is float, the unwanted positions have
+                `-INF` values and the others have 0 values. It can be None when
+                nothing wanted or needed to be prevented attention to. Default None.
+        dtype (VarType): The target type of `attn_mask` we expect.
+    Returns:
+        Tensor: A Tensor with shape same as input `attn_mask`, with data type `dtype`.
+    """
+    return nn.layer.transformer._convert_attention_mask(attn_mask, dtype)
+
+
 class MultiHeadAttention(nn.Layer):
     """
     Attention mapps queries and a set of key-value pairs to outputs, and
diff --git a/ppdet/modeling/transformers/detr_transformer.py b/ppdet/modeling/transformers/detr_transformer.py
index 92d79d53c7c8b6edc273e461faa3d211b6c6fcbe..9069ee8c4edb401be3b03c53a5e0369f7bcc4489 100644
--- a/ppdet/modeling/transformers/detr_transformer.py
+++ b/ppdet/modeling/transformers/detr_transformer.py
@@ -18,11 +18,10 @@ from __future__ import print_function
 
 import paddle
 import paddle.nn as nn
-from paddle.nn.layer.transformer import _convert_attention_mask
 import paddle.nn.functional as F
 
 from ppdet.core.workspace import register
-from ..layers import MultiHeadAttention
+from ..layers import MultiHeadAttention, _convert_attention_mask
 from .position_encoding import PositionEmbedding
 from .utils import *
 from ..initializer import *
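
Illustrative note (not part of the patch): the re-exported `_convert_attention_mask` helper simply delegates to `paddle.nn.layer.transformer._convert_attention_mask`, which turns a bool/int mask into an additive float mask (large negative values at masked positions, zeros elsewhere). A minimal usage sketch, assuming the patch above is applied; the shapes and values are made up.

```python
# A minimal sketch (not part of the patch) of how the relocated helper is
# typically used; tensor shapes and values here are made up for illustration.
import paddle
import paddle.nn.functional as F

from ppdet.modeling.layers import _convert_attention_mask

# Unscaled attention logits: [batch_size, n_head, seq_len, seq_len].
product = paddle.rand([2, 8, 16, 16])

# Boolean mask where False marks positions that must not be attended to.
attn_mask = paddle.ones([2, 1, 16, 16], dtype='bool')

# Convert the bool mask to an additive float mask in the logits' dtype
# (large negative values at masked positions, zeros elsewhere), then add it
# before the softmax, mirroring what MultiHeadAttention does internally.
attn_mask = _convert_attention_mask(attn_mask, product.dtype)
weights = F.softmax(product + attn_mask)
```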