diff --git a/python/paddle/incubate/operators/softmax_mask_fuse.py b/python/paddle/incubate/operators/softmax_mask_fuse.py
index bbc4175b0d1c444b16950c3cae2fa9c0484546cb..4c95a1ab512885459e3f399027a3edf8fe58d9ff 100644
--- a/python/paddle/incubate/operators/softmax_mask_fuse.py
+++ b/python/paddle/incubate/operators/softmax_mask_fuse.py
@@ -20,6 +20,43 @@ from paddle.fluid import core


 def softmax_mask_fuse(x, mask, name=None):
+    """
+    Do a masked softmax on x.
+
+    This is designed for speeding up Transformer structures.
+    It fuses the operation sequence tmp = x + mask, out = softmax(tmp) into a single kernel.
+    The equation is:
+
+    .. math::
+        out = softmax(x + mask)
+
+    **Note**:
+        This API only supports GPU.
+
+    Args:
+        x (4-D Tensor): The input tensor, which should be 4-D. Its data type should be float16 or float32.
+            The fourth dimension of x must be greater than or equal to 32 and less than 8192.
+        mask (4-D Tensor): The input mask, which should be 4-D. Its data type should be float16 or float32.
+            The second dimension of mask must be 1, and the other dimensions must be the same as those of x.
+        name (str, optional): Name for the operation (optional, default is None).
+            For more information, please refer to :ref:`api_guide_Name`.
+
+    Returns:
+        4-D Tensor. The result of the masked softmax. It has the same shape as x.
+
+    Examples:
+        .. code-block:: python
+
+            # required: gpu
+            import paddle
+            import paddle.incubate as incubate
+
+            x = paddle.rand([2, 8, 8, 32])
+            mask = paddle.rand([2, 1, 8, 32])
+
+            rst = incubate.softmax_mask_fuse(x, mask)
+            # [[[[0.02404429, 0.04658398, 0.02746007, ..., 0.01489375, 0.02397441, 0.02851614] ... ]]]
+    """
     if in_dygraph_mode():
         out = core.ops.fused_softmax_mask(x, mask)
         return out
diff --git a/python/paddle/incubate/operators/softmax_mask_fuse_upper_triangle.py b/python/paddle/incubate/operators/softmax_mask_fuse_upper_triangle.py
index 636d0f5f9dd3b637e63963ee328aeb4924c44a9c..918adf8c21a1cd723463c4fba2e325a993720dc0 100644
--- a/python/paddle/incubate/operators/softmax_mask_fuse_upper_triangle.py
+++ b/python/paddle/incubate/operators/softmax_mask_fuse_upper_triangle.py
@@ -21,11 +21,41 @@ from paddle.fluid import core

 def softmax_mask_fuse_upper_triangle(x):
     """
-    Fuse softmax mask together without even give a mask.
-    Under GPT model, the mask is always be a upper triangle
-    so we can simply mask the upper triangle part of x to get the mask result
-    :param x: the input x (rst of QK)
-    :return: the result of softmax mask fuse (upper triangle)
+    Do a masked softmax on x, always masking the upper-triangular part of x.
+
+    This is designed for speeding up GPT-style Transformer structures.
+    It fuses the operation sequence tmp = x + mask, out = softmax(tmp), where the mask is
+    always an upper-triangular matrix.
+    The equation is:
+
+    .. math::
+        out = softmax(LowerTriangular(x))
+
+    **Note**:
+        This API only supports GPU.
+
+    Args:
+        x (4-D Tensor): The input tensor, which should be 4-D. Its data type should be float16 or float32.
+            The fourth dimension of x must be greater than or equal to 32 and less than 8192.
+            The third dimension of x must be equal to its fourth dimension.
+
+    Returns:
+        4-D Tensor. The result of the masked softmax. It has the same shape as x.
+
+    Examples:
+        .. code-block:: python
+
+            # required: gpu
+            import paddle
+            import paddle.incubate as incubate
+
+            x = paddle.rand((1, 1, 32, 32))
+
+            rst = incubate.softmax_mask_fuse_upper_triangle(x)
+            # [[[[1.        , 0.        , 0.        , ..., 0., 0., 0.],
+            #   [0.45324376, 0.54675621, 0.        , ..., 0., 0., 0.],
+            #   [0.32674268, 0.28156221, 0.39169508, ..., 0., 0., 0.],
+            #   ... ]]]
     """
     if in_dygraph_mode():
         out = core.ops.fused_softmax_mask_upper_triangle(x)
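
Beyond the docstring examples, a quick way to sanity-check both fused ops is to compare them against the unfused reference they replace. The sketch below is not part of the diff: it assumes a CUDA build of Paddle (both APIs are GPU-only), the `neg_inf_mask` construction with a large negative constant is my stand-in for a -inf causal mask, and the tolerances are guesses.

    # Hypothetical self-check, not part of this PR: the fused kernels should
    # agree with the plain add-then-softmax they replace. Requires a GPU build.
    import numpy as np

    import paddle
    import paddle.incubate as incubate
    import paddle.nn.functional as F

    paddle.set_device('gpu')

    # softmax_mask_fuse: compare against softmax(x + mask) computed directly.
    x = paddle.rand([2, 8, 8, 32])     # last dim must be in [32, 8192)
    mask = paddle.rand([2, 1, 8, 32])  # second dim must be 1; others match x
    fused = incubate.softmax_mask_fuse(x, mask)
    ref = F.softmax(x + mask, axis=-1)
    np.testing.assert_allclose(fused.numpy(), ref.numpy(), rtol=1e-5, atol=1e-5)

    # softmax_mask_fuse_upper_triangle: compare against a softmax over x with
    # the strictly upper triangle pushed to a large negative value (a stand-in
    # for -inf, which underflows to exactly 0 after exp in float32).
    xs = paddle.rand([1, 1, 32, 32])   # last two dims must be equal
    fused_tri = incubate.softmax_mask_fuse_upper_triangle(xs)
    neg_inf_mask = paddle.triu(paddle.full([32, 32], -1e4), diagonal=1)
    ref_tri = F.softmax(xs + neg_inf_mask, axis=-1)
    np.testing.assert_allclose(fused_tri.numpy(), ref_tri.numpy(), rtol=1e-5, atol=1e-5)

Using `diagonal=1` keeps the main diagonal unmasked, which matches the example output above where row i has exactly i + 1 nonzero entries.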