diff --git a/doc/api/v2/fluid/nets.rst b/doc/api/v2/fluid/nets.rst
index 8af61977ab984a13ab8133212ec25903f045d790..500019bc507f859c4c91de5d322a82eb1e78e2de 100644
--- a/doc/api/v2/fluid/nets.rst
+++ b/doc/api/v2/fluid/nets.rst
@@ -28,6 +28,6 @@ glu
 
 scaled_dot_product_attention
 ----------------------------
-..  autofunction:: paddle.v2.fluid.nets.dot_product_attention
+..  autofunction:: paddle.v2.fluid.nets.scaled_dot_product_attention
     :noindex:
 
diff --git a/python/paddle/v2/fluid/layers/nn.py b/python/paddle/v2/fluid/layers/nn.py
index 19fee8ec67b7a714ef9da9031e30616eada881c0..07c4f89da602844f52b8bf39137ef2b566d44ff4 100644
--- a/python/paddle/v2/fluid/layers/nn.py
+++ b/python/paddle/v2/fluid/layers/nn.py
@@ -2097,7 +2097,7 @@ def matmul(x, y, transpose_x=False, transpose_y=False, name=None):
         if len(x_shape) == 1:
             x_shape = [1] + x_shape
         if len(y_shape) == 1:
-            y_shape = [1] + y_shape
+            y_shape = y_shape + [1]
 
         # check the inner 2 dimensions
         if transpose_x:
diff --git a/python/paddle/v2/fluid/nets.py b/python/paddle/v2/fluid/nets.py
index e18852c59662539f90778e74f2e11643b9a6fe8e..cb63d43709e23ae04c4d23457bbb79e6f7f0ce3c 100644
--- a/python/paddle/v2/fluid/nets.py
+++ b/python/paddle/v2/fluid/nets.py
@@ -306,7 +306,7 @@ def scaled_dot_product_attention(queries,
                     [bs, max_sequence_length, num_heads * hidden_dim].
         """
 
-        if len(x.shape) == 3: return
+        if len(x.shape) == 3: return x
         if len(x.shape) != 4:
             raise ValueError("Input(x) should be a 4-D Tensor.")