diff --git a/paddle/fluid/operators/matmul_op.cc b/paddle/fluid/operators/matmul_op.cc
index bb7ff39f1318331c33a2401c4b29e9a5da3bc12c..e8a9b38c9c4a717b3728a78f4e0e2e274ca24f79 100644
--- a/paddle/fluid/operators/matmul_op.cc
+++ b/paddle/fluid/operators/matmul_op.cc
@@ -313,6 +313,13 @@ class MatMulOp : public framework::OperatorWithKernel {
         math::CreateMatrixDescriptor(ColumnMatrixFromVector(dim_y), 0,
                                      context->Attrs().Get<bool>("transpose_Y"));
 
+    if (mat_dim_x.width_ == -1) {
+      mat_dim_x.width_ = mat_dim_y.height_;
+    }
+    if (mat_dim_y.height_ == -1) {
+      mat_dim_y.height_ = mat_dim_x.width_;
+    }
+
     if (context->IsRuntime()) {
       PADDLE_ENFORCE(
           mat_dim_x.batch_size_ == mat_dim_y.batch_size_ ||
@@ -323,20 +330,21 @@ class MatMulOp : public framework::OperatorWithKernel {
           DumpMatrixShape(mat_dim_x).c_str(),
           DumpMatrixShape(mat_dim_y).c_str());
     }
-    std::vector<int64_t> dim_out;
     int64_t dim_out_y = mat_dim_y.width_;
 #if defined(PADDLE_WITH_MKLML) && !defined(PADDLE_WITH_CUDA)
     int head_number = context->Attrs().Get<int>("head_number");
     bool split_vertical_y = (mat_dim_x.width_ != mat_dim_y.height_);
-    PADDLE_ENFORCE_LE(
-        head_number, mat_dim_x.width_,
-        "ShapeError: Unsatisfied mkl acceleration library requirements: "
-        "The number of heads "
-        "(%d) must be equal to X's width. But received X's shape: %s.",
-        head_number, DumpMatrixShape(mat_dim_x).c_str());
-
-    if (!split_vertical_y && head_number > 0) {
-      dim_out_y = head_number * mat_dim_y.width_;
+    if (context->IsRuntime()) {
+      PADDLE_ENFORCE_LE(
+          head_number, mat_dim_x.width_,
+          "ShapeError: Unsatisfied mkl acceleration library requirements: "
+          "The number of heads "
+          "(%d) must be equal to X's width. But received X's shape: %s.",
+          head_number, DumpMatrixShape(mat_dim_x).c_str());
+
+      if (!split_vertical_y && head_number > 0) {
+        dim_out_y = head_number * mat_dim_y.width_;
+      }
     }
 #else
     PADDLE_ENFORCE_EQ(
@@ -347,6 +355,7 @@ class MatMulOp : public framework::OperatorWithKernel {
           DumpMatrixShape(mat_dim_x).c_str(),
           DumpMatrixShape(mat_dim_y).c_str());
 #endif
+    std::vector<int64_t> dim_out;
     if (mat_dim_x.batch_size_ != 0) {
       dim_out = framework::vectorize(dim_x);
       dim_out[dim_out.size() - 2] = mat_dim_x.height_;
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 2d12c7f3d490e2401c268537e33b339d2c49ba0a..31e4d2567efb0055da628bc2657ac6bcb0284565 100755
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -6914,11 +6914,11 @@ def matmul(x, y, transpose_x=False, transpose_y=False, alpha=1.0, name=None):
         if transpose_y:
             y_shape[-2], y_shape[-1] = y_shape[-1], y_shape[-2]
         if x_shape[-1] != y_shape[-2]:
-            raise ValueError(
-                "After performing an optional transpose, Input X's width should be "
-                "equal to Y's width for multiplication "
-                "prerequisites. But received X's shape: %s, Y's shape: %s\n" %
-                (x_shape, y_shape))
+            assert (x_shape[-1] == -1) or (y_shape[-2] == -1), \
+                "After performing an optional transpose, Input X's width should be " \
+                "equal to Y's width for multiplication " \
+                "prerequisites. But received X's shape: %s, Y's shape: %s\n" % \
+                (x_shape, y_shape)
 
         if len(y_shape) > 2 and len(x_shape) > 2:
             for i, dim_x in enumerate(x_shape[:-2]):
diff --git a/python/paddle/fluid/tests/unittests/test_matmul_op.py b/python/paddle/fluid/tests/unittests/test_matmul_op.py
index 7083c35462d710ec424428125517e0b40f0a2948..02a5caf0e30ad17ff76fda760f62e0f502e46d21 100644
--- a/python/paddle/fluid/tests/unittests/test_matmul_op.py
+++ b/python/paddle/fluid/tests/unittests/test_matmul_op.py
@@ -129,7 +129,60 @@ class TestMatmulOpError(OpTest):
             fluid.layers.matmul(input3, input3)
 
 
-# Generate test cases for all possibilities
+# Enumerate variants of a shape with every combination of dims masked by -1
+def generate_negative_dims(in_shape):
+    from itertools import combinations
+    size = len(in_shape)
+    indexs = list()
+    shapes = list()
+    for i in range(size):
+        indexs.extend(list(combinations([j for j in range(size)], i + 1)))
+    for idx in indexs:
+        shapes.append(
+            [in_shape[i] if i not in idx else -1 for i in range(size)])
+    return shapes
+
+
+# Build programs whose input shapes contain negative (unknown) dimensions
+def test_negative_dims_program(obj):
+    for shape_x in generate_negative_dims(obj.shape_X):
+        for shape_y in generate_negative_dims(obj.shape_Y):
+            X = np.random.random(obj.shape_X).astype("float32")
+            Y = np.random.random(obj.shape_Y).astype("float32")
+            Ref = reference_matmul(X, Y, obj.transpose_X, obj.transpose_Y)
+            with program_guard(Program(), Program()):
+                x = fluid.data(name='x', shape=shape_x, dtype='float32')
+                y = fluid.data(name='y', shape=shape_y, dtype='float32')
+                output = fluid.layers.matmul(x, y, obj.transpose_X,
+                                             obj.transpose_Y)
+                obj.assertEqual(len(Ref.shape), len(output.shape))
+                for idx in range(len(Ref.shape)):
+                    if output.shape[idx] != -1:
+                        obj.assertEqual(Ref.shape[idx], output.shape[idx])
+                exe = fluid.Executor(fluid.CPUPlace())
+                res, = exe.run(fluid.default_main_program(),
+                               feed={'x': X,
+                                     'y': Y},
+                               fetch_list=[output])
+                obj.assertTrue(np.allclose(res, Ref, atol=1e-5))
+
+
+# Generate program API cases for all negative possibilities
+def api_test(dim_x, dim_y, trans_x, trans_y):
+    test_name = ('TestMatMulAPI_dimX_{}_dim_Y_{}_transX_{}_transY_{}'.format(
+        dim_x, dim_y, trans_x, trans_y))
+    shape_x, shape_y = generate_compatible_shapes(dim_x, dim_y, trans_x,
+                                                  trans_y)
+    globals()[test_name] = type(test_name, (OpTest, ), {
+        'shape_X': shape_x,
+        'shape_Y': shape_y,
+        'transpose_X': trans_x,
+        'transpose_Y': trans_y,
+        'test_program': test_negative_dims_program,
+    })
+
+
+# Generate operator cases for all possibilities
 def inject_test(dim_x, dim_y, trans_x, trans_y):
     test_name = ('TestMatMulOp_dimX_{}_dim_Y_{}_transX_{}_transY_{}'.format(
         dim_x, dim_y, trans_x, trans_y))
@@ -148,6 +201,7 @@ for dim_X in (1, 2, 3):
     for dim_Y in (1, 2, 3):
         for transose_x in (False, True):
             for transose_y in (False, True):
                 inject_test(dim_X, dim_Y, transose_x, transose_y)
+                api_test(dim_X, dim_Y, transose_x, transose_y)
 
 # Test case n-dim
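
Note for reviewers: the effect of the two new InferShape branches is easiest to see outside of C++. The sketch below is not part of the patch; it restates the rule in plain Python, where -1 marks a dimension that is unknown at compile time and the hypothetical names x_width/y_height stand in for mat_dim_x.width_ and mat_dim_y.height_ after the optional transposes:

    def infer_contraction_dims(x_width, y_height):
        # If one side of the contraction (K) dimension is unknown, borrow
        # the extent the other operand already knows; this mirrors the two
        # new `if` blocks added to MatMulOp.
        if x_width == -1:
            x_width = y_height
        if y_height == -1:
            y_height = x_width
        return x_width, y_height

    assert infer_contraction_dims(-1, 8) == (8, 8)     # X's width filled in
    assert infer_contraction_dims(8, -1) == (8, 8)     # Y's height filled in
    assert infer_contraction_dims(-1, -1) == (-1, -1)  # both stay unknown

The nn.py change encodes the same tolerance on the Python side: the compile-time check now rejects a mismatch only when both contraction dimensions are statically known, leaving shapes containing -1 to be validated at runtime.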
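Similarly, a standalone copy of the new generate_negative_dims helper (same logic as the patch, lightly condensed) shows which shape variants each test case iterates over; for a 2-D input it yields every non-empty combination of dimensions masked with -1:

    from itertools import combinations

    def generate_negative_dims(in_shape):
        size = len(in_shape)
        masked = []
        for i in range(size):
            # All ways of choosing i+1 dimension indices to mark as unknown.
            for idx in combinations(range(size), i + 1):
                masked.append(
                    [in_shape[j] if j not in idx else -1 for j in range(size)])
        return masked

    print(generate_negative_dims([4, 5]))
    # [[-1, 5], [4, -1], [-1, -1]]

test_negative_dims_program then declares a program for each masked (shape_x, shape_y) pair, feeds fully-shaped numpy inputs through it, and checks both the inferred output shape and the fetched result against reference_matmul.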