未验证 提交 37cd4354 编写于 作者: 石晓伟 提交者: GitHub

update the infer shape of matmul, test=develop (#20717)

* update the infer shape of matmul,  test=release/1.6

* add unittests of matmul, test=release/1.6

* change func names, test=develop
上级 e742760f
......@@ -313,6 +313,13 @@ class MatMulOp : public framework::OperatorWithKernel {
math::CreateMatrixDescriptor(ColumnMatrixFromVector(dim_y), 0,
context->Attrs().Get<bool>("transpose_Y"));
if (mat_dim_x.width_ == -1) {
mat_dim_x.width_ = mat_dim_y.height_;
}
if (mat_dim_y.height_ == -1) {
mat_dim_y.height_ = mat_dim_x.width_;
}
if (context->IsRuntime()) {
PADDLE_ENFORCE(
mat_dim_x.batch_size_ == mat_dim_y.batch_size_ ||
......@@ -323,20 +330,21 @@ class MatMulOp : public framework::OperatorWithKernel {
DumpMatrixShape(mat_dim_x).c_str(),
DumpMatrixShape(mat_dim_y).c_str());
}
std::vector<int64_t> dim_out;
int64_t dim_out_y = mat_dim_y.width_;
#if defined(PADDLE_WITH_MKLML) && !defined(PADDLE_WITH_CUDA)
int head_number = context->Attrs().Get<int>("head_number");
bool split_vertical_y = (mat_dim_x.width_ != mat_dim_y.height_);
PADDLE_ENFORCE_LE(
head_number, mat_dim_x.width_,
"ShapeError: Unsatisfied mkl acceleration library requirements: "
"The number of heads "
"(%d) must be equal to X's width. But received X's shape: %s.",
head_number, DumpMatrixShape(mat_dim_x).c_str());
if (!split_vertical_y && head_number > 0) {
dim_out_y = head_number * mat_dim_y.width_;
if (context->IsRuntime()) {
PADDLE_ENFORCE_LE(
head_number, mat_dim_x.width_,
"ShapeError: Unsatisfied mkl acceleration library requirements: "
"The number of heads "
"(%d) must be equal to X's width. But received X's shape: %s.",
head_number, DumpMatrixShape(mat_dim_x).c_str());
if (!split_vertical_y && head_number > 0) {
dim_out_y = head_number * mat_dim_y.width_;
}
}
#else
PADDLE_ENFORCE_EQ(
......@@ -347,6 +355,7 @@ class MatMulOp : public framework::OperatorWithKernel {
DumpMatrixShape(mat_dim_x).c_str(), DumpMatrixShape(mat_dim_y).c_str());
#endif
std::vector<int64_t> dim_out;
if (mat_dim_x.batch_size_ != 0) {
dim_out = framework::vectorize(dim_x);
dim_out[dim_out.size() - 2] = mat_dim_x.height_;
......
......@@ -6914,11 +6914,11 @@ def matmul(x, y, transpose_x=False, transpose_y=False, alpha=1.0, name=None):
if transpose_y:
y_shape[-2], y_shape[-1] = y_shape[-1], y_shape[-2]
if x_shape[-1] != y_shape[-2]:
raise ValueError(
"After performing an optional transpose, Input X's width should be "
"equal to Y's width for multiplication "
"prerequisites. But received X's shape: %s, Y's shape: %s\n" %
(x_shape, y_shape))
assert (x_shape[-1] == -1) or (y_shape[-2] == -1), \
"After performing an optional transpose, Input X's width should be " \
"equal to Y's width for multiplication " \
"prerequisites. But received X's shape: %s, Y's shape: %s\n" % \
(x_shape, y_shape)
if len(y_shape) > 2 and len(x_shape) > 2:
for i, dim_x in enumerate(x_shape[:-2]):
......
......@@ -129,7 +129,60 @@ class TestMatmulOpError(OpTest):
fluid.layers.matmul(input3, input3)
# Generate test cases for all possibilities
# Negative dimension generation
def generate_negative_dims(in_shape):
    """Enumerate every variant of ``in_shape`` in which one or more
    dimensions are replaced by -1 (i.e. unknown at compile time).

    For a rank-``r`` shape this yields ``2**r - 1`` shapes: one per
    non-empty subset of axes, in order of subset size then position.
    """
    from itertools import combinations
    rank = len(in_shape)
    masked_shapes = []
    # Pick every non-empty subset of axes to mask with -1.
    for pick_count in range(1, rank + 1):
        for masked_axes in combinations(range(rank), pick_count):
            masked_shapes.append([
                -1 if axis in masked_axes else dim
                for axis, dim in enumerate(in_shape)
            ])
    return masked_shapes
# Build program with inputs sizes that contain negative numbers
def test_negative_dims_program(obj):
    """Exercise fluid.layers.matmul with every combination of negative
    (-1, unknown) dimensions derived from ``obj.shape_X``/``obj.shape_Y``.

    For each placeholder-shape pair it checks (a) the statically inferred
    output shape agrees with a NumPy reference in rank and in every known
    dimension, and (b) the runtime result matches the reference values.

    Args:
        obj: the TestCase instance; must provide shape_X, shape_Y,
            transpose_X, transpose_Y and the unittest assert methods.
    """
    for shape_x in generate_negative_dims(obj.shape_X):
        for shape_y in generate_negative_dims(obj.shape_Y):
            X = np.random.random(obj.shape_X).astype("float32")
            Y = np.random.random(obj.shape_Y).astype("float32")
            Ref = reference_matmul(X, Y, obj.transpose_X, obj.transpose_Y)
            with program_guard(Program(), Program()):
                x = fluid.data(name='x', shape=shape_x, dtype='float32')
                y = fluid.data(name='y', shape=shape_y, dtype='float32')
                output = fluid.layers.matmul(x, y, obj.transpose_X,
                                             obj.transpose_Y)
                # Compile-time shape check: rank must match, and every
                # dimension that was inferred (!= -1) must agree.
                obj.assertEqual(len(Ref.shape), len(output.shape))
                for idx in range(len(Ref.shape)):
                    if output.shape[idx] != -1:
                        obj.assertEqual(Ref.shape[idx], output.shape[idx])
                exe = fluid.Executor(fluid.CPUPlace())
                res, = exe.run(fluid.default_main_program(),
                               feed={'x': X,
                                     'y': Y},
                               fetch_list=[output])
                # BUG FIX: np.allclose returns a bool that was previously
                # discarded, so a wrong runtime result could never fail
                # the test. Assert on it explicitly.
                obj.assertTrue(
                    np.allclose(
                        res, Ref, atol=1e-5),
                    "matmul runtime result mismatch for placeholder "
                    "shapes x: {}, y: {}".format(shape_x, shape_y))
# Generate program api cases for all negative possibilities
def api_test(dim_x, dim_y, trans_x, trans_y):
    """Register a generated unittest class exercising the Python matmul
    API with negative (-1) placeholder dimensions.

    Args:
        dim_x / dim_y: ranks of the X and Y operands.
        trans_x / trans_y: transpose flags forwarded to matmul.

    Side effect: installs a new OpTest subclass into globals() so that
    unittest discovery picks it up.
    """
    test_name = ('TestMatMulAPI_dimX_{}_dim_Y_{}_transX_{}_transY_{}'.format(
        dim_x, dim_y, trans_x, trans_y))
    shape_x, shape_y = generate_compatible_shapes(dim_x, dim_y, trans_x,
                                                  trans_y)
    # The method name must keep the 'test' prefix so the default
    # unittest loader discovers it.
    globals()[test_name] = type(
        test_name,
        (OpTest, ),
        {
            'shape_X': shape_x,
            'shape_Y': shape_y,
            'transpose_X': trans_x,
            'transpose_Y': trans_y,
            # BUG FIX: key was misspelled 'test_propram'.
            'test_program': test_negative_dims_program,
        })
# Generate operators cases for all possibilities
def inject_test(dim_x, dim_y, trans_x, trans_y):
test_name = ('TestMatMulOp_dimX_{}_dim_Y_{}_transX_{}_transY_{}'.format(
dim_x, dim_y, trans_x, trans_y))
......@@ -148,6 +201,7 @@ for dim_X in (1, 2, 3):
for transose_x in (False, True):
for transose_y in (False, True):
inject_test(dim_X, dim_Y, transose_x, transose_y)
api_test(dim_X, dim_Y, transose_x, transose_y)
# Test case n-dim
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册