diff --git a/paddle/fluid/operators/matmul_op.cc b/paddle/fluid/operators/matmul_op.cc
index bb7ff39f1318331c33a2401c4b29e9a5da3bc12c..e8a9b38c9c4a717b3728a78f4e0e2e274ca24f79 100644
--- a/paddle/fluid/operators/matmul_op.cc
+++ b/paddle/fluid/operators/matmul_op.cc
@@ -313,6 +313,13 @@ class MatMulOp : public framework::OperatorWithKernel {
         math::CreateMatrixDescriptor(ColumnMatrixFromVector(dim_y), 0,
                                      context->Attrs().Get<bool>("transpose_Y"));
 
+    if (mat_dim_x.width_ == -1) {
+      mat_dim_x.width_ = mat_dim_y.height_;
+    }
+    if (mat_dim_y.height_ == -1) {
+      mat_dim_y.height_ = mat_dim_x.width_;
+    }
+
     if (context->IsRuntime()) {
       PADDLE_ENFORCE(
           mat_dim_x.batch_size_ == mat_dim_y.batch_size_ ||
@@ -323,20 +330,21 @@ class MatMulOp : public framework::OperatorWithKernel {
           DumpMatrixShape(mat_dim_x).c_str(),
           DumpMatrixShape(mat_dim_y).c_str());
     }
-    std::vector<int64_t> dim_out;
     int64_t dim_out_y = mat_dim_y.width_;
 #if defined(PADDLE_WITH_MKLML) && !defined(PADDLE_WITH_CUDA)
     int head_number = context->Attrs().Get<int>("head_number");
     bool split_vertical_y = (mat_dim_x.width_ != mat_dim_y.height_);
-    PADDLE_ENFORCE_LE(
-        head_number, mat_dim_x.width_,
-        "ShapeError: Unsatisfied mkl acceleration library requirements: "
-        "The number of heads "
-        "(%d) must be equal to X's width. But received X's shape: %s.",
-        head_number, DumpMatrixShape(mat_dim_x).c_str());
-
-    if (!split_vertical_y && head_number > 0) {
-      dim_out_y = head_number * mat_dim_y.width_;
+    if (context->IsRuntime()) {
+      PADDLE_ENFORCE_LE(
+          head_number, mat_dim_x.width_,
+          "ShapeError: Unsatisfied mkl acceleration library requirements: "
+          "The number of heads "
+          "(%d) must be equal to X's width. But received X's shape: %s.",
+          head_number, DumpMatrixShape(mat_dim_x).c_str());
+
+      if (!split_vertical_y && head_number > 0) {
+        dim_out_y = head_number * mat_dim_y.width_;
+      }
     }
 #else
     PADDLE_ENFORCE_EQ(
@@ -347,6 +355,7 @@ class MatMulOp : public framework::OperatorWithKernel {
           DumpMatrixShape(mat_dim_x).c_str(),
           DumpMatrixShape(mat_dim_y).c_str());
 #endif
+    std::vector<int64_t> dim_out;
     if (mat_dim_x.batch_size_ != 0) {
       dim_out = framework::vectorize(dim_x);
       dim_out[dim_out.size() - 2] = mat_dim_x.height_;
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 9405e9eec911ee06aaeb00aa253d7b7ed4055480..48d04101fbd6dd868fe6c6d4d31ed286c327c0ca 100755
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -6941,11 +6941,11 @@ def matmul(x, y, transpose_x=False, transpose_y=False, alpha=1.0, name=None):
         if transpose_y:
             y_shape[-2], y_shape[-1] = y_shape[-1], y_shape[-2]
         if x_shape[-1] != y_shape[-2]:
-            raise ValueError(
-                "After performing an optional transpose, Input X's width should be "
-                "equal to Y's width for multiplication "
-                "prerequisites. But received X's shape: %s, Y's shape: %s\n" %
-                (x_shape, y_shape))
+            assert (x_shape[-1] == -1) or (y_shape[-2] == -1), \
+                "After performing an optional transpose, Input X's width should be " \
+                "equal to Y's width for multiplication " \
+                "prerequisites. But received X's shape: %s, Y's shape: %s\n" % \
+                (x_shape, y_shape)
 
         if len(y_shape) > 2 and len(x_shape) > 2:
             for i, dim_x in enumerate(x_shape[:-2]):
diff --git a/python/paddle/fluid/tests/unittests/test_matmul_op.py b/python/paddle/fluid/tests/unittests/test_matmul_op.py
index 7083c35462d710ec424428125517e0b40f0a2948..02a5caf0e30ad17ff76fda760f62e0f502e46d21 100644
--- a/python/paddle/fluid/tests/unittests/test_matmul_op.py
+++ b/python/paddle/fluid/tests/unittests/test_matmul_op.py
@@ -129,7 +129,60 @@ class TestMatmulOpError(OpTest):
             fluid.layers.matmul(input3, input3)
 
 
-# Generate test cases for all possibilities
+# Generate every variant of in_shape with one or more dims replaced by -1
+def generate_negative_dims(in_shape):
+    from itertools import combinations
+    size = len(in_shape)
+    indexs = list()
+    shapes = list()
+    for i in range(size):
+        indexs.extend(list(combinations([j for j in range(size)], i + 1)))
+    for idx in indexs:
+        shapes.append(
+            [in_shape[i] if i not in idx else -1 for i in range(size)])
+    return shapes
+
+
+# Build and run programs whose input shapes contain negative (unknown) dims
+def test_negative_dims_program(obj):
+    for shape_x in generate_negative_dims(obj.shape_X):
+        for shape_y in generate_negative_dims(obj.shape_Y):
+            X = np.random.random(obj.shape_X).astype("float32")
+            Y = np.random.random(obj.shape_Y).astype("float32")
+            Ref = reference_matmul(X, Y, obj.transpose_X, obj.transpose_Y)
+            with program_guard(Program(), Program()):
+                x = fluid.data(name='x', shape=shape_x, dtype='float32')
+                y = fluid.data(name='y', shape=shape_y, dtype='float32')
+                output = fluid.layers.matmul(x, y, obj.transpose_X,
+                                             obj.transpose_Y)
+                obj.assertEqual(len(Ref.shape), len(output.shape))
+                for idx in range(len(Ref.shape)):
+                    if output.shape[idx] != -1:
+                        obj.assertEqual(Ref.shape[idx], output.shape[idx])
+                exe = fluid.Executor(fluid.CPUPlace())
+                res, = exe.run(fluid.default_main_program(),
+                               feed={'x': X,
+                                     'y': Y},
+                               fetch_list=[output])
+                obj.assertTrue(np.allclose(res, Ref, atol=1e-5))
+
+
+# Generate program API test cases for all negative-dimension possibilities
+def api_test(dim_x, dim_y, trans_x, trans_y):
+    test_name = ('TestMatMulAPI_dimX_{}_dim_Y_{}_transX_{}_transY_{}'.format(
+        dim_x, dim_y, trans_x, trans_y))
+    shape_x, shape_y = generate_compatible_shapes(dim_x, dim_y, trans_x,
+                                                  trans_y)
+    globals()[test_name] = type(test_name, (OpTest, ), {
+        'shape_X': shape_x,
+        'shape_Y': shape_y,
+        'transpose_X': trans_x,
+        'transpose_Y': trans_y,
+        'test_program': test_negative_dims_program,
+    })
+
+
+# Generate operator test cases for all possibilities
 def inject_test(dim_x, dim_y, trans_x, trans_y):
     test_name = ('TestMatMulOp_dimX_{}_dim_Y_{}_transX_{}_transY_{}'.format(
         dim_x, dim_y, trans_x, trans_y))
@@ -148,6 +201,7 @@ for dim_X in (1, 2, 3):
     for dim_Y in (1, 2, 3):
         for transose_x in (False, True):
             for transose_y in (False, True):
                 inject_test(dim_X, dim_Y, transose_x, transose_y)
+                api_test(dim_X, dim_Y, transose_x, transose_y)
 
 # Test case n-dim
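
For reference, a minimal sketch of the behavior this patch enables, using only the fluid 1.x calls already exercised by test_negative_dims_program above (the concrete shapes and feed values are illustrative, not from the patch):

    import numpy as np
    import paddle.fluid as fluid

    with fluid.program_guard(fluid.Program(), fluid.Program()):
        # -1 marks a dimension as unknown until feed time; before this patch,
        # matmul raised ValueError at build time because -1 != 2.
        x = fluid.data(name='x', shape=[3, -1], dtype='float32')
        y = fluid.data(name='y', shape=[2, 4], dtype='float32')
        # The Python check now tolerates -1, and the C++ InferShape fills the
        # unknown width in from Y's height.
        out = fluid.layers.matmul(x, y)

        exe = fluid.Executor(fluid.CPUPlace())
        res, = exe.run(fluid.default_main_program(),
                       feed={'x': np.random.random((3, 2)).astype('float32'),
                             'y': np.random.random((2, 4)).astype('float32')},
                       fetch_list=[out])
        print(res.shape)  # (3, 4)

At runtime the actual dimensions are known, so the equality and head_number checks that this patch moves under context->IsRuntime() still fire on genuinely mismatched inputs.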