未验证 提交 04806ffe 编写于 作者: C Cao Ying 提交者: GitHub

Merge pull request #7656 from chengduoZH/feature/enhance_matmul_op

Enhance matmul_op to support 4-D inputs.
...@@ -41,10 +41,24 @@ class MatMulFunctor { ...@@ -41,10 +41,24 @@ class MatMulFunctor {
"Input tensor a must be at least 1-dimensional."); "Input tensor a must be at least 1-dimensional.");
PADDLE_ENFORCE_GE(dim_b.size(), 1, PADDLE_ENFORCE_GE(dim_b.size(), 1,
"Input tensor b must be at least 1-dimensional."); "Input tensor b must be at least 1-dimensional.");
PADDLE_ENFORCE_LE(dim_a.size(), 3,
"Input tensor a must be at most 3-dimensional."); std::vector<int64_t> out_dim;
PADDLE_ENFORCE_LE(dim_b.size(), 3, int64_t batch_count = 1;
"Input tensor b must be at most 3-dimensional."); if (dim_a.size() > 3) {
PADDLE_ENFORCE(dim_b.size() == dim_a.size(),
"The dimensions of X and Y must be the same, and both of "
"them should be %d-dimensional.",
dim_b.size());
// The leading (rank - 2) dimensions are multiplied together into batch_count,
// and the last two dimensions are used for matrix multiplication.
for (int j = 0; j < dim_a.size() - 2; ++j) {
PADDLE_ENFORCE_EQ(dim_b[j], dim_a[j],
"The %d-th dimension of X and Y must be the same.",
j);
out_dim.push_back(dim_a[j]);
batch_count *= dim_a[j];
}
}
int M = 0, N = 0, kA = 0, kB = 0, batchCountA = 0, batchCountB = 0, int M = 0, N = 0, kA = 0, kB = 0, batchCountA = 0, batchCountB = 0,
strideA = 0, strideB = 0; strideA = 0, strideB = 0;
...@@ -67,7 +81,11 @@ class MatMulFunctor { ...@@ -67,7 +81,11 @@ class MatMulFunctor {
strideA = M * kA; strideA = M * kA;
break; break;
default: default:
assert(false); batchCountA = batch_count;
size_t mat_s = dim_a.size() - 2;
M = trans_a ? dim_a[mat_s + 1] : dim_a[mat_s];
kA = trans_a ? dim_a[mat_s] : dim_a[mat_s + 1];
strideA = M * kA;
} }
switch (dim_b.size()) { switch (dim_b.size()) {
...@@ -88,7 +106,11 @@ class MatMulFunctor { ...@@ -88,7 +106,11 @@ class MatMulFunctor {
strideB = kB * N; strideB = kB * N;
break; break;
default: default:
assert(false); batchCountB = batch_count;
size_t mat_s = dim_b.size() - 2;
kB = trans_b ? dim_b[mat_s + 1] : dim_b[mat_s];
N = trans_b ? dim_b[mat_s] : dim_b[mat_s + 1];
strideB = kB * N;
} }
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
......
...@@ -41,10 +41,26 @@ class MatMulOp : public framework::OperatorWithKernel { ...@@ -41,10 +41,26 @@ class MatMulOp : public framework::OperatorWithKernel {
"Input tensor X must be at least 1-dimensional."); "Input tensor X must be at least 1-dimensional.");
PADDLE_ENFORCE_GE(dim_y.size(), 1, PADDLE_ENFORCE_GE(dim_y.size(), 1,
"Input tensor Y must be at least 1-dimensional."); "Input tensor Y must be at least 1-dimensional.");
PADDLE_ENFORCE_LE(dim_x.size(), 3,
"Input tensor X must be at most 3-dimensional."); std::vector<int64_t> out_dim;
PADDLE_ENFORCE_LE(dim_y.size(), 3, int64_t batch_count = 1;
"Input tensor Y must be at most 3-dimensional."); if (dim_x.size() > 3) {
PADDLE_ENFORCE_EQ(
dim_y.size(), dim_x.size(),
"The dimensions of X and Y must be the same, and both of "
"them should be %d-dimensional.",
dim_x.size());
// The leading (rank - 2) dimensions are multiplied together into batch_count,
// and the last two dimensions are used for matrix multiplication.
for (int j = 0; j < dim_x.size() - 2; ++j) {
PADDLE_ENFORCE_EQ(dim_y[j], dim_x[j],
"The %d-th dimension of X and Y must be the same.",
j);
out_dim.push_back(dim_x[j]);
batch_count *= dim_x[j];
}
}
int M = 0, N = 0, KX = 0, KY = 0, batchCountX = 0, batchCountY = 0; int M = 0, N = 0, KX = 0, KY = 0, batchCountX = 0, batchCountY = 0;
bool remove_initial_dim = false, remove_final_dim = false; bool remove_initial_dim = false, remove_final_dim = false;
...@@ -70,7 +86,11 @@ class MatMulOp : public framework::OperatorWithKernel { ...@@ -70,7 +86,11 @@ class MatMulOp : public framework::OperatorWithKernel {
KX = transpose_x ? dim_x[1] : dim_x[2]; KX = transpose_x ? dim_x[1] : dim_x[2];
break; break;
default: default:
assert(false); batchCountX = batch_count;
size_t mat_s = dim_x.size() - 2;
M = transpose_x ? dim_x[mat_s + 1] : dim_x[mat_s];
KX = transpose_x ? dim_x[mat_s] : dim_x[mat_s + 1];
break;
} }
switch (dim_y.size()) { switch (dim_y.size()) {
...@@ -94,7 +114,10 @@ class MatMulOp : public framework::OperatorWithKernel { ...@@ -94,7 +114,10 @@ class MatMulOp : public framework::OperatorWithKernel {
N = transpose_y ? dim_y[1] : dim_y[2]; N = transpose_y ? dim_y[1] : dim_y[2];
break; break;
default: default:
assert(false); batchCountY = batch_count;
size_t mat_s = dim_y.size() - 2;
KY = transpose_y ? dim_y[mat_s + 1] : dim_y[mat_s];
N = transpose_y ? dim_y[mat_s] : dim_y[mat_s + 1];
} }
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
...@@ -110,7 +133,11 @@ class MatMulOp : public framework::OperatorWithKernel { ...@@ -110,7 +133,11 @@ class MatMulOp : public framework::OperatorWithKernel {
std::vector<int64_t> dim_out; std::vector<int64_t> dim_out;
if (batchCount) { if (batchCount) {
dim_out.push_back(batchCount); if (dim_x.size() > 3) {
dim_out.insert(dim_out.begin(), out_dim.begin(), out_dim.end());
} else {
dim_out.push_back(batchCount);
}
} }
if (!remove_initial_dim) { if (!remove_initial_dim) {
dim_out.push_back(M); dim_out.push_back(M);
...@@ -162,10 +189,14 @@ Examples without transpose: ...@@ -162,10 +189,14 @@ Examples without transpose:
- X: [B, M, K], Y: [K] => Out: [B, M] - X: [B, M, K], Y: [K] => Out: [B, M]
- X: [M, K], Y: [B, K, N] => Out: [B, M, N] - X: [M, K], Y: [B, K, N] => Out: [B, M, N]
- X: [B, M, K], Y: [B, K, N] => Out: [B, M, N] - X: [B, M, K], Y: [B, K, N] => Out: [B, M, N]
- X: [B, ..., M, K], Y: [B, ..., K, N] => Out: [B, ..., M, N]
The behavior is designed to be similar to the `numpy.matmul` function. The behavior is designed to be similar to the `numpy.matmul` function.
The differences are: The differences are:
- Currently only rank 1 to rank 3 input tensors are supported. - When the rank of the input data is less than or equal to 3, it
is similar to the `numpy.matmul` function.
- When the rank of the input is greater than 3, the rank of X and
Y must be equal, and the first `rank - 2` dimensions must be equal.
- We add `transpose_X` and `transpose_Y` flags. - We add `transpose_X` and `transpose_Y` flags.
Both the input `X` and `Y` can carry the LoD (Level of Details) information, Both the input `X` and `Y` can carry the LoD (Level of Details) information,
......
...@@ -137,6 +137,13 @@ class MatMulGradKernel : public framework::OpKernel<T> { ...@@ -137,6 +137,13 @@ class MatMulGradKernel : public framework::OpKernel<T> {
y_dims.push_back(1); y_dims.push_back(1);
} }
int batch_count = 0;
// The leading (rank - 2) dimensions are multiplied together into batch_count,
// and the last two dimensions are used for matrix multiplication.
if (x_dims.size() > 3) {
batch_count = accumulate(x_dims.begin(), x_dims.end() - 2, 1,
std::multiplies<int>());
}
// Fix the dOut dimensions. // Fix the dOut dimensions.
int M = 0, N = 0, batchCountX = 0, batchCountY = 0; int M = 0, N = 0, batchCountX = 0, batchCountY = 0;
...@@ -149,7 +156,9 @@ class MatMulGradKernel : public framework::OpKernel<T> { ...@@ -149,7 +156,9 @@ class MatMulGradKernel : public framework::OpKernel<T> {
M = transpose_x ? x_dims[2] : x_dims[1]; M = transpose_x ? x_dims[2] : x_dims[1];
break; break;
default: default:
assert(false); batchCountX = batch_count;
size_t mat_s = x_dims.size() - 2;
M = transpose_x ? x_dims[mat_s + 1] : x_dims[mat_s];
} }
switch (y_dims.size()) { switch (y_dims.size()) {
...@@ -161,7 +170,9 @@ class MatMulGradKernel : public framework::OpKernel<T> { ...@@ -161,7 +170,9 @@ class MatMulGradKernel : public framework::OpKernel<T> {
N = transpose_y ? y_dims[1] : y_dims[2]; N = transpose_y ? y_dims[1] : y_dims[2];
break; break;
default: default:
assert(false); batchCountY = batch_count;
size_t mat_s = y_dims.size() - 2;
N = transpose_y ? y_dims[mat_s] : y_dims[mat_s + 1];
} }
if (batchCountX && batchCountY) { if (batchCountX && batchCountY) {
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
...@@ -172,7 +183,11 @@ class MatMulGradKernel : public framework::OpKernel<T> { ...@@ -172,7 +183,11 @@ class MatMulGradKernel : public framework::OpKernel<T> {
int batchCount = std::max(batchCountX, batchCountY); int batchCount = std::max(batchCountX, batchCountY);
std::vector<int64_t> dout_dims = {M, N}; std::vector<int64_t> dout_dims = {M, N};
if (batchCount) { if (batchCount) {
dout_dims.insert(dout_dims.begin(), batchCount); if (x_dims.size() > 3) {
dout_dims.insert(dout_dims.begin(), x_dims.begin(), x_dims.end() - 2);
} else {
dout_dims.insert(dout_dims.begin(), batchCount);
}
} }
Tensor X = Reshape<T>(x, make_ddim(x_dims)); Tensor X = Reshape<T>(x, make_ddim(x_dims));
Tensor Y = Reshape<T>(y, make_ddim(y_dims)); Tensor Y = Reshape<T>(y, make_ddim(y_dims));
......
...@@ -1794,8 +1794,9 @@ def l2_normalize(x, axis, epsilon=1e-12, name=None): ...@@ -1794,8 +1794,9 @@ def l2_normalize(x, axis, epsilon=1e-12, name=None):
def matmul(x, y, transpose_x=False, transpose_y=False, name=None): def matmul(x, y, transpose_x=False, transpose_y=False, name=None):
""" """
Applies matrix multipication to two tensors. Currently only rank 1 to rank Applies matrix multiplication to two tensors. Currently, the input
3 input tensors are supported. tensors' rank can be any, but when the rank of anyone inputs is
bigger than 3, the two inputs' ranks should be equal.
The actual behavior depends on the shapes of :math:`x`, :math:`y` and the The actual behavior depends on the shapes of :math:`x`, :math:`y` and the
flag values of :attr:`transpose_x`, :attr:`transpose_y`. Specifically: flag values of :attr:`transpose_x`, :attr:`transpose_y`. Specifically:
...@@ -1807,17 +1808,17 @@ def matmul(x, y, transpose_x=False, transpose_y=False, name=None): ...@@ -1807,17 +1808,17 @@ def matmul(x, y, transpose_x=False, transpose_y=False, name=None):
opposite: It is treated as :math:`[D, 1]` in nontransposed form and as opposite: It is treated as :math:`[D, 1]` in nontransposed form and as
:math:`[1, D]` in transposed form. :math:`[1, D]` in transposed form.
- After transpose, the two tensors are 2-D or 3-D and matrix multipication - After transpose, the two tensors are 2-D or n-D and matrix multiplication
performs in the following way. performs in the following way.
- If both are 2-D, they are multiplied like conventional matrices. - If both are 2-D, they are multiplied like conventional matrices.
- If either is 3-D, it is treated as a stack of matrices residing in the - If either is n-D, it is treated as a stack of matrices residing in the
last two dimensions and a batched matrix multiply supporting broadcast last two dimensions and a batched matrix multiply supporting broadcast
applies on the two tensors. applies on the two tensors.
Also note that if the raw tensor :math:`x` or :math:`y` is rank-1 and Also note that if the raw tensor :math:`x` or :math:`y` is rank-1 and
nontransposed, the prepended or appended dimension :math:`1` will be nontransposed, the prepended or appended dimension :math:`1` will be
removed after matrix multipication. removed after matrix multiplication.
Args: Args:
x (Variable): The input variable which is a Tensor or LoDTensor. x (Variable): The input variable which is a Tensor or LoDTensor.
...@@ -1834,6 +1835,8 @@ def matmul(x, y, transpose_x=False, transpose_y=False, name=None): ...@@ -1834,6 +1835,8 @@ def matmul(x, y, transpose_x=False, transpose_y=False, name=None):
.. code-block:: python .. code-block:: python
# Examples to clarify shapes of the inputs and output # Examples to clarify shapes of the inputs and output
# x: [B, ..., M, K], y: [B, ..., K, N]
fluid.layers.matmul(x, y) # out: [B, ..., M, N]
# x: [B, M, K], y: [B, K, N] # x: [B, M, K], y: [B, K, N]
fluid.layers.matmul(x, y) # out: [B, M, N] fluid.layers.matmul(x, y) # out: [B, M, N]
# x: [B, M, K], y: [K, N] # x: [B, M, K], y: [K, N]
...@@ -1849,9 +1852,9 @@ def matmul(x, y, transpose_x=False, transpose_y=False, name=None): ...@@ -1849,9 +1852,9 @@ def matmul(x, y, transpose_x=False, transpose_y=False, name=None):
fluid.layers.matmul(x, y, True, True) # out: [M, N] fluid.layers.matmul(x, y, True, True) # out: [M, N]
""" """
helper = LayerHelper('matmul', **locals()) helper = LayerHelper('matmul', **locals())
assert max( assert max(len(x.shape), len(y.shape)) <= 3 or len(x.shape) == len(
len(x.shape), len(y.shape) y.
) <= 3, 'Currently only rank 1 to rank 3 input tensors are supported.' shape), 'Inputs\' rank should be equal or their rank should be less 4.'
out = helper.create_tmp_variable(dtype=helper.input_dtype()) out = helper.create_tmp_variable(dtype=helper.input_dtype())
helper.append_op( helper.append_op(
type='matmul', type='matmul',
......
...@@ -59,19 +59,18 @@ def reference_matmul(X, Y, transpose_X=False, transpose_Y=False): ...@@ -59,19 +59,18 @@ def reference_matmul(X, Y, transpose_X=False, transpose_Y=False):
X = X.reshape((X.size, 1)) X = X.reshape((X.size, 1))
elif X.ndim == 2: elif X.ndim == 2:
X = X.T X = X.T
elif X.ndim == 3:
X = np.transpose(X, (0, 2, 1))
else: else:
raise ValueError('X must have between 1 and 3 dimensions') dim = [i for i in range(len(X.shape))]
dim[-1], dim[len(X.shape) - 2] = dim[len(X.shape) - 2], dim[-1]
X = np.transpose(X, tuple(dim))
if transpose_Y: if transpose_Y:
if Y.ndim == 1: if Y.ndim == 1:
Y = Y.reshape((1, Y.size)) Y = Y.reshape((1, Y.size))
elif Y.ndim == 2:
Y = Y.T
elif Y.ndim == 3:
Y = np.transpose(Y, (0, 2, 1))
else: else:
raise ValueError('Y must have between 1 and 3 dimensions') dim = [i for i in range(len(Y.shape))]
dim[-1], dim[len(Y.shape) - 2] = dim[len(Y.shape) - 2], dim[-1]
Y = np.transpose(Y, tuple(dim))
Out = np.matmul(X, Y) Out = np.matmul(X, Y)
if not Out.shape: if not Out.shape:
# We do not support 0-dimensional Tensors (scalars). So where # We do not support 0-dimensional Tensors (scalars). So where
...@@ -120,13 +119,50 @@ for dim_X in [1, 2, 3]: ...@@ -120,13 +119,50 @@ for dim_X in [1, 2, 3]:
dim_X, dim_Y, transpose_X, transpose_Y)) dim_X, dim_Y, transpose_X, transpose_Y))
shape_X, shape_Y = generate_compatible_shapes( shape_X, shape_Y = generate_compatible_shapes(
dim_X, dim_Y, transpose_X, transpose_Y) dim_X, dim_Y, transpose_X, transpose_Y)
test_class = type(test_name, (Generator, OpTest), { globals()[test_name] = type(test_name, (Generator, OpTest), {
'shape_X': shape_X, 'shape_X': shape_X,
'shape_Y': shape_Y, 'shape_Y': shape_Y,
'transpose_X': transpose_X, 'transpose_X': transpose_X,
'transpose_Y': transpose_Y, 'transpose_Y': transpose_Y,
}) })
globals()[test_name] = test_class
# Test case n-dim
def generate_compatible_shapes(dim, transpose_X, transpose_Y):
    """Build a pair of operand shapes for an n-dimensional matmul test.

    Both shapes start with ``dim - 2`` leading dimensions of size 2 and end
    with the two matrix dimensions, using M = 2, K = 3 and N = 4. X ends in
    ``[M, K]`` (``[K, M]`` when ``transpose_X`` is set) and Y ends in
    ``[K, N]`` (``[N, K]`` when ``transpose_Y`` is set).

    Args:
        dim (int): Rank of both generated shapes. Must be at least 2,
            because two matrix dimensions are always appended.
        transpose_X (bool): Whether the last two dimensions of X are stored
            swapped.
        transpose_Y (bool): Whether the last two dimensions of Y are stored
            swapped.

    Returns:
        tuple: ``(shape_X, shape_Y)``, two independent lists of ints.

    Raises:
        ValueError: If ``dim`` is less than 2. Without this check the result
            would silently have rank 2 instead of the requested rank.
    """
    # NOTE(review): a four-argument call to this same name appears earlier in
    # the file, so this definition rebinds the name at module level -- confirm
    # that nothing after this point still needs the four-argument version.
    if dim < 2:
        raise ValueError(
            'dim must be at least 2 to hold the two matrix dimensions, '
            'got {}'.format(dim))
    M, N, K = 2, 4, 3
    # Two separate lists, so callers can mutate one without touching the other.
    shape_X = [2] * (dim - 2)
    shape_Y = [2] * (dim - 2)
    shape_X += [K, M] if transpose_X else [M, K]
    shape_Y += [N, K] if transpose_Y else [K, N]
    return shape_X, shape_Y
# Test case n-dim
# For every transpose combination at each listed rank, build a test class
# from (Generator, OpTest) and publish it under its generated name in the
# module globals.
for dim in (4, ):
    for transpose_X in (False, True):
        for transpose_Y in (False, True):
            shape_X, shape_Y = generate_compatible_shapes(
                dim, transpose_X, transpose_Y)
            test_name = (
                'TestMatMulOp_dimX_{}_dim_Y_{}_transX_{}_transY_{}'.format(
                    dim, dim, transpose_X, transpose_Y))
            globals()[test_name] = type(
                test_name, (Generator, OpTest),
                dict(
                    shape_X=shape_X,
                    shape_Y=shape_Y,
                    transpose_X=transpose_X,
                    transpose_Y=transpose_Y))
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册