add 4-d for matmul_op

2edc136c · chengduoZH · 4b3e22b8 · 2edc136c · 2edc136c · 2edc136c
4 changed file
--- a/paddle/operators/math/matmul.h
+++ b/paddle/operators/math/matmul.h
@@ -41,10 +41,26 @@ class MatMulFunctor {
                      "Input tensor a must be at least 1-dimensional.");
    PADDLE_ENFORCE_GE(dim_b.size(), 1,
                      "Input tensor b must be at least 1-dimensional.");
-    PADDLE_ENFORCE_LE(dim_a.size(), 3,
-                      "Input tensor a must be at most 3-dimensional.");
-    PADDLE_ENFORCE_LE(dim_b.size(), 3,
-                      "Input tensor b must be at most 3-dimensional.");
+    PADDLE_ENFORCE_LE(dim_a.size(), 4,
+                      "Input tensor a must be at most 4-dimensional.");
+    PADDLE_ENFORCE_LE(dim_b.size(), 4,
+                      "Input tensor b must be at most 4-dimensional.");
+
+    std::vector<int64_t> out_dim;
+    int64_t batch_count = 1;
+    if (dim_a.size() > 3) {
+      PADDLE_ENFORCE(dim_b.size() > 3,
+                     "The dimensions of X and Y must be the same, and both of "
+                     "them should be 4-dimensional.");
+      for (int j = 0; j < dim_a.size() - 2; ++j) {
+        PADDLE_ENFORCE(
+            dim_b[j] == dim_a[j],
+            "The dimensions of X and Y must be the same, and both of "
+            "them should be 4-dimensional.");
+        out_dim.push_back(dim_a[j]);
+        batch_count *= dim_a[j];
+      }
+    }

    int M = 0, N = 0, kA = 0, kB = 0, batchCountA = 0, batchCountB = 0,
        strideA = 0, strideB = 0;
@@ -67,7 +83,11 @@ class MatMulFunctor {
        strideA = M * kA;
        break;
      default:
-        assert(false);
+        batchCountA = batch_count;
+        size_t mat_s = dim_a.size() - 2;
+        M = trans_a ? dim_a[mat_s + 1] : dim_a[mat_s];
+        kA = trans_a ? dim_a[mat_s] : dim_a[mat_s + 1];
+        strideA = M * kA;
    }

    switch (dim_b.size()) {
@@ -88,7 +108,11 @@ class MatMulFunctor {
        strideB = kB * N;
        break;
      default:
-        assert(false);
+        batchCountB = batch_count;
+        size_t mat_s = dim_b.size() - 2;
+        kB = trans_b ? dim_b[mat_s + 1] : dim_b[mat_s];
+        N = trans_b ? dim_b[mat_s] : dim_b[mat_s + 1];
+        strideB = kB * N;
    }

    PADDLE_ENFORCE_EQ(

--- a/paddle/operators/matmul_op.cc
+++ b/paddle/operators/matmul_op.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "paddle/operators/matmul_op.h"
+#include <vector>

 namespace paddle {
 namespace operators {
@@ -41,10 +42,26 @@ class MatMulOp : public framework::OperatorWithKernel {
                      "Input tensor X must be at least 1-dimensional.");
    PADDLE_ENFORCE_GE(dim_y.size(), 1,
                      "Input tensor Y must be at least 1-dimensional.");
-    PADDLE_ENFORCE_LE(dim_x.size(), 3,
-                      "Input tensor X must be at most 3-dimensional.");
-    PADDLE_ENFORCE_LE(dim_y.size(), 3,
-                      "Input tensor Y must be at most 3-dimensional.");
+    PADDLE_ENFORCE_LE(dim_x.size(), 4,
+                      "Input tensor X must be at most 4-dimensional.");
+    PADDLE_ENFORCE_LE(dim_y.size(), 4,
+                      "Input tensor Y must be at most 4-dimensional.");
+
+    std::vector<int64_t> out_dim;
+    int64_t batch_count = 1;
+    if (dim_x.size() > 3) {
+      PADDLE_ENFORCE(dim_y.size() == dim_x.size(),
+                     "The dimensions of X and Y must be the same, and both of "
+                     "them should be 4-dimensional.");
+      for (int j = 0; j < dim_x.size() - 2; ++j) {
+        PADDLE_ENFORCE(
+            dim_y[j] == dim_x[j],
+            "The dimensions of X and Y must be the same, and both of "
+            "them should be 4-dimensional.");
+        out_dim.push_back(dim_x[j]);
+        batch_count *= dim_x[j];
+      }
+    }

    int M = 0, N = 0, KX = 0, KY = 0, batchCountX = 0, batchCountY = 0;
    bool remove_initial_dim = false, remove_final_dim = false;
@@ -70,7 +87,11 @@ class MatMulOp : public framework::OperatorWithKernel {
        KX = transpose_x ? dim_x[1] : dim_x[2];
        break;
      default:
-        assert(false);
+        batchCountX = batch_count;
+        size_t mat_s = dim_x.size() - 2;
+        M = transpose_x ? dim_x[mat_s + 1] : dim_x[mat_s];
+        KX = transpose_x ? dim_x[mat_s] : dim_x[mat_s + 1];
+        break;
    }

    switch (dim_y.size()) {
@@ -94,7 +115,10 @@ class MatMulOp : public framework::OperatorWithKernel {
        N = transpose_y ? dim_y[1] : dim_y[2];
        break;
      default:
-        assert(false);
+        batchCountY = batch_count;
+        size_t mat_s = dim_y.size() - 2;
+        KY = transpose_y ? dim_y[mat_s + 1] : dim_y[mat_s];
+        N = transpose_y ? dim_y[mat_s] : dim_y[mat_s + 1];
    }

    PADDLE_ENFORCE_EQ(
@@ -110,7 +134,11 @@ class MatMulOp : public framework::OperatorWithKernel {

    std::vector<int64_t> dim_out;
    if (batchCount) {
-      dim_out.push_back(batchCount);
+      if (dim_x.size() > 3) {
+        dim_out.insert(dim_out.begin(), out_dim.begin(), out_dim.end());
+      } else {
+        dim_out.push_back(batchCount);
+      }
    }
    if (!remove_initial_dim) {
      dim_out.push_back(M);

--- a/paddle/operators/matmul_op.h
+++ b/paddle/operators/matmul_op.h
@@ -149,7 +149,10 @@ class MatMulGradKernel : public framework::OpKernel<T> {
        M = transpose_x ? x_dims[2] : x_dims[1];
        break;
      default:
-        assert(false);
+        batchCountX = accumulate(x_dims.begin(), x_dims.end() - 2, 1,
+                                 std::multiplies<int>());
+        size_t mat_s = x_dims.size() - 2;
+        M = transpose_x ? x_dims[mat_s + 1] : x_dims[mat_s];
    }

    switch (y_dims.size()) {
@@ -161,7 +164,10 @@ class MatMulGradKernel : public framework::OpKernel<T> {
        N = transpose_y ? y_dims[1] : y_dims[2];
        break;
      default:
-        assert(false);
+        batchCountY = accumulate(y_dims.begin(), y_dims.end() - 2, 1,
+                                 std::multiplies<int>());
+        size_t mat_s = y_dims.size() - 2;
+        N = transpose_y ? y_dims[mat_s] : y_dims[mat_s + 1];
    }
    if (batchCountX && batchCountY) {
      PADDLE_ENFORCE_EQ(
@@ -172,7 +178,13 @@ class MatMulGradKernel : public framework::OpKernel<T> {
    int batchCount = std::max(batchCountX, batchCountY);
    std::vector<int64_t> dout_dims = {M, N};
    if (batchCount) {
-      dout_dims.insert(dout_dims.begin(), batchCount);
+      if (x_dims.size() > 3) {
+        dout_dims.insert(dout_dims.begin(), x_dims.begin(), x_dims.end() - 2);
+      } else if (y_dims.size() > 3) {
+        dout_dims.insert(dout_dims.begin(), y_dims.begin(), y_dims.end() - 2);
+      } else {
+        dout_dims.insert(dout_dims.begin(), batchCount);
+      }
    }
    Tensor X = Reshape<T>(x, make_ddim(x_dims));
    Tensor Y = Reshape<T>(y, make_ddim(y_dims));

--- a/python/paddle/v2/fluid/tests/test_matmul_op.py
+++ b/python/paddle/v2/fluid/tests/test_matmul_op.py
@@ -128,5 +128,22 @@ for dim_X in [1, 2, 3]:
                })
                globals()[test_name] = test_class

+# Test case 4-dim
+dim_X = 4
+dim_Y = 4
+transpose_X = False
+transpose_Y = False
+test_name = ('TestMatMulOp_dimX_{}_dim_Y_{}_transX_{}_transY_{}'.format(
+    dim_X, dim_Y, transpose_X, transpose_Y))
+
+shape_X = [2, 2, 2, 3]
+shape_Y = [2, 2, 3, 4]
+test_class = type(test_name, (Generator, OpTest), {
+    'shape_X': shape_X,
+    'shape_Y': shape_Y,
+    'transpose_X': transpose_X,
+    'transpose_Y': transpose_Y,
+})
+
 if __name__ == "__main__":
    unittest.main()