提交 c2631ebf 编写于 作者: Q qijun

add unittest

上级 95df4700
......@@ -59,9 +59,16 @@ void matmul<platform::CPUPlace, float>(const framework::Tensor& in1, bool in1_T,
PADDLE_ENFORCE(
in1_dim.size() == 2 && in2_dim.size() == 2 && out_dim.size() == 2,
"The input and output of matmul be matrix");
PADDLE_ENFORCE(
in1_dim[1] == in2_dim[0],
"First matrix's width must be equal with second matrix's height.");
// Validate that the inner (contraction) dimensions of the two operands
// agree for each of the four transpose combinations.
// A is in1_dim[0] x in1_dim[1]; B is in2_dim[0] x in2_dim[1].
if (!in1_T && !in2_T) {
  // A * B: width of A must equal height of B.
  PADDLE_ENFORCE(in1_dim[1] == in2_dim[0],
                 "First matrix's width must be equal with second matrix's "
                 "height.");
} else if (in1_T && !in2_T) {
  // A^T * B: height of A must equal height of B.
  PADDLE_ENFORCE(in1_dim[0] == in2_dim[0],
                 "First matrix's height must be equal with second matrix's "
                 "height.");
} else if (!in1_T && in2_T) {
  // A * B^T: width of A must equal width of B.
  // BUG FIX: was `in1_dim[1] == in2_dim[0]`, which compares against B's
  // height and rejects valid inputs such as A = B = {2, 3} with trans_b.
  PADDLE_ENFORCE(in1_dim[1] == in2_dim[1],
                 "First matrix's width must be equal with second matrix's "
                 "width.");
} else {
  // A^T * B^T: height of A must equal width of B.
  PADDLE_ENFORCE(in1_dim[0] == in2_dim[1],
                 "First matrix's height must be equal with second matrix's "
                 "width.");
}
PADDLE_ENFORCE(platform::is_cpu_place(in1.place()) &&
platform::is_cpu_place(in2.place()) &&
......@@ -93,9 +100,15 @@ void matmul<platform::CPUPlace, double>(const framework::Tensor& in1,
PADDLE_ENFORCE(
in1_dim.size() == 2 && in2_dim.size() == 2 && out_dim.size() == 2,
"The input and output of matmul be matrix");
PADDLE_ENFORCE(
in1_dim[1] == in2_dim[0],
"First matrix's width must be equal with second matrix's height.");
// Validate that the inner (contraction) dimensions of the two operands
// agree for each of the four transpose combinations.
// A is in1_dim[0] x in1_dim[1]; B is in2_dim[0] x in2_dim[1].
if (!in1_T && !in2_T) {
  // A * B: width of A must equal height of B.
  PADDLE_ENFORCE(in1_dim[1] == in2_dim[0],
                 "First matrix's width must be equal with second matrix's "
                 "height.");
} else if (in1_T && !in2_T) {
  // A^T * B: height of A must equal height of B.
  PADDLE_ENFORCE(in1_dim[0] == in2_dim[0],
                 "First matrix's height must be equal with second matrix's "
                 "height.");
} else if (!in1_T && in2_T) {
  // A * B^T: width of A must equal width of B.
  // BUG FIX: was `in1_dim[1] == in2_dim[0]`, which compares against B's
  // height and rejects valid inputs such as A = B = {2, 3} with trans_b.
  PADDLE_ENFORCE(in1_dim[1] == in2_dim[1],
                 "First matrix's width must be equal with second matrix's "
                 "width.");
} else {
  // A^T * B^T: height of A must equal width of B.
  PADDLE_ENFORCE(in1_dim[0] == in2_dim[1],
                 "First matrix's height must be equal with second matrix's "
                 "width.");
}
PADDLE_ENFORCE(platform::is_cpu_place(in1.place()) &&
platform::is_cpu_place(in2.place()) &&
......
......@@ -71,9 +71,15 @@ void matmul<platform::GPUPlace, float>(const framework::Tensor& in1, bool in1_T,
PADDLE_ENFORCE(
in1_dim.size() == 2 && in2_dim.size() == 2 && out_dim.size() == 2,
"The input and output of matmul be matrix");
PADDLE_ENFORCE(
in1_dim[1] == in2_dim[0],
"First matrix's width must be equal with second matrix's height.");
// Validate that the inner (contraction) dimensions of the two operands
// agree for each of the four transpose combinations.
// A is in1_dim[0] x in1_dim[1]; B is in2_dim[0] x in2_dim[1].
if (!in1_T && !in2_T) {
  // A * B: width of A must equal height of B.
  PADDLE_ENFORCE(in1_dim[1] == in2_dim[0],
                 "First matrix's width must be equal with second matrix's "
                 "height.");
} else if (in1_T && !in2_T) {
  // A^T * B: height of A must equal height of B.
  PADDLE_ENFORCE(in1_dim[0] == in2_dim[0],
                 "First matrix's height must be equal with second matrix's "
                 "height.");
} else if (!in1_T && in2_T) {
  // A * B^T: width of A must equal width of B.
  // BUG FIX: was `in1_dim[1] == in2_dim[0]`, which compares against B's
  // height and rejects valid inputs such as A = B = {2, 3} with trans_b.
  PADDLE_ENFORCE(in1_dim[1] == in2_dim[1],
                 "First matrix's width must be equal with second matrix's "
                 "width.");
} else {
  // A^T * B^T: height of A must equal width of B.
  PADDLE_ENFORCE(in1_dim[0] == in2_dim[1],
                 "First matrix's height must be equal with second matrix's "
                 "width.");
}
PADDLE_ENFORCE(platform::is_gpu_place(in1.place()) &&
platform::is_gpu_place(in2.place()) &&
......@@ -105,9 +111,15 @@ void matmul<platform::GPUPlace, double>(const framework::Tensor& in1,
PADDLE_ENFORCE(
in1_dim.size() == 2 && in2_dim.size() == 2 && out_dim.size() == 2,
"The input and output of matmul be matrix");
PADDLE_ENFORCE(
in1_dim[1] == in2_dim[0],
"First matrix's width must be equal with second matrix's height.");
// Validate that the inner (contraction) dimensions of the two operands
// agree for each of the four transpose combinations.
// A is in1_dim[0] x in1_dim[1]; B is in2_dim[0] x in2_dim[1].
if (!in1_T && !in2_T) {
  // A * B: width of A must equal height of B.
  PADDLE_ENFORCE(in1_dim[1] == in2_dim[0],
                 "First matrix's width must be equal with second matrix's "
                 "height.");
} else if (in1_T && !in2_T) {
  // A^T * B: height of A must equal height of B.
  PADDLE_ENFORCE(in1_dim[0] == in2_dim[0],
                 "First matrix's height must be equal with second matrix's "
                 "height.");
} else if (!in1_T && in2_T) {
  // A * B^T: width of A must equal width of B.
  // BUG FIX: was `in1_dim[1] == in2_dim[0]`, which compares against B's
  // height and rejects valid inputs such as A = B = {2, 3} with trans_b.
  PADDLE_ENFORCE(in1_dim[1] == in2_dim[1],
                 "First matrix's width must be equal with second matrix's "
                 "width.");
} else {
  // A^T * B^T: height of A must equal width of B.
  PADDLE_ENFORCE(in1_dim[0] == in2_dim[1],
                 "First matrix's height must be equal with second matrix's "
                 "width.");
}
PADDLE_ENFORCE(platform::is_gpu_place(in1.place()) &&
platform::is_gpu_place(in2.place()) &&
......
......@@ -2,7 +2,7 @@
#include "gtest/gtest.h"
#ifndef PADDLE_ONLY_CPU
// Checks GPU matmul with trans_a = false, trans_b = true (N * T):
// computes A * A^T for A = [[0, 1, 2], [3, 4, 5]] (2x3) and compares the
// resulting 2x2 matrix against hand-computed values.
// NOTE(review): this span contained interleaved pre-image lines from the
// diff rendering (old {2, 2} setup, `4 * sizeof(int)` memcpy, stale
// `out_gpu.CopyFrom`); only the post-image test is kept here.
TEST(math_function, N_T) {
  paddle::framework::Tensor input1;
  paddle::framework::Tensor input1_gpu;
  paddle::framework::Tensor input2_gpu;
  paddle::framework::Tensor out_gpu;
  paddle::framework::Tensor out;

  // Fill the 2x3 host tensor with 0..5 row-major.
  auto* cpu_place = new paddle::platform::CPUPlace();
  float* input1_ptr = input1.mutable_data<float>({2, 3}, *cpu_place);
  float arr[6] = {0, 1, 2, 3, 4, 5};
  memcpy(input1_ptr, arr, 6 * sizeof(float));

  auto* gpu_place = new paddle::platform::GPUPlace(0);
  paddle::platform::DeviceContext* context =
      new paddle::platform::CUDADeviceContext(*gpu_place);

  // Both operands are copies of the same host tensor.
  input1_gpu.CopyFrom<float>(input1, *gpu_place);
  input2_gpu.CopyFrom<float>(input1, *gpu_place);
  // A (2x3) * B^T (3x2) -> 2x2 output.
  out_gpu.mutable_data<float>({2, 2}, *gpu_place);

  paddle::operators::math::matmul<paddle::platform::GPUPlace, float>(
      input1_gpu, false, input2_gpu, true, 1, &out_gpu, 0, context);

  out.CopyFrom<float>(out_gpu, *cpu_place);

  float* out_ptr = out.data<float>();
  // A * A^T = [[5, 14], [14, 50]].
  EXPECT_EQ(out_ptr[0], 5);
  EXPECT_EQ(out_ptr[1], 14);
  EXPECT_EQ(out_ptr[2], 14);
  EXPECT_EQ(out_ptr[3], 50);
  // NOTE(review): cpu_place/gpu_place/context are intentionally not freed,
  // matching the original test; acceptable for a test binary.
}
// Checks GPU matmul with trans_a = true, trans_b = false (T * N):
// computes A^T * A for A = [[0, 1, 2], [3, 4, 5]] (2x3) and compares the
// resulting 3x3 matrix against hand-computed values.
// NOTE(review): this span contained interleaved pre-image lines from the
// diff rendering (old matmul argument line, old 2x2 EXPECTs); only the
// post-image test is kept here.
TEST(math_function, T_N) {
  paddle::framework::Tensor input1;
  paddle::framework::Tensor input1_gpu;
  paddle::framework::Tensor input2_gpu;
  paddle::framework::Tensor out_gpu;
  paddle::framework::Tensor out;

  // Fill the 2x3 host tensor with 0..5 row-major.
  auto* cpu_place = new paddle::platform::CPUPlace();
  float* input1_ptr = input1.mutable_data<float>({2, 3}, *cpu_place);
  float arr[6] = {0, 1, 2, 3, 4, 5};
  memcpy(input1_ptr, arr, 6 * sizeof(float));

  auto* gpu_place = new paddle::platform::GPUPlace(0);
  paddle::platform::DeviceContext* context =
      new paddle::platform::CUDADeviceContext(*gpu_place);

  // Both operands are copies of the same host tensor.
  input1_gpu.CopyFrom<float>(input1, *gpu_place);
  input2_gpu.CopyFrom<float>(input1, *gpu_place);
  // A^T (3x2) * B (2x3) -> 3x3 output.
  out_gpu.mutable_data<float>({3, 3}, *gpu_place);

  paddle::operators::math::matmul<paddle::platform::GPUPlace, float>(
      input1_gpu, true, input2_gpu, false, 1, &out_gpu, 0, context);

  out.CopyFrom<float>(out_gpu, *cpu_place);

  float* out_ptr = out.data<float>();
  // A^T * A = [[9, 12, 15], [12, 17, 22], [15, 22, 29]].
  EXPECT_EQ(out_ptr[0], 9);
  EXPECT_EQ(out_ptr[1], 12);
  EXPECT_EQ(out_ptr[2], 15);
  EXPECT_EQ(out_ptr[3], 12);
  EXPECT_EQ(out_ptr[4], 17);
  EXPECT_EQ(out_ptr[5], 22);
  EXPECT_EQ(out_ptr[6], 15);
  EXPECT_EQ(out_ptr[7], 22);
  EXPECT_EQ(out_ptr[8], 29);
  // NOTE(review): cpu_place/gpu_place/context are intentionally not freed,
  // matching the original test; acceptable for a test binary.
}
#endif
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册