fix mul kernel test

299be048 · tensor-tang · 24e4be6a · 299be048 · 299be048
Showing 2 changed files with 104 additions and 89 deletions

paddle/fluid/lite/kernels/arm/conv_compute_test.cc paddle/fluid/lite/kernels/arm/conv_compute_test.cc +15 -0

paddle/fluid/lite/kernels/arm/mul_compute_test.cc paddle/fluid/lite/kernels/arm/mul_compute_test.cc +89 -89

File not found.
--- a/paddle/fluid/lite/kernels/arm/conv_compute_test.cc
+++ b/paddle/fluid/lite/kernels/arm/conv_compute_test.cc
@@ -124,6 +124,20 @@ TEST(conv_arm, init) {
 TEST(conv_arm, compute) {
  DeviceInfo::Init();
+#if 1
+  for (auto n : {2}) {
+    for (auto ic : {6}) {
+      for (auto oc : {6}) {
+        for (auto ih : {9}) {
+          for (auto iw : {9}) {
+            for (auto flag_bias : {false, true}) {
+              for (auto flag_relu : {false, true}) {
+                for (auto depthwise : {false, true}) {
+                  for (auto dilation : {1}) {
+                    for (auto stride : {1, 2}) {
+                      for (auto padding : {0, 1, 2}) {
+                        for (auto ks : {1, 3, 5}) {
+#else
  for (auto n : {1, 2}) {
    for (auto ic : {6, 32 /*, 128*/}) {
      for (auto oc : {6, 32 /*, 128*/}) {
@@ -136,6 +150,7 @@ TEST(conv_arm, compute) {
                    for (auto stride : {1, 2}) {
                      for (auto padding : {0, 1, 2}) {
                        for (auto ks : {1, 3, 5}) {
+#endif
                          int group = 1;
                          if (depthwise) {  // depthwise convolution ?
                            group = oc = ic;

--- a/paddle/fluid/lite/kernels/arm/mul_compute_test.cc
+++ b/paddle/fluid/lite/kernels/arm/mul_compute_test.cc
@@ -14,7 +14,10 @@
 #include "paddle/fluid/lite/kernels/arm/mul_compute.h"
 #include <gtest/gtest.h>
+#include <algorithm>
+#include <iostream>
 #include <memory>
+#include <random>
 #include <utility>
 #include <vector>
 #include "paddle/fluid/lite/arm/math/funcs.h"
@@ -25,6 +28,17 @@ namespace lite {
 namespace kernels {
 namespace arm {
+template <typename T>
+void FillData(T* a, const int n, const T lower = static_cast<T>(-2.f),
+              const T upper = static_cast<T>(2.f)) {
+  static unsigned int seed = 100;
+  std::mt19937 rng(seed++);
+  std::uniform_real_distribution<double> uniform_dist(0, 1);
+  for (int i = 0; i < n; ++i) {
+    a[i] = static_cast<T>(uniform_dist(rng) * (upper - lower) + lower);
+  }
+}
 TEST(mul_arm, retrive_op) {
  auto mul =
      KernelRegistry::Global().Create<TARGET(kARM), PRECISION(kFloat)>("mul");
@@ -33,114 +47,100 @@ TEST(mul_arm, retrive_op) {
 }
 TEST(mul_arm, init) {
-  FcCompute mul;
+  MulCompute mul;
  ASSERT_EQ(mul.precision(), PRECISION(kFloat));
  ASSERT_EQ(mul.target(), TARGET(kARM));
 }
 TEST(mul_arm, compare_test) {
-  lite::Tensor x, w, b, out, ref;
+  using T = float;
-  constexpr int batch_size = 2;
-  x.Resize({batch_size, 3});
+  for (int m : {1, 2, 3, 4}) {
-  w.Resize({3, 4});
+    for (int n : {1, 2, 3, 4}) {
-  b.Resize({1, 4});
+      for (int k : {1, 2, 3, 4}) {
-  out.Resize({batch_size, 4});
+        lite::Tensor x, y, out, ref;
-  ref.Resize({batch_size, 4});
+        x.Resize({m, k});
+        y.Resize({k, n});
-  auto x_data = x.mutable_data<float>();
+        out.Resize({m, n});
-  auto w_data = w.mutable_data<float>();
+        ref.Resize({m, n});
-  auto b_data = b.mutable_data<float>();
-  auto out_data = out.mutable_data<float>();
+        auto* x_data = x.mutable_data<T>();
-  auto ref_data = ref.mutable_data<float>();
+        auto* y_data = y.mutable_data<T>();
+        auto* out_data = out.mutable_data<T>();
-  for (int64_t i = 0; i < x.dims().product(); i++) {
+        auto* ref_data = ref.mutable_data<T>();
-    x_data[i] = static_cast<float>(i);
-  }
+        FillData<T>(x_data, x.dims().production());
-  for (int64_t i = 0; i < w.dims().product(); i++) {
+        FillData<T>(y_data, y.dims().production());
-    w_data[i] = static_cast<float>(i);
+        FillData<T>(out_data, out.dims().production());
-  }
+        FillData<T>(ref_data, out.dims().production());
-  for (int64_t i = 0; i < b.dims().product(); i++) {
-    b_data[i] = static_cast<float>(i);
+        MulCompute mul;
-  }
+        operators::MulParam param;
-  lite::arm::math::fc_compute_eigen(x_data, batch_size, 3,  //
+        param.x = &x;
-                                    w_data, 3, 4,           //
+        param.y = &y;
-                                    b_data, ref_data);
-  // mul compute kernel
-  FcCompute mul;
-  operators::FcParam param;
-  param.in_num_col_dims = 1;
-  param.input = &x;
-  param.w = &w;
-  param.bias = &b;
        param.output = &out;
-  param.in_mat_dims = x.dims();
        DeviceInfo::Init();
        std::unique_ptr<KernelContext> ctx(new KernelContext);
        ctx->As<ARMContext>();
        mul.SetParam(param);
        mul.SetContext(std::move(ctx));
+        mul.PrepareForRun();
        mul.Run();
-  VLOG(3) << "output vs ref";
+        lite::arm::math::mul_compute_eigen(x_data, m, k, y_data, k, n,
-  for (int i = 0; i < out.dims().product(); i++) {
+                                           ref_data);
-    VLOG(3) << out_data[i] << " vs " << ref_data[i];
+        for (int i = 0; i < out.dims().production(); i++) {
+          EXPECT_NEAR(out_data[i], ref_data[i], 1e-3);
+        }
+      }
    }
-  for (int i = 0; i < out.dims().product(); ++i) {
-    EXPECT_NEAR(out_data[i], ref_data[i], 1e-5);
  }
 }
 TEST(mul_arm, num_col_dims) {
-  FcCompute mul;
+  using T = float;
-  operators::FcParam param;
+  lite::Tensor x, y, out, ref;
-  lite::Tensor x;
+  x.Resize({2, 3, 4});
-  lite::Tensor w;
+  y.Resize({3, 4, 5});
-  lite::Tensor bias;
+  out.Resize({2, 5});
-  lite::Tensor output;
+  ref.Resize({2, 5});
-  x.Resize({1, 2, 3});
+  auto* x_data = x.mutable_data<T>();
-  w.Resize({3, 4});
+  auto* y_data = y.mutable_data<T>();
-  bias.Resize({1, 4});
+  auto* out_data = out.mutable_data<T>();
-  output.Resize({2, 4});
+  auto* ref_data = ref.mutable_data<T>();
-  auto* x_data = x.mutable_data<float>();
+  FillData<T>(x_data, x.dims().production());
-  auto* w_data = w.mutable_data<float>();
+  FillData<T>(y_data, y.dims().production());
-  auto* bias_data = bias.mutable_data<float>();
+  FillData<T>(out_data, out.dims().production());
-  auto* output_data = output.mutable_data<float>();
+  FillData<T>(ref_data, out.dims().production());
-  for (int64_t i = 0; i < x.dims().product(); i++) {
+  MulCompute mul;
-    x_data[i] = static_cast<float>(i);
+  operators::MulParam param;
-  }
-  for (int64_t i = 0; i < w.dims().product(); i++) {
-    w_data[i] = static_cast<float>(i);
-  }
-  for (int64_t i = 0; i < bias.dims().product(); i++) {
-    bias_data[i] = static_cast<float>(i);
-  }
-  for (int64_t i = 0; i < output.dims().product(); i++) {
-    output_data[i] = static_cast<float>(i);
-  }
-  param.in_num_col_dims = 2;
+  param.x = &x;
-  param.input = &x;
+  param.y = &y;
-  param.w = &w;
+  param.output = &out;
-  param.bias = &bias;
+  param.x_num_col_dims = 1;
-  param.output = &output;
+  param.y_num_col_dims = 2;
-  param.in_mat_dims = x.dims();
+  DeviceInfo::Init();
  std::unique_ptr<KernelContext> ctx(new KernelContext);
  ctx->As<ARMContext>();
-  DeviceInfo::Init();
  mul.SetParam(param);
  mul.SetContext(std::move(ctx));
+  mul.PrepareForRun();
  mul.Run();
+  lite::arm::math::mul_compute_eigen(x_data, 2, 12, y_data, 12, 5, ref_data);
+  for (int i = 0; i < out.dims().production(); i++) {
+    EXPECT_NEAR(out_data[i], ref_data[i], 1e-3);
+  }
 }
 }  // namespace arm