Commit 25d2ad2d authored by liuqi

Fix eltwise sub and div swapped bug.

Parent f078a265
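In the eltwise functors, when the two inputs differ in size the smaller (vector-shaped) operand is swapped behind the larger tensor so that broadcasting always sees the tensor first. That is harmless for commutative ops, but SUB and DIV then evaluate their operands in the wrong order; this commit threads a swapped flag from the functors into the CPU and OpenCL kernels so they can reverse the expression. A minimal standalone sketch of the intended semantics (hypothetical helper names, not MACE code):

#include <cassert>

// Hypothetical helpers: when the operands were reordered so the tensor comes
// first, SUB and DIV must evaluate the reversed expression to preserve the
// caller's original meaning.
inline float EltwiseSub(float tensor_val, float vector_val, bool swapped) {
  return swapped ? vector_val - tensor_val : tensor_val - vector_val;
}

inline float EltwiseDiv(float tensor_val, float vector_val, bool swapped) {
  return swapped ? vector_val / tensor_val : tensor_val / vector_val;
}

int main() {
  // vector - tensor: 1 - 6 == -5, as in the new SUB test vectors below.
  assert(EltwiseSub(6.0f, 1.0f, /*swapped=*/true) == -5.0f);
  // tensor - vector (no swap): 6 - 1 == 5, the pre-existing behavior.
  assert(EltwiseSub(6.0f, 1.0f, /*swapped=*/false) == 5.0f);
  // vector / tensor: 4 / 2 == 2, as in the new DIV test vectors below.
  assert(EltwiseDiv(2.0f, 4.0f, /*swapped=*/true) == 2.0f);
  return 0;
}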
@@ -114,6 +114,7 @@ inline void TensorVector(const EltwiseType type,
                         const index_t batch,
                         const index_t channel,
                         const index_t hw,
+                        const bool swapped,
                         float *output) {
  switch (type) {
    case SUM:
@@ -129,13 +130,26 @@ inline void TensorVector(const EltwiseType type,
      }
      break;
    case SUB:
+      if (swapped) {
#pragma omp parallel for collapse(3)
-      for (index_t b = 0; b < batch; ++b) {
-        for (index_t c = 0; c < channel; ++c) {
-          for (index_t i = 0; i < hw; ++i) {
-            const index_t idx0 = (b * channel + c) * hw + i;
-            const index_t idx1 = b * channel + c;
-            output[idx0] = input0[idx0] - input1[idx1];
+        for (index_t b = 0; b < batch; ++b) {
+          for (index_t c = 0; c < channel; ++c) {
+            for (index_t i = 0; i < hw; ++i) {
+              const index_t idx0 = (b * channel + c) * hw + i;
+              const index_t idx1 = b * channel + c;
+              output[idx0] = input1[idx1] - input0[idx0];
+            }
+          }
+        }
+      } else {
+#pragma omp parallel for collapse(3)
+        for (index_t b = 0; b < batch; ++b) {
+          for (index_t c = 0; c < channel; ++c) {
+            for (index_t i = 0; i < hw; ++i) {
+              const index_t idx0 = (b * channel + c) * hw + i;
+              const index_t idx1 = b * channel + c;
+              output[idx0] = input0[idx0] - input1[idx1];
            }
          }
        }
+      }
@@ -153,13 +167,26 @@ inline void TensorVector(const EltwiseType type,
      }
      break;
    case DIV:
+      if (swapped) {
#pragma omp parallel for collapse(3)
-      for (index_t b = 0; b < batch; ++b) {
-        for (index_t c = 0; c < channel; ++c) {
-          for (index_t i = 0; i < hw; ++i) {
-            const index_t idx0 = (b * channel + c) * hw + i;
-            const index_t idx1 = b * channel + c;
-            output[idx0] = input0[idx0] / input1[idx1];
+        for (index_t b = 0; b < batch; ++b) {
+          for (index_t c = 0; c < channel; ++c) {
+            for (index_t i = 0; i < hw; ++i) {
+              const index_t idx0 = (b * channel + c) * hw + i;
+              const index_t idx1 = b * channel + c;
+              output[idx0] = input1[idx1] / input0[idx0];
+            }
+          }
+        }
+      } else {
+#pragma omp parallel for collapse(3)
+        for (index_t b = 0; b < batch; ++b) {
+          for (index_t c = 0; c < channel; ++c) {
+            for (index_t i = 0; i < hw; ++i) {
+              const index_t idx0 = (b * channel + c) * hw + i;
+              const index_t idx1 = b * channel + c;
+              output[idx0] = input0[idx0] / input1[idx1];
            }
          }
        }
+      }
@@ -283,12 +310,14 @@ struct EltwiseFunctor<DeviceType::CPU, float>: EltwiseFunctorBase {
                  const Tensor *input1,
                  Tensor *output,
                  StatsFuture *future) {
+    bool swapped = false;
    if (input1 != nullptr) {
      MACE_CHECK(input0->dim_size() == input1->dim_size())
        << "Inputs of Eltwise op must be same shape";
      if (input0->size() != input1->size()) {
        if (input0->size() < input1->size()) {
          std::swap(input0, input1);
+          swapped = true;
        }
        MACE_CHECK(input0->dim(0) == input1->dim(0) &&
                   input0->dim(1) == input1->dim(1) &&
@@ -316,7 +345,7 @@ struct EltwiseFunctor<DeviceType::CPU, float>: EltwiseFunctorBase {
      const index_t channel = input0->dim(1);
      const index_t hw = input0->dim(2) * input0->dim(3);
      TensorVector(type_, input0_ptr, input1_ptr,
-                  batch, channel, hw, output_ptr);
+                  batch, channel, hw, swapped, output_ptr);
    } else {
      if (!coeff_.empty() && type_ == SUM) {
#pragma omp parallel for
......
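Both functor changes above follow the same pattern: normalize the operand order so the tensor input comes first, and remember whether the original order was reversed. A minimal sketch of that pattern with plain pointers and sizes standing in for MACE Tensors (hypothetical names, not the MACE API):

#include <cstddef>
#include <utility>

// Hypothetical stand-in for the functor logic above.
struct Operands {
  const float *input0;  // larger (tensor) operand after normalization
  const float *input1;  // smaller (vector) operand after normalization
  bool swapped;         // true if the caller's order was reversed
};

inline Operands NormalizeOperands(const float *a, std::size_t a_size,
                                  const float *b, std::size_t b_size) {
  Operands ops{a, b, false};
  if (a_size < b_size) {  // mirrors the size check in the functors
    std::swap(ops.input0, ops.input1);
    ops.swapped = true;
  }
  return ops;
}

On the CPU path the flag is then forwarded to TensorVector; on the GPU path it becomes the -DSWAPPED build option consumed by the kernel below.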
@@ -45,11 +45,19 @@ __kernel void eltwise(KERNEL_ERROR_PARAMS
  out = in0 + in1;
#endif
#elif ELTWISE_TYPE == 1
-  out = in0 - in1;
+#ifdef SWAPPED
+  out = in1 - in0;
+#else
+  out = in0 - in1;
+#endif
#elif ELTWISE_TYPE == 2
  out = in0 * in1;
#elif ELTWISE_TYPE == 3
-  out = in0 / in1;
+#ifdef SWAPPED
+  out = in1 / in0;
+#else
+  out = in0 / in1;
+#endif
#elif ELTWISE_TYPE == 4
  out = fmin(in0, in1);
#elif ELTWISE_TYPE == 5
......
@@ -25,12 +25,14 @@ void EltwiseFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input0,
    const Tensor *input1,
    Tensor *output,
    StatsFuture *future) {
+  bool swapped = false;
  if (input1 != nullptr) {
    MACE_CHECK(input0->dim_size() == input1->dim_size())
      << "Inputs of Eltwise op must be same shape";
    if (input0->size() != input1->size()) {
      if (input0->size() < input1->size()) {
        std::swap(input0, input1);
+        swapped = true;
      }
      MACE_CHECK(input0->dim(0) == input1->dim(0) &&
                 input1->dim(1) == 1 &&
@@ -62,9 +64,10 @@ void EltwiseFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input0,
  built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(dt));
  built_options.emplace(MakeString("-DELTWISE_TYPE=", type_));
  if (input1 == nullptr) {
-    built_options.emplace(MakeString("-DINPUT_TYPE=1"));
+    built_options.emplace("-DINPUT_TYPE=1");
  } else if (input0->size() != input1->size()) {
-    built_options.emplace(MakeString("-DINPUT_TYPE=2"));
+    built_options.emplace("-DINPUT_TYPE=2");
+    if (swapped) built_options.emplace("-DSWAPPED");
  }
  if (!coeff_.empty()) built_options.emplace("-DCOEFF_SUM");
......
@@ -238,6 +238,12 @@ TEST_F(EltwiseOpTest, CPUSimpleTensorVector) {
      {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
      {1, 1, 1, 5}, {1, 2, 3, 4, 5},
      {0, 0, 0, 0, 0, 5, 5, 5, 5, 5});
+  SimpleTensorEltwise<DeviceType::CPU, float>(kernels::EltwiseType::SUB,
+      {1, 1, 1, 5}, {1, 2, 3, 4, 5},
+      {1, 2, 1, 5},
+      {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
+      {0, 0, 0, 0, 0,
+       -5, -5, -5, -5, -5});
  SimpleTensorEltwise<DeviceType::CPU, float>(kernels::EltwiseType::PROD,
      {1, 1, 1, 3}, {1, 2, 3},
      {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6},
@@ -247,6 +253,11 @@ TEST_F(EltwiseOpTest, CPUSimpleTensorVector) {
      {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
      {1, 1, 1, 5}, {1, 1, 1, 1, 5},
      {1, 2, 3, 4, 1, 6, 7, 8, 9, 2});
+  SimpleTensorEltwise<DeviceType::CPU, float>(kernels::EltwiseType::DIV,
+      {1, 1, 1, 5}, {1, 1, 1, 2, 4},
+      {1, 2, 1, 5},
+      {1, 1, 1, 2, 2, 1, 1, 1, 1, 1},
+      {1, 1, 1, 1, 2, 1, 1, 1, 2, 4});
  SimpleTensorEltwise<DeviceType::CPU, float>(kernels::EltwiseType::MIN,
      {1, 1, 1, 5}, {1, 2, 3, 4, 5},
      {1, 2, 1, 5},
@@ -276,6 +287,12 @@ TEST_F(EltwiseOpTest, GPUSimpleTensorVector) {
      {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
      {1, 1, 1, 5}, {1, 2, 3, 4, 5},
      {0, 0, 0, 0, 0, 5, 5, 5, 5, 5});
+  SimpleTensorEltwise<DeviceType::OPENCL, float>(
+      kernels::EltwiseType::SUB,
+      {1, 1, 1, 5}, {1, 2, 3, 4, 5},
+      {1, 2, 1, 5},
+      {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
+      {0, 0, 0, 0, 0, -5, -5, -5, -5, -5});
  SimpleTensorEltwise<DeviceType::OPENCL, float>(
      kernels::EltwiseType::PROD,
      {1, 1, 1, 3}, {1, 2, 3},
@@ -286,6 +303,12 @@ TEST_F(EltwiseOpTest, GPUSimpleTensorVector) {
      {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
      {1, 1, 1, 5}, {1, 1, 1, 1, 5},
      {1, 2, 3, 4, 1, 6, 7, 8, 9, 2});
+  SimpleTensorEltwise<DeviceType::OPENCL, float>(
+      kernels::EltwiseType::DIV,
+      {1, 1, 1, 5}, {1, 1, 1, 2, 4},
+      {1, 2, 1, 5},
+      {1, 1, 1, 2, 2, 1, 1, 1, 1, 1},
+      {1, 1, 1, 1, 2, 1, 1, 1, 2, 4});
  SimpleTensorEltwise<DeviceType::OPENCL, float>(
      kernels::EltwiseType::MIN,
      {1, 1, 1, 5}, {1, 2, 3, 4, 5},
@@ -530,6 +553,10 @@ TEST_F(EltwiseOpTest, RandomTensorVecFloat) {
                             {1, 32, 32, 16}, {1, 1, 1, 16});
  RandomTensorEltwise<float>(kernels::EltwiseType::SUB,
                             {5, 32, 32, 16}, {5, 1, 1, 16});
+  RandomTensorEltwise<float>(kernels::EltwiseType::SUB,
+                             {5, 1, 1, 16}, {5, 32, 32, 16});
  RandomTensorEltwise<float>(kernels::EltwiseType::PROD,
                             {1, 31, 37, 17}, {1, 1, 1, 17});
+  RandomTensorEltwise<float>(kernels::EltwiseType::PROD,
+                             {1, 1, 1, 17}, {1, 31, 37, 17});
  RandomTensorEltwise<float>(kernels::EltwiseType::DIV,
@@ -547,8 +574,12 @@ TEST_F(EltwiseOpTest, RandomTensorVecHalf) {
                            {1, 32, 32, 16}, {1, 1, 1, 16});
  RandomTensorEltwise<half>(kernels::EltwiseType::SUB,
                            {3, 32, 32, 16}, {3, 1, 1, 16});
+  RandomTensorEltwise<half>(kernels::EltwiseType::SUB,
+                            {3, 1, 1, 16}, {3, 32, 32, 16});
  RandomTensorEltwise<half>(kernels::EltwiseType::PROD,
                            {1, 1, 1, 17}, {1, 31, 37, 17});
  RandomTensorEltwise<half>(kernels::EltwiseType::DIV,
                            {5, 31, 37, 17}, {5, 1, 1, 17});
+  RandomTensorEltwise<half>(kernels::EltwiseType::DIV,
+                            {5, 1, 1, 17}, {5, 31, 37, 17});
  RandomTensorEltwise<half>(kernels::EltwiseType::MIN,
......
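As a quick sanity check on the new CPU SUB test above, a tiny standalone program (not part of the test suite) reproduces the expected output of subtracting the {1, 2, 1, 5} tensor from the broadcast {1, 1, 1, 5} vector:

#include <cstdio>

int main() {
  // First operand of the test: a 5-element vector; second: a 2 x 5 tensor.
  const float vec[5] = {1, 2, 3, 4, 5};
  const float tensor[10] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
  // The vector is broadcast over both rows, then subtracted element-wise.
  for (int row = 0; row < 2; ++row) {
    for (int i = 0; i < 5; ++i) {
      std::printf("%g ", vec[i] - tensor[row * 5 + i]);
    }
  }
  std::printf("\n");  // prints: 0 0 0 0 0 -5 -5 -5 -5 -5
  return 0;
}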