提交 9d3e2cc5 编写于 作者: W w-adamski 提交者: Liangliang He

Added floordiv() operator (#336)

* Added floordiv() operator

* Restored original indentation

* Update eltwise.cl

Update eltwise.cl according to the recent fix.
上级 e93296e0
...@@ -20,7 +20,7 @@ Operator lists ...@@ -20,7 +20,7 @@ Operator lists
"DEPTHWISE_CONV_2D","Y","Only multiplier = 1 is supported; Fusion is supported." "DEPTHWISE_CONV_2D","Y","Only multiplier = 1 is supported; Fusion is supported."
"DEPTH_TO_SPACE","Y","" "DEPTH_TO_SPACE","Y",""
"DEQUANTIZE","Y","Model quantization will be supported later." "DEQUANTIZE","Y","Model quantization will be supported later."
"ELEMENT_WISE","Y","ADD/MUL/DIV/MIN/MAX/NEG/ABS/SQR_DIFF/POW/RSQRT/SQRT/EQUAL" "ELEMENT_WISE","Y","ADD/MUL/DIV/MIN/MAX/NEG/ABS/SQR_DIFF/POW/RSQRT/SQRT/EQUAL/FLOOR_DIV"
"EMBEDDING_LOOKUP","Y","" "EMBEDDING_LOOKUP","Y",""
"EXPANDDIMS","Y","Only CPU and TensorFlow is supported." "EXPANDDIMS","Y","Only CPU and TensorFlow is supported."
"FILL","Y","Only CPU and TensorFlow is supported." "FILL","Y","Only CPU and TensorFlow is supported."
......
...@@ -136,6 +136,23 @@ inline void TensorGeneralBroadcastEltwise( ...@@ -136,6 +136,23 @@ inline void TensorGeneralBroadcastEltwise(
} }
} }
break; break;
// FLOOR_DIV: element-wise floor(a / b) with general (per-index) shape
// broadcasting. GetIndex maps the flat output position to each input's
// (possibly broadcast) index; IncreaseIndex advances the multi-dim cursor.
// `swapped` means the caller exchanged input0/input1 to canonicalize the
// broadcast, so the division order is reversed here to keep the original
// operand order: floor(original_lhs / original_rhs).
// NOTE(review): if T is an integer type, input0/input1 truncates toward
// zero before std::floor is applied, so negative quotients differ from true
// floor division — presumably T is float here; confirm against callers.
case FLOOR_DIV:
  if (!swapped) {
    for (index_t i = 0; i < output_size; ++i) {
      const index_t idx0 = GetIndex(input0_shape, out_index);
      const index_t idx1 = GetIndex(input1_shape, out_index);
      output[i] = std::floor(input0[idx0] / input1[idx1]);
      IncreaseIndex(output_shape, &out_index);
    }
  } else {
    for (index_t i = 0; i < output_size; ++i) {
      const index_t idx0 = GetIndex(input0_shape, out_index);
      const index_t idx1 = GetIndex(input1_shape, out_index);
      // Operands were swapped by the caller: divide input1 by input0.
      output[i] = std::floor(input1[idx1] / input0[idx0]);
      IncreaseIndex(output_shape, &out_index);
    }
  }
  break;
case MIN: case MIN:
for (index_t i = 0; i < output_size; ++i) { for (index_t i = 0; i < output_size; ++i) {
const index_t idx0 = GetIndex(input0_shape, out_index); const index_t idx0 = GetIndex(input0_shape, out_index);
...@@ -270,6 +287,25 @@ inline void TensorBroadcastEltwise(const EltwiseType type, ...@@ -270,6 +287,25 @@ inline void TensorBroadcastEltwise(const EltwiseType type,
} }
} }
break; break;
// FLOOR_DIV with leading-dimension broadcasting: input0 has diff_size
// outer slices of common_size elements each, input1 holds one slice of
// common_size elements that is reused for every outer slice.
// `swapped` reverses the division order to preserve the caller's original
// operand order. The two loops are collapsed for OpenMP since every
// output element is independent.
case FLOOR_DIV:
  if (!swapped) {
#pragma omp parallel for collapse(2) schedule(runtime)
    for (index_t d = 0; d < diff_size; ++d) {
      for (index_t i = 0; i < common_size; ++i) {
        output[i + d * common_size] =
            std::floor(input0[i + d * common_size] / input1[i]);
      }
    }
  } else {
#pragma omp parallel for collapse(2) schedule(runtime)
    for (index_t d = 0; d < diff_size; ++d) {
      for (index_t i = 0; i < common_size; ++i) {
        // Swapped operands: broadcast slice is the dividend.
        output[i + d * common_size] =
            std::floor(input1[i] / input0[i + d * common_size]);
      }
    }
  }
  break;
case MIN: case MIN:
#pragma omp parallel for collapse(2) schedule(runtime) #pragma omp parallel for collapse(2) schedule(runtime)
for (index_t d = 0; d < diff_size; ++d) { for (index_t d = 0; d < diff_size; ++d) {
...@@ -405,6 +441,19 @@ inline void TensorEltwise(const EltwiseType type, ...@@ -405,6 +441,19 @@ inline void TensorEltwise(const EltwiseType type,
} }
} }
break; break;
// FLOOR_DIV for two same-shaped tensors: output[i] = floor(a[i] / b[i]).
// `swapped` reverses the division order so the result matches the
// caller's original operand order. Iterations are independent, hence the
// parallel-for.
case FLOOR_DIV:
  if (!swapped) {
#pragma omp parallel for schedule(runtime)
    for (index_t i = 0; i < size; ++i) {
      output[i] = std::floor(input0[i] / input1[i]);
    }
  } else {
#pragma omp parallel for schedule(runtime)
    for (index_t i = 0; i < size; ++i) {
      // Swapped operands: divide input1 by input0.
      output[i] = std::floor(input1[i] / input0[i]);
    }
  }
  break;
case MIN: case MIN:
#pragma omp parallel for schedule(runtime) #pragma omp parallel for schedule(runtime)
for (index_t i = 0; i < size; ++i) { for (index_t i = 0; i < size; ++i) {
...@@ -525,6 +574,19 @@ inline void TensorScalarEltwise(const EltwiseType type, ...@@ -525,6 +574,19 @@ inline void TensorScalarEltwise(const EltwiseType type,
} }
} }
break; break;
// FLOOR_DIV between a tensor and a scalar (input1 is a single value).
// Not swapped: every element is divided by the scalar.
// Swapped: the scalar is divided by every element (caller exchanged the
// operands, so reversing here restores the original order).
case FLOOR_DIV:
  if (!swapped) {
#pragma omp parallel for schedule(runtime)
    for (index_t i = 0; i < size; ++i) {
      output[i] = std::floor(input0[i] / input1);
    }
  } else {
#pragma omp parallel for schedule(runtime)
    for (index_t i = 0; i < size; ++i) {
      output[i] = std::floor(input1 / input0[i]);
    }
  }
  break;
case MIN: case MIN:
#pragma omp parallel for schedule(runtime) #pragma omp parallel for schedule(runtime)
for (index_t i = 0; i < size; ++i) { for (index_t i = 0; i < size; ++i) {
...@@ -694,6 +756,33 @@ inline void TensorEltwisePerChannel(const EltwiseType type, ...@@ -694,6 +756,33 @@ inline void TensorEltwisePerChannel(const EltwiseType type,
} }
} }
break; break;
// FLOOR_DIV with per-channel broadcasting: input0 is a full
// (batch, channel, image) tensor; input1 holds one value per channel
// (optionally per batch when batch1 > 1) that is applied to the whole
// image plane of that channel. `swapped` reverses the division order to
// preserve the caller's original operand order.
case FLOOR_DIV:
  if (!swapped) {
#pragma omp parallel for collapse(2) schedule(runtime)
    for (index_t b = 0; b < batch0; ++b) {
      for (index_t c = 0; c < channel; ++c) {
        // Plane of image_size elements for (batch b, channel c).
        const T *in0_ptr = input0 + ((b * channel) + c) * image_size;
        // Per-channel vector; advanced per batch only when input1 has
        // a batch dimension (batch1 > 1).
        const T *in1_ptr = input1 + (batch1 > 1 ? b * channel : 0);
        DstType *out_ptr = output + ((b * channel) + c) * image_size;
        for (index_t i = 0; i < image_size; ++i) {
          out_ptr[i] = std::floor(in0_ptr[i] / in1_ptr[c]);
        }
      }
    }
  } else {
#pragma omp parallel for collapse(2) schedule(runtime)
    for (index_t b = 0; b < batch0; ++b) {
      for (index_t c = 0; c < channel; ++c) {
        const T *in0_ptr = input0 + ((b * channel) + c) * image_size;
        const T *in1_ptr = input1 + (batch1 > 1 ? b * channel : 0);
        DstType *out_ptr = output + ((b * channel) + c) * image_size;
        for (index_t i = 0; i < image_size; ++i) {
          // Swapped operands: per-channel value is the dividend.
          out_ptr[i] = std::floor(in1_ptr[c] / in0_ptr[i]);
        }
      }
    }
  }
  break;
case MIN: case MIN:
#pragma omp parallel for collapse(2) schedule(runtime) #pragma omp parallel for collapse(2) schedule(runtime)
for (index_t b = 0; b < batch0; ++b) { for (index_t b = 0; b < batch0; ++b) {
......
...@@ -30,7 +30,8 @@ enum EltwiseType { ...@@ -30,7 +30,8 @@ enum EltwiseType {
SQR_DIFF = 8, SQR_DIFF = 8,
POW = 9, POW = 9,
EQUAL = 10, EQUAL = 10,
NONE = 11, FLOOR_DIV = 11,
NONE = 12,
}; };
inline bool IsLogicalType(EltwiseType type) { return type == EQUAL; } inline bool IsLogicalType(EltwiseType type) { return type == EQUAL; }
......
...@@ -223,6 +223,10 @@ TEST_F(EltwiseOpTest, CPUSimpleScalarScalar) { ...@@ -223,6 +223,10 @@ TEST_F(EltwiseOpTest, CPUSimpleScalarScalar) {
ops::EltwiseType::PROD, 1, 2, 2); ops::EltwiseType::PROD, 1, 2, 2);
SimpleScalarScalar<DeviceType::CPU, float, float>( SimpleScalarScalar<DeviceType::CPU, float, float>(
ops::EltwiseType::DIV, 1, 2, 0.5); ops::EltwiseType::DIV, 1, 2, 0.5);
SimpleScalarScalar<DeviceType::CPU, float, float>(
ops::EltwiseType::FLOOR_DIV, 1, 2, 0);
SimpleScalarScalar<DeviceType::CPU, float, float>(
ops::EltwiseType::FLOOR_DIV, 1, -2, -1);
SimpleScalarScalar<DeviceType::CPU, float, float>( SimpleScalarScalar<DeviceType::CPU, float, float>(
ops::EltwiseType::MIN, 1, 2, 1); ops::EltwiseType::MIN, 1, 2, 1);
SimpleScalarScalar<DeviceType::CPU, float, float>( SimpleScalarScalar<DeviceType::CPU, float, float>(
...@@ -249,6 +253,12 @@ TEST_F(EltwiseOpTest, CPUSimpleTensorScalar) { ...@@ -249,6 +253,12 @@ TEST_F(EltwiseOpTest, CPUSimpleTensorScalar) {
SimpleTensorScalar<DeviceType::CPU, float, float>( SimpleTensorScalar<DeviceType::CPU, float, float>(
ops::EltwiseType::DIV, {1, 1, 2, 3}, {2, 4, 6, 8, 10, 12}, 2, ops::EltwiseType::DIV, {1, 1, 2, 3}, {2, 4, 6, 8, 10, 12}, 2,
{1, 2, 3, 4, 5, 6}); {1, 2, 3, 4, 5, 6});
SimpleTensorScalar<DeviceType::CPU, float, float>(
ops::EltwiseType::FLOOR_DIV, {1, 1, 2, 3}, {2, 4, 6, 8, 10, 12}, 3,
{0, 1, 2, 2, 3, 4});
SimpleTensorScalar<DeviceType::CPU, float, float>(
ops::EltwiseType::FLOOR_DIV, {1, 1, 2, 3}, {2, 4, 6, 8, 10, 12}, -3,
{-1, -2, -2, -3, -4, -4});
SimpleTensorScalar<DeviceType::CPU, float, float>( SimpleTensorScalar<DeviceType::CPU, float, float>(
ops::EltwiseType::MIN, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, 1, ops::EltwiseType::MIN, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, 1,
{1, 1, 1, 1, 1, 1}); {1, 1, 1, 1, 1, 1});
...@@ -281,6 +291,12 @@ TEST_F(EltwiseOpTest, GPUSimpleTensorScalar) { ...@@ -281,6 +291,12 @@ TEST_F(EltwiseOpTest, GPUSimpleTensorScalar) {
SimpleTensorScalar<DeviceType::GPU, float, float>( SimpleTensorScalar<DeviceType::GPU, float, float>(
ops::EltwiseType::DIV, {1, 1, 2, 3}, {2, 4, 6, 8, 10, 12}, 2, ops::EltwiseType::DIV, {1, 1, 2, 3}, {2, 4, 6, 8, 10, 12}, 2,
{1, 2, 3, 4, 5, 6}); {1, 2, 3, 4, 5, 6});
SimpleTensorScalar<DeviceType::GPU, float, float>(
ops::EltwiseType::FLOOR_DIV, {1, 1, 2, 3}, {2, 4, 6, 8, 10, 12}, 3,
{0, 1, 2, 2, 3, 4});
SimpleTensorScalar<DeviceType::GPU, float, float>(
ops::EltwiseType::FLOOR_DIV, {1, 1, 2, 3}, {2, 4, 6, 8, 10, 12}, -3,
{-1, -2, -2, -3, -4, -4});
SimpleTensorScalar<DeviceType::GPU, float, float>( SimpleTensorScalar<DeviceType::GPU, float, float>(
ops::EltwiseType::MIN, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, 1, ops::EltwiseType::MIN, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, 1,
{1, 1, 1, 1, 1, 1}); {1, 1, 1, 1, 1, 1});
...@@ -317,6 +333,22 @@ TEST_F(EltwiseOpTest, CPUSimpleTensorVector) { ...@@ -317,6 +333,22 @@ TEST_F(EltwiseOpTest, CPUSimpleTensorVector) {
SimpleTensorEltwise<DeviceType::CPU, float, float>( SimpleTensorEltwise<DeviceType::CPU, float, float>(
ops::EltwiseType::DIV, {1, 1, 1, 5}, {1, 1, 1, 2, 4}, {1, 2, 1, 5}, ops::EltwiseType::DIV, {1, 1, 1, 5}, {1, 1, 1, 2, 4}, {1, 2, 1, 5},
{1, 1, 1, 2, 2, 1, 1, 1, 1, 1}, {1, 1, 1, 1, 2, 1, 1, 1, 2, 4}); {1, 1, 1, 2, 2, 1, 1, 1, 1, 1}, {1, 1, 1, 1, 2, 1, 1, 1, 2, 4});
SimpleTensorEltwise<DeviceType::CPU, float, float>(
ops::EltwiseType::FLOOR_DIV,
{1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
{1, 1, 1, 5}, {2, 2, 2, 2, 3}, {0, 1, 1, 2, 1, 3, 3, 4, 4, 3});
SimpleTensorEltwise<DeviceType::CPU, float, float>(
ops::EltwiseType::FLOOR_DIV,
{1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
{1, 1, 1, 5}, {-2, -2, -2, -2, -3},
{-1, -1, -2, -2, -2, -3, -4, -4, -5, -4});
SimpleTensorEltwise<DeviceType::CPU, float, float>(
ops::EltwiseType::FLOOR_DIV, {1, 1, 1, 5}, {1, 1, 1, 2, 4}, {1, 2, 1, 5},
{2, 2, 2, 3, 3, 2, 2, 2, 2, 2}, {0, 0, 0, 0, 1, 0, 0, 0, 1, 2});
SimpleTensorEltwise<DeviceType::CPU, float, float>(
ops::EltwiseType::FLOOR_DIV, {1, 1, 1, 5}, {1, 1, 1, 2, 4}, {1, 2, 1, 5},
{-2, -2, -2, -3, -3, -2, -2, -2, -2, -2},
{-1, -1, -1, -1, -2, -1, -1, -1, -1, -2});
SimpleTensorEltwise<DeviceType::CPU, float, float>( SimpleTensorEltwise<DeviceType::CPU, float, float>(
ops::EltwiseType::MIN, {1, 1, 1, 5}, {1, 2, 3, 4, 5}, {1, 2, 1, 5}, ops::EltwiseType::MIN, {1, 1, 1, 5}, {1, 2, 3, 4, 5}, {1, 2, 1, 5},
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}); {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5});
...@@ -349,6 +381,21 @@ TEST_F(EltwiseOpTest, CPUSimpleTensorVector) { ...@@ -349,6 +381,21 @@ TEST_F(EltwiseOpTest, CPUSimpleTensorVector) {
SimpleTensorEltwise<DeviceType::CPU, float, float>( SimpleTensorEltwise<DeviceType::CPU, float, float>(
ops::EltwiseType::DIV, {5}, {1, 1, 1, 2, 4}, {1, 2, 1, 5}, ops::EltwiseType::DIV, {5}, {1, 1, 1, 2, 4}, {1, 2, 1, 5},
{1, 1, 1, 2, 2, 1, 1, 1, 1, 1}, {1, 1, 1, 1, 2, 1, 1, 1, 2, 4}); {1, 1, 1, 2, 2, 1, 1, 1, 1, 1}, {1, 1, 1, 1, 2, 1, 1, 1, 2, 4});
SimpleTensorEltwise<DeviceType::CPU, float, float>(
ops::EltwiseType::FLOOR_DIV,
{1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
{5}, {2, 2, 2, 2, 3}, {0, 1, 1, 2, 1, 3, 3, 4, 4, 3});
SimpleTensorEltwise<DeviceType::CPU, float, float>(
ops::EltwiseType::FLOOR_DIV,
{1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
{5}, {-2, -2, -2, -2, -3}, {-1, -1, -2, -2, -2, -3, -4, -4, -5, -4});
SimpleTensorEltwise<DeviceType::CPU, float, float>(
ops::EltwiseType::FLOOR_DIV, {5}, {1, 1, 1, 2, 4}, {1, 2, 1, 5},
{2, 2, 2, 3, 3, 2, 2, 2, 2, 2}, {0, 0, 0, 0, 1, 0, 0, 0, 1, 2});
SimpleTensorEltwise<DeviceType::CPU, float, float>(
ops::EltwiseType::FLOOR_DIV, {5}, {1, 1, 1, 2, 4}, {1, 2, 1, 5},
{-2, -2, -2, -3, -3, -2, -2, -2, -2, -2},
{-1, -1, -1, -1, -2, -1, -1, -1, -1, -2});
SimpleTensorEltwise<DeviceType::CPU, float, float>( SimpleTensorEltwise<DeviceType::CPU, float, float>(
ops::EltwiseType::MIN, {5}, {1, 2, 3, 4, 5}, {1, 2, 1, 5}, ops::EltwiseType::MIN, {5}, {1, 2, 3, 4, 5}, {1, 2, 1, 5},
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}); {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5});
...@@ -382,6 +429,22 @@ TEST_F(EltwiseOpTest, GPUSimpleTensorVector) { ...@@ -382,6 +429,22 @@ TEST_F(EltwiseOpTest, GPUSimpleTensorVector) {
SimpleTensorEltwise<DeviceType::GPU, float, float>( SimpleTensorEltwise<DeviceType::GPU, float, float>(
ops::EltwiseType::DIV, {1, 1, 1, 5}, {1, 1, 1, 2, 4}, {1, 2, 1, 5}, ops::EltwiseType::DIV, {1, 1, 1, 5}, {1, 1, 1, 2, 4}, {1, 2, 1, 5},
{1, 1, 1, 2, 2, 1, 1, 1, 1, 1}, {1, 1, 1, 1, 2, 1, 1, 1, 2, 4}); {1, 1, 1, 2, 2, 1, 1, 1, 1, 1}, {1, 1, 1, 1, 2, 1, 1, 1, 2, 4});
SimpleTensorEltwise<DeviceType::GPU, float, float>(
ops::EltwiseType::FLOOR_DIV,
{1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
{1, 1, 1, 5}, {2, 2, 2, 2, 3}, {0, 1, 1, 2, 1, 3, 3, 4, 4, 3});
SimpleTensorEltwise<DeviceType::GPU, float, float>(
ops::EltwiseType::FLOOR_DIV,
{1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
{1, 1, 1, 5}, {-2, -2, -2, -2, -3},
{-1, -1, -2, -2, -2, -3, -4, -4, -5, -4});
SimpleTensorEltwise<DeviceType::GPU, float, float>(
ops::EltwiseType::FLOOR_DIV, {1, 1, 1, 5}, {1, 1, 1, 2, 4}, {1, 2, 1, 5},
{2, 2, 2, 3, 3, 2, 2, 2, 2, 2}, {0, 0, 0, 0, 1, 0, 0, 0, 1, 2});
SimpleTensorEltwise<DeviceType::GPU, float, float>(
ops::EltwiseType::FLOOR_DIV, {1, 1, 1, 5}, {1, 1, 1, 2, 4}, {1, 2, 1, 5},
{-2, -2, -2, -3, -3, -2, -2, -2, -2, -2},
{-1, -1, -1, -1, -2, -1, -1, -1, -1, -2});
SimpleTensorEltwise<DeviceType::GPU, float, float>( SimpleTensorEltwise<DeviceType::GPU, float, float>(
ops::EltwiseType::MIN, {1, 1, 1, 5}, {1, 2, 3, 4, 5}, {1, 2, 1, 5}, ops::EltwiseType::MIN, {1, 1, 1, 5}, {1, 2, 3, 4, 5}, {1, 2, 1, 5},
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}); {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5});
...@@ -410,6 +473,12 @@ TEST_F(EltwiseOpTest, CPUSimpleTensorTensor) { ...@@ -410,6 +473,12 @@ TEST_F(EltwiseOpTest, CPUSimpleTensorTensor) {
SimpleTensorEltwise<DeviceType::CPU, float, float>( SimpleTensorEltwise<DeviceType::CPU, float, float>(
ops::EltwiseType::DIV, {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, {1, 2, 1, 3}, ops::EltwiseType::DIV, {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, {1, 2, 1, 3},
{1, 2, 3, 4, 5, 6}, {1, 1, 1, 1, 1, 1}); {1, 2, 3, 4, 5, 6}, {1, 1, 1, 1, 1, 1});
SimpleTensorEltwise<DeviceType::CPU, float, float>(
ops::EltwiseType::FLOOR_DIV, {1, 2, 1, 3}, {2, 3, 4, 5, 6, 7},
{1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, {2, 1, 1, 1, 1, 1});
SimpleTensorEltwise<DeviceType::CPU, float, float>(
ops::EltwiseType::FLOOR_DIV, {1, 2, 1, 3}, {-2, -3, -4, -5, -6, -7},
{1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, {-2, -2, -2, -2, -2, -2});
SimpleTensorEltwise<DeviceType::CPU, float, float>( SimpleTensorEltwise<DeviceType::CPU, float, float>(
ops::EltwiseType::MIN, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}, ops::EltwiseType::MIN, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5},
{1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
...@@ -442,6 +511,12 @@ TEST_F(EltwiseOpTest, GPUSimpleTensorTensor) { ...@@ -442,6 +511,12 @@ TEST_F(EltwiseOpTest, GPUSimpleTensorTensor) {
SimpleTensorEltwise<DeviceType::GPU, float, float>( SimpleTensorEltwise<DeviceType::GPU, float, float>(
ops::EltwiseType::DIV, {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, {1, 2, 1, 3}, ops::EltwiseType::DIV, {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, {1, 2, 1, 3},
{1, 2, 3, 4, 5, 6}, {1, 1, 1, 1, 1, 1}); {1, 2, 3, 4, 5, 6}, {1, 1, 1, 1, 1, 1});
SimpleTensorEltwise<DeviceType::GPU, float, float>(
ops::EltwiseType::FLOOR_DIV, {1, 2, 1, 3}, {2, 3, 4, 5, 6, 7},
{1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, {2, 1, 1, 1, 1, 1});
SimpleTensorEltwise<DeviceType::GPU, float, float>(
ops::EltwiseType::FLOOR_DIV, {1, 2, 1, 3}, {-2, -3, -4, -5, -6, -7},
{1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, {-2, -2, -2, -2, -2, -2});
SimpleTensorEltwise<DeviceType::GPU, float, float>( SimpleTensorEltwise<DeviceType::GPU, float, float>(
ops::EltwiseType::MIN, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}, ops::EltwiseType::MIN, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5},
{1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
...@@ -767,6 +842,12 @@ TEST_F(EltwiseOpTest, TensorGeneralBroadcastCPU) { ...@@ -767,6 +842,12 @@ TEST_F(EltwiseOpTest, TensorGeneralBroadcastCPU) {
TensorGeneralBroadcastEltwise<DeviceType::CPU, float, float>( TensorGeneralBroadcastEltwise<DeviceType::CPU, float, float>(
ops::EltwiseType::DIV, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1}, ops::EltwiseType::DIV, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1},
{1, 2}, {1, 1, 2, 3}, {1, 2, 3, 2, 2.5, 3}); {1, 2}, {1, 1, 2, 3}, {1, 2, 3, 2, 2.5, 3});
TensorGeneralBroadcastEltwise<DeviceType::CPU, float, float>(
ops::EltwiseType::FLOOR_DIV, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6},
{1, 1, 2, 1}, {2, 3}, {1, 1, 2, 3}, {0, 1, 1, 1, 1, 2});
TensorGeneralBroadcastEltwise<DeviceType::CPU, float, float>(
ops::EltwiseType::FLOOR_DIV, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6},
{1, 1, 2, 1}, {-2, -3}, {1, 1, 2, 3}, {-1, -1, -2, -2, -2, -2});
TensorGeneralBroadcastEltwise<DeviceType::CPU, float, float>( TensorGeneralBroadcastEltwise<DeviceType::CPU, float, float>(
ops::EltwiseType::MIN, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1}, ops::EltwiseType::MIN, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1},
{1, 2}, {1, 1, 2, 3}, {1, 1, 1, 2, 2, 2}); {1, 2}, {1, 1, 2, 3}, {1, 1, 1, 2, 2, 2});
...@@ -794,6 +875,12 @@ TEST_F(EltwiseOpTest, TensorGeneralBroadcastGPU) { ...@@ -794,6 +875,12 @@ TEST_F(EltwiseOpTest, TensorGeneralBroadcastGPU) {
TensorGeneralBroadcastEltwise<DeviceType::GPU, float, float>( TensorGeneralBroadcastEltwise<DeviceType::GPU, float, float>(
ops::EltwiseType::DIV, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1}, ops::EltwiseType::DIV, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1},
{1, 2}, {1, 1, 2, 3}, {1, 2, 3, 2, 2.5, 3}); {1, 2}, {1, 1, 2, 3}, {1, 2, 3, 2, 2.5, 3});
TensorGeneralBroadcastEltwise<DeviceType::GPU, float, float>(
ops::EltwiseType::FLOOR_DIV, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6},
{1, 1, 2, 1}, {2, 3}, {1, 1, 2, 3}, {0, 1, 1, 1, 1, 2});
TensorGeneralBroadcastEltwise<DeviceType::GPU, float, float>(
ops::EltwiseType::FLOOR_DIV, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6},
{1, 1, 2, 1}, {-2, -3}, {1, 1, 2, 3}, {-1, -1, -2, -2, -2, -2});
TensorGeneralBroadcastEltwise<DeviceType::GPU, float, float>( TensorGeneralBroadcastEltwise<DeviceType::GPU, float, float>(
ops::EltwiseType::MIN, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1}, ops::EltwiseType::MIN, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 1},
{1, 2}, {1, 1, 2, 3}, {1, 1, 1, 2, 2, 2}); {1, 2}, {1, 1, 2, 3}, {1, 1, 1, 2, 2, 2});
......
...@@ -81,13 +81,19 @@ __kernel void eltwise(OUT_OF_RANGE_PARAMS ...@@ -81,13 +81,19 @@ __kernel void eltwise(OUT_OF_RANGE_PARAMS
#else #else
out = pow(in0, in1); out = pow(in0, in1);
#endif #endif
// ELTWISE_TYPE 11 == FLOOR_DIV (matches EltwiseType::FLOOR_DIV on the
// host side). SWAPPED is defined when the host exchanged the operands,
// so the division order is reversed to keep the original operand order.
#elif ELTWISE_TYPE == 11
#ifdef SWAPPED
  out = floor(in1 / in0);
#else
  out = floor(in0 / in1);
#endif
#endif #endif
#if ((INPUT_TYPE == 1 || INPUT_TYPE == 4) && \ #if ((INPUT_TYPE == 1 || INPUT_TYPE == 4) && \
(ELTWISE_TYPE == 0 || ELTWISE_TYPE == 1 || ELTWISE_TYPE == 4 || \ (ELTWISE_TYPE == 0 || ELTWISE_TYPE == 1 || ELTWISE_TYPE == 4 || \
ELTWISE_TYPE == 5 || ELTWISE_TYPE == 8 || ELTWISE_TYPE == 9)) || \ ELTWISE_TYPE == 5 || ELTWISE_TYPE == 8 || ELTWISE_TYPE == 9)) || \
((INPUT_TYPE != 1 || INPUT_TYPE != 4) && \ ((INPUT_TYPE != 1 || INPUT_TYPE != 4) && \
(ELTWISE_TYPE == 3 || ELTWISE_TYPE == 9)) (ELTWISE_TYPE == 3 || ELTWISE_TYPE == 9 || ELTWISE_TYPE == 11))
const int remain_channel = channel - 4 * chan_idx; const int remain_channel = channel - 4 * chan_idx;
if (remain_channel < 4) { if (remain_channel < 4) {
switch (remain_channel) { switch (remain_channel) {
......
...@@ -58,6 +58,10 @@ void ScalarEltwise(const T* in0, ...@@ -58,6 +58,10 @@ void ScalarEltwise(const T* in0,
else else
out[0] = in0[0] / in1[0]; out[0] = in0[0] / in1[0];
break; break;
// FLOOR_DIV of two scalars: floor(lhs / rhs), with the operand order
// restored when the caller set `swapped`.
case FLOOR_DIV:
  out[0] = std::floor(swapped ? in1[0] / in0[0] :
                                in0[0] / in1[0]);
  break;
case MIN: case MIN:
out[0] = std::min(in1[0], in0[0]); out[0] = std::min(in1[0], in0[0]);
break; break;
......
...@@ -63,6 +63,10 @@ TEST_F(ScalarMathOpTest, SimpleCPU) { ...@@ -63,6 +63,10 @@ TEST_F(ScalarMathOpTest, SimpleCPU) {
ops::EltwiseType::PROD, 3, -2, 3, -6); ops::EltwiseType::PROD, 3, -2, 3, -6);
ScalarMathTest<DeviceType::CPU, float, float>( ScalarMathTest<DeviceType::CPU, float, float>(
ops::EltwiseType::DIV, 3, -2, 1, -1.5); ops::EltwiseType::DIV, 3, -2, 1, -1.5);
ScalarMathTest<DeviceType::CPU, float, float>(
ops::EltwiseType::FLOOR_DIV, 3, -2, 1, -2);
ScalarMathTest<DeviceType::CPU, float, float>(
ops::EltwiseType::FLOOR_DIV, 3, 2, 1, 1);
ScalarMathTest<DeviceType::CPU, float, float>( ScalarMathTest<DeviceType::CPU, float, float>(
ops::EltwiseType::MIN, 3, -2, 1, -2); ops::EltwiseType::MIN, 3, -2, 1, -2);
ScalarMathTest<DeviceType::CPU, float, float>( ScalarMathTest<DeviceType::CPU, float, float>(
......
...@@ -79,6 +79,7 @@ class EltwiseType(Enum): ...@@ -79,6 +79,7 @@ class EltwiseType(Enum):
SQR_DIFF = 8 SQR_DIFF = 8
POW = 9 POW = 9
EQUAL = 10 EQUAL = 10
FLOOR_DIV = 11
class ReduceType(Enum): class ReduceType(Enum):
......
...@@ -113,6 +113,7 @@ TFSupportedOps = [ ...@@ -113,6 +113,7 @@ TFSupportedOps = [
'ArgMax', 'ArgMax',
'Split', 'Split',
'FakeQuantWithMinMaxVars', 'FakeQuantWithMinMaxVars',
'FloorDiv',
'Sqrt', 'Sqrt',
] ]
...@@ -185,6 +186,7 @@ class TensorflowConverter(base_converter.ConverterInterface): ...@@ -185,6 +186,7 @@ class TensorflowConverter(base_converter.ConverterInterface):
TFOpType.Abs.name: EltwiseType.ABS, TFOpType.Abs.name: EltwiseType.ABS,
TFOpType.Pow.name: EltwiseType.POW, TFOpType.Pow.name: EltwiseType.POW,
TFOpType.RealDiv.name: EltwiseType.DIV, TFOpType.RealDiv.name: EltwiseType.DIV,
TFOpType.FloorDiv.name: EltwiseType.FLOOR_DIV,
TFOpType.SquaredDifference.name: EltwiseType.SQR_DIFF, TFOpType.SquaredDifference.name: EltwiseType.SQR_DIFF,
TFOpType.Square.name: EltwiseType.POW, TFOpType.Square.name: EltwiseType.POW,
TFOpType.Rsqrt.name: EltwiseType.POW, TFOpType.Rsqrt.name: EltwiseType.POW,
...@@ -264,6 +266,7 @@ class TensorflowConverter(base_converter.ConverterInterface): ...@@ -264,6 +266,7 @@ class TensorflowConverter(base_converter.ConverterInterface):
TFOpType.ArgMax.name: self.convert_argmax, TFOpType.ArgMax.name: self.convert_argmax,
TFOpType.Split.name: self.convert_split, TFOpType.Split.name: self.convert_split,
TFOpType.FakeQuantWithMinMaxVars.name: self.convert_fake_quantize, TFOpType.FakeQuantWithMinMaxVars.name: self.convert_fake_quantize,
TFOpType.FloorDiv.name: self.convert_elementwise,
TFOpType.Sqrt.name: self.convert_elementwise, TFOpType.Sqrt.name: self.convert_elementwise,
} }
self._option = option self._option = option
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册