Commit 405c502a authored by liutuo

add onnx clip

Parent 52a379a2
......@@ -27,3 +27,4 @@ examples/android/macelibrary/src/main/cpp/mace/
examples/android/macelibrary/src/main/cpp/include/
examples/android/macelibrary/src/main/cpp/lib/arm64-v8a/
examples/android/macelibrary/src/main/jniLibs/arm64-v8a/
......@@ -31,7 +31,8 @@ enum EltwiseType {
POW = 9,
EQUAL = 10,
FLOOR_DIV = 11,
NONE = 12,
CLIP = 12,
NONE = 13,
};
} // namespace ops
......
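The enum change above gives CLIP the value 12 and shifts NONE to 13. The same number appears later in this commit as ELTWISE_TYPE == 12 in the OpenCL kernel and as CLIP = 12 in the Python EltwiseType enum, so all three places must stay in sync. A minimal illustrative Python sketch of that tail of the enum (only the members this commit touches; earlier members such as SUM and SUB are omitted here):

    from enum import Enum

    # Illustrative mirror of the end of ops::EltwiseType after this commit.
    class EltwiseTypeTail(Enum):
        POW = 9
        EQUAL = 10
        FLOOR_DIV = 11
        CLIP = 12   # new in this commit
        NONE = 13   # shifted from 12

    # The OpenCL kernel selects the clip branch with ELTWISE_TYPE == 12, so the
    # numeric value of CLIP must agree across C++, OpenCL and the Python converter.
    assert EltwiseTypeTail.CLIP.value == 12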
......@@ -376,6 +376,15 @@ inline void TensorBroadcastEltwise(const OpContext *context,
}
}
break;
case CLIP:
for (index_t d = start0; d < end0; d += step0) {
for (index_t i = start1; i < end1; i += step1) {
output[i + d * common_size] =
std::fmaxf(coeff[0],
std::fminf(coeff[1], input0[i + d * common_size]));
}
}
break;
default:LOG(FATAL) << "Eltwise op not support type " << type;
}
}, 0, diff_size, 1, 0, common_size, 1);
......@@ -497,6 +506,11 @@ inline void TensorEltwise(const OpContext *context,
output[i] = input0[i] == input1[i];
}
break;
case CLIP:
for (index_t i = start; i < end; i += step) {
output[i] = std::fmaxf(coeff[0], std::fminf(coeff[1], input0[i]));
}
break;
default:LOG(FATAL) << "Eltwise op not support type " << type;
}
}, 0, size, 1);
......@@ -617,7 +631,11 @@ inline void TensorScalarEltwise(const OpContext *context,
for (index_t i = start; i < end; i += step) {
output[i] = input0[i] == input1;
}
break;
case CLIP:
for (index_t i = start; i < end; i += step) {
output[i] = std::fmaxf(coeff[0], std::fminf(coeff[1], input0[i]));
}
break;
default:LOG(FATAL) << "Eltwise op not support type " << type;
}
......@@ -886,6 +904,11 @@ class EltwiseOp : public Operation {
input1 = &scalar_tensor_;
}
if (type_ == CLIP) {
MACE_CHECK(coeff_.size() == 2 && coeff_[0] < coeff_[1],
"Clip's min/max values are not correct.");
}
if (IsLogicalType(type_)) {
// as we do not have bool-type tensor, we use int type
return DoEltwise<int32_t>(context, input0, input1, output);
......@@ -1144,6 +1167,11 @@ class EltwiseOp<DeviceType::GPU, T> : public Operation {
Operation::GetOptionalArg<int>(
"type", static_cast<int>(ops::EltwiseType::NONE)));
std::vector<float> coeff = Operation::GetRepeatedArgs<float>("coeff");
if (type == ops::EltwiseType::CLIP) {
MACE_CHECK(coeff.size() == 2 && coeff[0] < coeff[1],
"Clip's min/max values are not correct.");
}
float scalar_input = Operation::GetOptionalArg<float>("scalar_input", 1.0);
int32_t scalar_input_index = Operation::GetOptionalArg<int32_t>(
"scalar_input_index", 1);
......
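Both the CPU operator (via coeff_) and the GPU operator constructor reject anything other than exactly two coeff values with min strictly below max. A hedged Python sketch of the equivalent check, using validate_clip_coeff as a hypothetical helper name:

    def validate_clip_coeff(coeff):
        # Mirrors MACE_CHECK(coeff.size() == 2 && coeff[0] < coeff[1], ...):
        # clip needs exactly [min, max] with min < max.
        if len(coeff) != 2 or not coeff[0] < coeff[1]:
            raise ValueError("Clip's min/max values are not correct: %s" % (coeff,))
        return coeff

    validate_clip_coeff([-0.2, 0.85])   # passes
    # validate_clip_coeff([3.0])        # would raise ValueError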
......@@ -87,13 +87,15 @@ __kernel void eltwise(OUT_OF_RANGE_PARAMS
#else
out = floor(in0 / in1);
#endif
#elif ELTWISE_TYPE == 12
out = fmax(coeff0, fmin(coeff1, in0));
#endif
#if defined(NOT_DIVISIBLE_FOUR) && \
((ELTWISE_TYPE == 3 || ELTWISE_TYPE == 9 || ELTWISE_TYPE == 11) \
|| ((defined(INPUT_SCALAR) || defined(INPUT_TENSOR_BC_CHAN)) && \
(ELTWISE_TYPE == 0 || ELTWISE_TYPE == 1 || ELTWISE_TYPE == 4 || \
ELTWISE_TYPE == 5 || ELTWISE_TYPE == 8)))
ELTWISE_TYPE == 5 || ELTWISE_TYPE == 8 || ELTWISE_TYPE == 12)))
const int remain_channel = channel - 4 * chan_idx;
if (remain_channel < 4) {
switch (remain_channel) {
......
......@@ -80,6 +80,7 @@ class EltwiseType(Enum):
POW = 9
EQUAL = 10
FLOOR_DIV = 11
CLIP = 12
class ReduceType(Enum):
......@@ -273,6 +274,7 @@ class MaceKeyword(object):
mace_exclusive_str = 'exclusive'
mace_reverse_str = 'reverse'
mace_const_data_num_arg_str = 'const_data_num'
mace_coeff_str = 'coeff'
class TransformerRule(Enum):
......
......@@ -69,7 +69,7 @@ OnnxSupportedOps = [
'BatchNorm',
'Cast',
# 'Ceil',
# 'Clip',
'Clip',
# 'Compress',
'Concat',
# 'Constant',
......@@ -300,6 +300,7 @@ class OnnxConverter(base_converter.ConverterInterface):
OnnxOpType.Sqrt.name: EltwiseType.POW,
OnnxOpType.Reciprocal.name: EltwiseType.POW,
OnnxOpType.Scale.name: EltwiseType.PROD,
OnnxOpType.Clip.name: EltwiseType.CLIP,
}
reduce_type = {
......@@ -331,6 +332,7 @@ class OnnxConverter(base_converter.ConverterInterface):
OnnxOpType.BatchNormalization.name: self.convert_fused_batchnorm,
OnnxOpType.BatchNorm.name: self.convert_fused_batchnorm,
OnnxOpType.Cast.name: self.convert_cast,
OnnxOpType.Clip.name: self.convert_eltwise,
OnnxOpType.Concat.name: self.convert_concat,
OnnxOpType.Conv.name: self.convert_conv2d,
OnnxOpType.ConvTranspose.name: self.convert_deconv,
......@@ -862,6 +864,18 @@ class OnnxConverter(base_converter.ConverterInterface):
value_arg = op.arg.add()
value_arg.name = MaceKeyword.mace_scalar_input_str
value_arg.f = value
elif node.op_type == OnnxOpType.Clip.name:
if 'min' in node.attrs:
min_value = node.attrs['min']
else:
min_value = np.finfo(np.float32).min
if 'max' in node.attrs:
max_value = node.attrs['max']
else:
max_value = np.finfo(np.float32).max
coeff_arg = op.arg.add()
coeff_arg.name = MaceKeyword.mace_coeff_str
coeff_arg.floats.extend([min_value, max_value])
@staticmethod
def copy_node_attr(op, node, attr_name, dtype=AttributeType.INT,
......
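The converter hunk above reads the optional min/max attributes of an ONNX Clip node (older opsets carry them as attributes) and stores them in the op's coeff argument, falling back to the full float32 range when an attribute is missing. A standalone sketch of that extraction, with clip_min_max as a hypothetical helper name and a plain dict standing in for node.attrs:

    import numpy as np

    def clip_min_max(attrs):
        # Mirrors the converter: a missing 'min'/'max' attribute defaults to the
        # float32 limit, so the clip becomes a no-op on that side.
        min_value = attrs.get('min', np.finfo(np.float32).min)
        max_value = attrs.get('max', np.finfo(np.float32).max)
        return [min_value, max_value]

    print(clip_min_max({'min': 2.0, 'max': 3.0}))   # [2.0, 3.0]
    print(clip_min_max({'max': 6.0}))               # [-3.4028235e+38, 6.0]

The returned pair is what ends up in the 'coeff' argument (mace_coeff_str) that the C++ eltwise op then validates and applies.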
......@@ -114,7 +114,8 @@ void SimpleTensorEltwise(const ops::EltwiseType type,
// Add input data
net.AddInputFromArray<D, T>("Input0", shape0, input0);
net.AddInputFromArray<D, T>("Input1", shape1, input1);
if (shape1.size() > 0 && input1.size() > 0)
net.AddInputFromArray<D, T>("Input1", shape1, input1);
if (D == DeviceType::CPU) {
auto op_builder =
......@@ -136,7 +137,7 @@ void SimpleTensorEltwise(const ops::EltwiseType type,
net.TransformDataFormat<D, T>(
"Input1", DataFormat::NHWC, "TInput1", DataFormat::NCHW);
op_builder.Input("TInput1");
} else {
} else if (shape1.size() > 0) {
op_builder.Input("Input1");
}
op_builder.Finalize(net.NewOperatorDef());
......@@ -146,14 +147,15 @@ void SimpleTensorEltwise(const ops::EltwiseType type,
net.TransformDataFormat<D, DstType>(
"TOutput", DataFormat::NCHW, "Output", DataFormat::NHWC);
} else {
OpDefBuilder("Eltwise", "EltwiseTest")
.Input("Input0")
.Input("Input1")
.AddIntArg("type", static_cast<int>(type))
.AddFloatsArg("coeff", coeff)
.Output("Output")
.Finalize(net.NewOperatorDef());
auto op_builder =
OpDefBuilder("Eltwise", "EltwiseTest")
.Input("Input0")
.AddIntArg("type", static_cast<int>(type))
.AddFloatsArg("coeff", coeff)
.Output("Output");
if (input1.size() > 0 && shape1.size() > 0)
op_builder.Input("Input1");
op_builder.Finalize(net.NewOperatorDef());
// Run
net.RunOp(D);
}
......@@ -500,6 +502,10 @@ TEST_F(EltwiseOpTest, CPUSimpleTensorTensor) {
SimpleTensorEltwise<DeviceType::CPU, int32_t, int32_t>(
ops::EltwiseType::EQUAL, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6},
{1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 1, 1, 1, 1});
SimpleTensorEltwise<DeviceType::CPU, float, float>(
ops::EltwiseType::CLIP, {1, 2, 1, 5},
{1, 2, 3, 4, 5, 1, 2, 3, 4, 5}, {},
{}, {2, 2, 3, 3, 3, 2, 2, 3, 3, 3}, {2.0f, 3.0f});
}
TEST_F(EltwiseOpTest, GPUSimpleTensorTensor) {
SimpleTensorEltwise<DeviceType::GPU, float, float>(
......@@ -535,6 +541,10 @@ TEST_F(EltwiseOpTest, GPUSimpleTensorTensor) {
ops::EltwiseType::SQR_DIFF, {1, 2, 1, 5},
{1, 2, 3, 4, 5, 1, 2, 3, 4, 5}, {1, 2, 1, 5},
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {0, 0, 0, 0, 0, 25, 25, 25, 25, 25});
SimpleTensorEltwise<DeviceType::GPU, float, float>(
ops::EltwiseType::CLIP, {1, 2, 1, 5},
{1, 2, 3, 4, 5, 1, 2, 3, 4, 5}, {},
{}, {2, 2, 3, 3, 3, 2, 2, 3, 3, 3}, {2.0f, 3.0f});
}
namespace {
......@@ -912,6 +922,8 @@ TEST_F(EltwiseOpTest, RandomTensorTensorFloat) {
{3, 31, 37, 17});
RandomTensorEltwise<float>(ops::EltwiseType::SQR_DIFF, {3, 31, 37, 17},
{3, 31, 37, 17});
RandomTensorEltwise<float>(ops::EltwiseType::CLIP, {3, 31, 37, 17},
{3, 31, 37, 17}, {-0.2, 0.85});
}
TEST_F(EltwiseOpTest, RandomTensorTensorHalf) {
......@@ -929,6 +941,8 @@ TEST_F(EltwiseOpTest, RandomTensorTensorHalf) {
{3, 31, 37, 17});
RandomTensorEltwise<half>(ops::EltwiseType::SQR_DIFF, {3, 31, 37, 17},
{3, 31, 37, 17});
RandomTensorEltwise<half>(ops::EltwiseType::CLIP, {3, 31, 37, 17},
{3, 31, 37, 17}, {-0.2, 0.85});
}
TEST_F(EltwiseOpTest, TensorGeneralBroadcastCPU) {
......
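The new test cases above pass an empty second tensor plus coeff {2.0, 3.0} for the simple CPU/GPU cases and coeff {-0.2, 0.85} for the random ones; the expected outputs are simply the inputs clamped to that range. A quick NumPy check of the hard-coded expectation (verification only, not test code):

    import numpy as np

    inputs = np.array([1, 2, 3, 4, 5, 1, 2, 3, 4, 5], dtype=np.float32)
    expected = np.array([2, 2, 3, 3, 3, 2, 2, 3, 3, 3], dtype=np.float32)

    # np.clip(x, lo, hi) is the same clamp the op computes with coeff = [lo, hi].
    assert np.array_equal(np.clip(inputs, 2.0, 3.0), expected)
    print("CLIP test expectation verified")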