Merge branch 'quantize' into 'master'

Move quantize info into op's output. See merge request !790

Merge branch 'quantize' into 'master'
Move quantize info into op's output. See merge request !790
9e79fef3 · 李滨 · af264bfb · 0c6ec590 · 9e79fef3 · 9e79fef3
13 changed files
--- a/mace/core/tensor.h
+++ b/mace/core/tensor.h
@@ -109,7 +109,9 @@ class Tensor {
        name_(""),
        is_weight_(is_weight),
        scale_(0.f),
-        zero_point_(0) {}
+        zero_point_(0),
+        minval_(0.f),
+        maxval_(0.f) {}
  Tensor(BufferBase *buffer, DataType dtype,
         bool is_weight = false)
@@ -120,7 +122,9 @@ class Tensor {
      name_(""),
      is_weight_(is_weight),
      scale_(0.f),
-      zero_point_(0) {}
+      zero_point_(0),
+      minval_(0.f),
+      maxval_(0.f) {}
  Tensor(const BufferSlice &buffer_slice,
         DataType dtype,
@@ -132,7 +136,9 @@ class Tensor {
        name_(""),
        is_weight_(is_weight),
        scale_(0.f),
-        zero_point_(0) {
+        zero_point_(0),
+        minval_(0.f),
+        maxval_(0.f) {
    buffer_ = &buffer_slice_;
  }
@@ -391,6 +397,15 @@ class Tensor {
    return zero_point_;
  }
+  // hexagon now uses min/max instead of scale and zero
+  inline float minval() const {
+    return minval_;
+  }
+  inline float maxval() const {
+    return maxval_;
+  }
  inline void SetScale(float scale) {
    scale_ = scale;
  }
@@ -403,6 +418,14 @@ class Tensor {
    is_weight_ = is_weight;
  }
+  inline void SetMinVal(float minval) {
+    minval_ = minval;
+  }
+  inline void SetMaxVal(float maxval) {
+    maxval_ = maxval;
+  }
 private:
  Allocator *allocator_;
  DataType dtype_;
@@ -416,6 +439,8 @@ class Tensor {
  bool is_weight_;
  float scale_;
  int32_t zero_point_;
+  float minval_;
+  float maxval_;
  MACE_DISABLE_COPY_AND_ASSIGN(Tensor);
 };

--- a/mace/core/workspace.cc
+++ b/mace/core/workspace.cc
@@ -178,16 +178,19 @@ MaceStatus Workspace::LoadModelTensor(const NetDef &net_def,
    if (status != MaceStatus::MACE_SUCCESS) return status;
  }
-  if (device_type == DeviceType::CPU && net_def.has_quantize_info()) {
+  if (device_type == DeviceType::CPU) {
-    for (const auto
+    for (const auto &op : net_def.op()) {
-          &activation_info: net_def.quantize_info().activation_info()) {
+      VLOG(2) << "Add quantize info for op: " << op.name();
-      if (HasTensor(activation_info.tensor_name())) {
+      MACE_CHECK(op.quantize_info().empty()
-        Tensor *tensor = GetTensor(activation_info.tensor_name());
+                     || op.quantize_info().size() == op.output().size(),
-        tensor->SetScale(activation_info.scale());
+                 "quantize info size must be equal to output size or empty");
-        tensor->SetZeroPoint(activation_info.zero_point());
+      for (int i = 0; i < op.quantize_info().size(); ++i) {
-      } else {
+        auto &quantize_info = op.quantize_info(i);
-        LOG(WARNING) << "Quantize info exists for non-existed tensor: "
+        Tensor *tensor = GetTensor(op.output(i));
-            << activation_info.tensor_name();
+        tensor->SetScale(quantize_info.scale());
+        tensor->SetZeroPoint(quantize_info.zero_point());
+        tensor->SetMinVal(quantize_info.minval());
+        tensor->SetMaxVal(quantize_info.maxval());
      }
    }
  }
@@ -233,8 +236,7 @@ MaceStatus Workspace::CreateOutputTensorBuffer(const NetDef &net_def,
        std::unique_ptr<BufferBase> tensor_buf(
            new Buffer(GetCPUAllocator()));
        MACE_RETURN_IF_ERROR(tensor_buf->Allocate(
-            mem_block.x() * GetEnumTypeSize(dtype)
+            mem_block.x() + MACE_EXTRA_BUFFER_PAD_SIZE));
-                + MACE_EXTRA_BUFFER_PAD_SIZE));
        preallocated_allocator_.SetBuffer(mem_block.mem_id(),
                                          std::move(tensor_buf));
      } else if (mem_block.mem_type() == MemoryType::GPU_IMAGE) {

--- a/mace/kernels/conv_2d.h
+++ b/mace/kernels/conv_2d.h
@@ -879,6 +879,17 @@ struct Conv2dFunctor<DeviceType::CPU, uint8_t> : Conv2dFunctorBase {
    const index_t depth = input_channels * filter_h * filter_w;
    const index_t columns = batch * height * width;
+    VLOG(2) << "input scale/zero: " << input->scale() << ", "
+            << input->zero_point();
+    VLOG(2) << "filter scale/zero: " << filter->scale() << ", "
+            << filter->zero_point();
+    if (bias) {
+      VLOG(2) << "bias scale/zero: " << bias->scale() << ", "
+              << bias->zero_point();
+    }
+    VLOG(2) << "output scale/zero: " << output->scale() << ", "
+            << output->zero_point();
    MACE_CHECK(filter->dim(0) == channels, filter->dim(0), " != ", channels);
    MACE_CHECK(filter->dim(3) == input_channels, filter->dim(3), " != ",
               input_channels);

--- a/mace/proto/mace.proto
+++ b/mace/proto/mace.proto
@@ -59,6 +59,13 @@ message OutputShape {
  repeated int64 dims = 1;
 }
+message QuantizeActivationInfo {
+  optional float scale = 1;
+  optional int32 zero_point = 2;
+  optional float minval = 3;  // hexagon uses min/max
+  optional float maxval = 4;
+}
 message OperatorDef {
  repeated string input = 1;
  repeated string output = 2;
@@ -67,6 +74,7 @@ message OperatorDef {
  repeated Argument arg = 5;
  repeated OutputShape output_shape = 6;
  repeated DataType output_type = 7;
+  repeated QuantizeActivationInfo quantize_info = 8;
  repeated int32 mem_id = 10;
@@ -106,23 +114,12 @@ message OutputInfo {
  optional DataType data_type = 5 [default = DT_FLOAT];
 }
-message QuantizeActivationInfo {
-  optional string tensor_name = 1;
-  optional float scale = 2;
-  optional int32 zero_point = 3;
-}
-message QuantizeInfo {
-  repeated QuantizeActivationInfo activation_info = 1;
-}
 message NetDef {
  optional string name = 1;
  repeated OperatorDef op = 2;
  optional string version = 3;
  repeated Argument arg = 4;
  repeated ConstTensor tensors = 5;
-  optional QuantizeInfo quantize_info = 6;
  // for mem optimization
  optional MemoryArena mem_arena = 10;

--- a/mace/python/tools/converter.py
+++ b/mace/python/tools/converter.py
@@ -64,6 +64,10 @@ def parse_int_array_from_str(ints_str):
    return [int(int_str) for int_str in ints_str.split(',')]
+def parse_float_array_from_str(ints_str):
+    return [float(int_str) for int_str in ints_str.split(',')]
 def main(unused_args):
    if not os.path.isfile(FLAGS.model_file):
        print("Input graph file '" + FLAGS.model_file + "' does not exist!")
@@ -105,12 +109,18 @@ def main(unused_args):
    input_node_names = FLAGS.input_node.split(',')
    input_node_shapes = FLAGS.input_shape.split(':')
+    if FLAGS.input_range:
+        input_node_ranges = FLAGS.input_range.split(':')
+    else:
+        input_node_ranges = []
    if len(input_node_names) != len(input_node_shapes):
        raise Exception('input node count and shape count do not match.')
    for i in xrange(len(input_node_names)):
        input_node = cvt.NodeInfo()
        input_node.name = input_node_names[i]
        input_node.shape = parse_int_array_from_str(input_node_shapes[i])
+        if len(input_node_ranges) > i:
+            input_node.range = parse_float_array_from_str(input_node_ranges[i])
        option.add_input_node(input_node)
    output_node_names = FLAGS.output_node.split(',')
@@ -276,6 +286,8 @@ def parse_args():
        "--dsp_mode", type=int, default=0, help="dsp run mode, defalut=0")
    parser.add_argument(
        "--input_shape", type=str, default="", help="input shape.")
+    parser.add_argument(
+        "--input_range", type=str, default="", help="input range.")
    parser.add_argument(
        "--platform", type=str, default="tensorflow", help="tensorflow/caffe")
    parser.add_argument(

--- a/mace/python/tools/converter_tool/base_converter.py
+++ b/mace/python/tools/converter_tool/base_converter.py
@@ -205,6 +205,8 @@ class TransformerRule(Enum):
    QUANTIZE_WEIGHTS = 25
    TRANSFORM_LSTMCELL_ZEROSTATE = 26
    TRANSFORM_BASIC_LSTMCELL = 27
+    TRANSFORM_FAKE_QUANTIZE = 28
+    CHECK_QUANTIZE_INFO = 29
 class ConverterInterface(object):
@@ -220,6 +222,7 @@ class NodeInfo(object):
    def __init__(self):
        self._name = None
        self._shape = []
+        self._range = [-1.0, 1.0]
    @property
    def name(self):
@@ -229,6 +232,10 @@ class NodeInfo(object):
    def shape(self):
        return self._shape
+    @property
+    def range(self):
+        return self._range
    @name.setter
    def name(self, name):
        self._name = name
@@ -237,6 +244,10 @@ class NodeInfo(object):
    def shape(self, shape):
        self._shape = shape
+    @range.setter
+    def range(self, range):
+        self._range = range
    def __str__(self):
        return '%s %s' % (self._name, str(self._shape))
@@ -341,6 +352,7 @@ class ConverterOption(object):
        else:
            self._transformer_option = [
                # Model structure related transformation
+                TransformerRule.TRANSFORM_FAKE_QUANTIZE,
                TransformerRule.REMOVE_IDENTITY_OP,
                TransformerRule.TRANSFORM_GLOBAL_POOLING,
                TransformerRule.TRANSFORM_LSTMCELL_ZEROSTATE,
@@ -370,15 +382,17 @@ class ConverterOption(object):
                # Transform finalization
                TransformerRule.ADD_MACE_INPUT_AND_OUTPUT_NODES,
                # for quantization entropy calibration use
-                TransformerRule.ADD_QUANTIZE_TENSOR_RANGE,
                TransformerRule.SORT_BY_EXECUTION,
+                # Need to be put after SORT_BY_EXECUTION
+                TransformerRule.ADD_QUANTIZE_TENSOR_RANGE,
            ]
            if self._quantize:
-                self._transformer_option = self._transformer_option[:-1] + [
+                self._transformer_option = self._transformer_option + [
+                    # need to be put after ADD_QUANTIZE_TENSOR_RANGE
                    TransformerRule.QUANTIZE_NODES,
-                    TransformerRule.ADD_QUANTIZE_TENSOR_RANGE,
                    TransformerRule.QUANTIZE_WEIGHTS,
                    TransformerRule.SORT_BY_EXECUTION,
+                    TransformerRule.CHECK_QUANTIZE_INFO,
                ]

--- a/mace/python/tools/converter_tool/tensorflow_converter.py
+++ b/mace/python/tools/converter_tool/tensorflow_converter.py
@@ -104,6 +104,7 @@ TFSupportedOps = [
    'Cast',
    'ArgMax',
    'Split',
+    'FakeQuantWithMinMaxVars',
 ]
 TFOpType = Enum('TFOpType', [(op, op) for op in TFSupportedOps], type=str)
@@ -209,6 +210,7 @@ class TensorflowConverter(base_converter.ConverterInterface):
            TFOpType.Cast.name: self.convert_cast,
            TFOpType.ArgMax.name: self.convert_argmax,
            TFOpType.Split.name: self.convert_split,
+            TFOpType.FakeQuantWithMinMaxVars.name: self.convert_fake_quantize,
        }
        self._option = option
        self._mace_net_def = mace_pb2.NetDef()
@@ -874,3 +876,21 @@ class TensorflowConverter(base_converter.ConverterInterface):
        num_split_arg.i = tf_op.get_attr('num_split')
        self._skip_tensor.add(tf_op.inputs[0].name)
+    def convert_fake_quantize(self, tf_op):
+        op = self.convert_general_op(tf_op)
+        min_arg = op.arg.add()
+        min_arg.name = 'min'
+        min_arg.f = tf_op.inputs[1].eval()
+        max_arg = op.arg.add()
+        max_arg.name = 'max'
+        max_arg.f = tf_op.inputs[2].eval()
+        narrow_range_arg = op.arg.add()
+        narrow_range_arg.name = 'narrow_range'
+        narrow_range_arg.i = int(tf_op.get_attr('narrow_range'))
+        num_bits_arg = op.arg.add()
+        num_bits_arg.name = 'num_bits'
+        num_bits_arg.i = int(tf_op.get_attr('num_bits'))
+        self._skip_tensor.add(tf_op.inputs[1].name)
+        self._skip_tensor.add(tf_op.inputs[2].name)
--- a/mace/python/tools/converter_tool/transformer.py
+++ b/mace/python/tools/converter_tool/transformer.py
@@ -47,6 +47,8 @@ class Transformer(base_converter.ConverterInterface):
        # Dependencies
        # (TRANSFORM_MATMUL_TO_FC, TRANSFORM_GLOBAL_CONV_TO_FC) -> RESHAPE_FC_WEIGHT  # noqa
        self._registered_transformers = {
+            TransformerRule.TRANSFORM_FAKE_QUANTIZE:
+                self.transform_fake_quantize,
            TransformerRule.REMOVE_IDENTITY_OP: self.remove_identity_op,
            TransformerRule.TRANSFORM_GLOBAL_POOLING:
                self.transform_global_pooling,
@@ -91,6 +93,8 @@ class Transformer(base_converter.ConverterInterface):
            TransformerRule.ADD_MACE_INPUT_AND_OUTPUT_NODES:
                self.add_mace_input_and_output_nodes,
            TransformerRule.SORT_BY_EXECUTION: self.sort_by_execution,
+            TransformerRule.CHECK_QUANTIZE_INFO:
+                self.check_quantize_info,
        }
        self._option = option
@@ -774,16 +778,22 @@ class Transformer(base_converter.ConverterInterface):
    def transform_add_to_biasadd(self):
        net = self._model
        for op in net.op:
-            if op.type == 'Add' \
+            if (op.type == 'Eltwise'
-                    and len(op.input) == 2 \
+                    and ConverterUtil.get_arg(op, MaceKeyword.mace_element_type_str).i == EltwiseType.SUM.value  # noqa
-                    and op.input[1] in self._consts \
+                    and len(op.input) == 2
-                    and len(self._consts[op.input[1]].dims) == 1:
+                    and op.input[1] in self._consts
+                    and len(self._consts[op.input[1]].dims) == 1):
                print("Transform add to biasadd: %s(%s)" % (op.name, op.type))
                op.type = MaceOp.BiasAdd.name
                return True
        return False
+    def replace_quantize_info(self, op, replace_op):
+        if len(replace_op.quantize_info) > 0:
+            del op.quantize_info[:]
+            op.quantize_info.extend(replace_op.quantize_info)
    def fold_biasadd(self):
        net = self._model
        for op in net.op:
@@ -799,6 +809,7 @@ class Transformer(base_converter.ConverterInterface):
                if consumer_op.type == MaceOp.BiasAdd.name:
                    print("Fold biasadd: %s(%s)" % (op.name, op.type))
                    op.input.append(consumer_op.input[1])
+                    self.replace_quantize_info(op, consumer_op)
                    self.safe_remove_node(consumer_op, op)
                    return True
@@ -886,6 +897,7 @@ class Transformer(base_converter.ConverterInterface):
                                or arg.name == MaceKeyword.mace_activation_max_limit_str:  # noqa
                            op.arg.extend([arg])
+                    self.replace_quantize_info(op, consumer_op)
                    self.safe_remove_node(consumer_op, op)
                    return True
@@ -1163,7 +1175,8 @@ class Transformer(base_converter.ConverterInterface):
        transposed_filter = set()
        transposed_deconv_filter = set()
-        if self._option.quantize:
+        if self._option.quantize and \
+                self._option.device == DeviceType.CPU.value:
            print("Transpose filters to OHWI")
            if filter_format == FilterFormat.HWIO:
                transpose_order = [3, 0, 1, 2]
@@ -1601,6 +1614,9 @@ class Transformer(base_converter.ConverterInterface):
        return False
    def quantize_nodes(self):
+        if not self._option.quantize:
+            return False
        print("Add mace quantize and dequantize nodes")
        for op in self._model.op:
@@ -1647,28 +1663,13 @@ class Transformer(base_converter.ConverterInterface):
        self._input_output_added = True
-    def add_quantize_tensor_range(self):
+        return False
-        print("Add quantize tensor range")
-        net = self._model
-        range_file = self._option.quantize_range_file
-        if not range_file:
-            return
-        with open(range_file) as f:
-            for line in f:
-                tensor_name, minmax = line.split("@@")
-                min_val, max_val = [float(i) for i in
-                                    minmax.strip().split(",")]
-                scale, zero = quantize_util.adjust_range(min_val, max_val,
-                                                         non_zero=False)
-                activation_info = net.quantize_info.activation_info.add()
-                activation_info.tensor_name = tensor_name
-                activation_info.scale = scale
-                activation_info.zero_point = zero
-                self._quantize_activation_info[tensor_name] = activation_info
    def quantize_tensor(self, tensor):
        """Assume biasadd has been already folded with convolution and fc"""
+        if not self._option.quantize:
+            return False
        if tensor.data_type == mace_pb2.DT_FLOAT:
            ops = self._consumers.get(tensor.name, None)
            if len(ops) == 1 and ops[0].type in [MaceOp.Conv2D.name,
@@ -1698,8 +1699,131 @@ class Transformer(base_converter.ConverterInterface):
            tensor.zero_point = quantized_tensor.zero
            self._quantized_tensor.update([tensor.name])
+        return False
    def quantize_weights(self):
        print("Quantize weights")
        net = self._model
        for tensor in net.tensors:
            self.quantize_tensor(tensor)
+        return False
+    def add_quantize_info(self, op, minval, maxval):
+        scale, zero = quantize_util.adjust_range(minval, maxval,
+                                                 non_zero=False)
+        quantize_info = op.quantize_info.add()
+        quantize_info.minval = minval
+        quantize_info.maxval = maxval
+        quantize_info.scale = scale
+        quantize_info.zero_point = zero
+        return quantize_info
+    def transform_fake_quantize(self):
+        if not self._option.quantize:
+            return False
+        # Quantize info from fixpoint fine tune
+        print("Transform fake quantize")
+        range_file = self._option.quantize_range_file
+        if range_file:
+            return
+        net = self._model
+        for op in net.op:
+            if op.type == 'FakeQuantWithMinMaxVars':
+                producer_op = self._producer[op.input[0]]
+                minval = ConverterUtil.get_arg(op, 'min').f
+                maxval = ConverterUtil.get_arg(op, 'max').f
+                quantize_info = \
+                    self.add_quantize_info(producer_op, minval, maxval)
+                self._quantize_activation_info[op.input[0]] = quantize_info
+                op.type = MaceOp.Identity.name
+        return False
+    def add_quantize_tensor_range(self):
+        if not self._option.quantize:
+            return False
+        # Quantize info from range statistics
+        print("Add quantize tensor range")
+        range_file = self._option.quantize_range_file
+        if range_file:
+            with open(range_file) as f:
+                for line in f:
+                    tensor_name, minmax = line.split("@@")
+                    min_val, max_val = [float(i) for i in
+                                        minmax.strip().split(",")]
+                    scale, zero = quantize_util.adjust_range(min_val, max_val,
+                                                             non_zero=False)
+                    activation_info = mace_pb2.QuantizeActivationInfo()
+                    activation_info.minval = min_val
+                    activation_info.maxval = max_val
+                    activation_info.scale = scale
+                    activation_info.zero_point = zero
+                    self._quantize_activation_info[tensor_name] = activation_info  # noqa
+            for op in self._model.op:
+                if op.name.find(MaceKeyword.mace_output_node_name) >= 0:
+                    continue
+                for output in op.output:
+                    mace_check(output in self._quantize_activation_info,
+                               "%s does not have quantize activation info"
+                               % op)
+                    op.quantize_info.extend([
+                        self._quantize_activation_info[output]
+                        for output in op.output])
+        print ("Add default quantize info for ops like Pooling, Softmax")
+        for op in self._model.op:
+            if op.type in [MaceOp.Pooling.name,
+                           MaceOp.Squeeze.name,
+                           MaceOp.Concat.name,
+                           MaceOp.ResizeBilinear.name,
+                           MaceOp.BatchToSpaceND.name,
+                           MaceOp.SpaceToBatchND.name]:
+                del op.quantize_info[:]
+                producer_op = self._producer[op.input[0]]
+                quantize_info = op.quantize_info.add()
+                quantize_info.minval = producer_op.quantize_info[0].minval
+                quantize_info.maxval = producer_op.quantize_info[0].maxval
+                quantize_info.scale = producer_op.quantize_info[0].scale
+                quantize_info.zero_point = \
+                    producer_op.quantize_info[0].zero_point
+                self._quantize_activation_info[op.output[0]] = quantize_info
+            elif op.type == MaceOp.Softmax.name:
+                del op.quantize_info[:]
+                quantize_info = \
+                    self.add_quantize_info(op, 0.0, 1.0)
+                self._quantize_activation_info[op.output[0]] = quantize_info
+        print ("Add default quantize info for input")
+        for input_node in self._option.input_nodes.values():
+            if input_node.name not in self._quantize_activation_info:
+                print("Input range %s: %s" % (input_node.name,
+                                              str(input_node.range)))
+                scale, zero = quantize_util.adjust_range(input_node.range[0],
+                                                         input_node.range[1],
+                                                         non_zero=False)
+                quantize_info = mace_pb2.QuantizeActivationInfo()
+                quantize_info.minval = input_node.range[0]
+                quantize_info.maxval = input_node.range[1]
+                quantize_info.scale = scale
+                quantize_info.zero_point = zero
+                self._quantize_activation_info[input_node.name] = quantize_info
+        return False
+    def check_quantize_info(self):
+        if not self._option.quantize:
+            return False
+        for op in self._model.op:
+            if (op.name.find(MaceKeyword.mace_input_node_name) == -1
+                and op.name.find(MaceKeyword.mace_output_node_name) == -1
+                and op.type != MaceOp.Quantize.name
+                and op.type != MaceOp.Dequantize.name):  # noqa
+                mace_check(len(op.output) == len(op.quantize_info),
+                           "missing quantize info: %s" % op)
--- a/mace/python/tools/memory_optimizer.py
+++ b/mace/python/tools/memory_optimizer.py
@@ -80,9 +80,13 @@ class MemoryOptimizer(object):
    def op_need_optimize_memory(self, op):
        return True
-    def get_op_mem_block(self, op_type, output_shape):
+    def get_op_mem_block(self, op_type, output_shape, output_type):
+        data_type_size = 4
+        if output_type == mace_pb2.DT_UINT8:
+            data_type_size = 1
        return MemoryBlock(mace_pb2.CPU_BUFFER,
-                           [reduce(operator.mul, output_shape, 1)])
+                           [reduce(operator.mul, output_shape, 1) *
+                            data_type_size])
    def mem_size(self, memory_block):
        return memory_block.block[0]
@@ -143,9 +147,13 @@ class MemoryOptimizer(object):
                    # make these ops reuse memory of input tensor
                    mem_id = self.op_mem.get(op.input[0], -1)
                else:
+                    output_type = mace_pb2.DT_FLOAT
+                    if len(op.output_type) > i:
+                        output_type = op.output_type[i]
                    op_mem_block = self.get_op_mem_block(
                        op.type,
-                        op.output_shape[i].dims)
+                        op.output_shape[i].dims,
+                        output_type)
                    mem_id = -1
                    if len(self.idle_mem) > 0:
                        best_mem_add_size = sys.maxint
@@ -221,7 +229,7 @@ class GPUMemoryOptimizer(MemoryOptimizer):
                    return False
        return op.type != 'ImageToBuffer'
-    def get_op_mem_block(self, op_type, output_shape):
+    def get_op_mem_block(self, op_type, output_shape, output_type):
        if op_type == 'WinogradTransform' or op_type == 'MatMul':
            buffer_shape = list(output_shape) + [1]
            mem_block = MemoryBlock(

--- a/mace/python/tools/model.jinja2
+++ b/mace/python/tools/model.jinja2
@@ -138,20 +138,6 @@ void CreateMemoryArena(mace::MemoryArena *mem_arena) {
 }
 {% endif %}
-void AddQuantizeInfo(NetDef *net_def) {
-  MACE_LATENCY_LOGGER(1, "Add quantize info");
-  (void) net_def;
-  {% for i in range(net.quantize_info.activation_info|length) %}
-  mace::QuantizeActivationInfo *activation_info{{i}} =
-      net_def->mutable_quantize_info()->add_activation_info();
-  activation_info{{i}}->set_tensor_name("{{net.quantize_info.activation_info[i].tensor_name}}");
-  activation_info{{i}}->set_scale({{net.quantize_info.activation_info[i].scale}});
-  activation_info{{i}}->set_zero_point({{net.quantize_info.activation_info[i].zero_point}});
-  {% endfor %}
-}
 }  // namespace
 namespace {{tag}} {
@@ -179,8 +165,6 @@ const std::shared_ptr<NetDef> CreateNet() {
  CreateOutputInfo(net_def.get());
  {% endif %}
-  AddQuantizeInfo(net_def.get());
  return net_def;
 }

--- a/mace/python/tools/operator.jinja2
+++ b/mace/python/tools/operator.jinja2
@@ -122,6 +122,16 @@ void CreateOperator{{i}}(mace::OperatorDef *op) {
          {{ net.op[i].node_id }},
          { {{ net.op[i].mem_id | join(', ') }} });
+  op->mutable_quantize_info()->Reserve({{ net.op[i].quantize_info | length }});
+  {% for j in range(net.op[i].quantize_info|length) %}
+    auto quantize_info{{j}} = op->add_quantize_info();
+    quantize_info{{j}}->set_scale({{ net.op[i].quantize_info[j].scale }});
+    quantize_info{{j}}->set_zero_point({{ net.op[i].quantize_info[j].zero_point }});
+    quantize_info{{j}}->set_minval({{ net.op[i].quantize_info[j].minval }});
+    quantize_info{{j}}->set_maxval({{ net.op[i].quantize_info[j].maxval }});
+  {% endfor %}
  {% if runtime == 'dsp' %}
    op->set_padding({{ net.op[i].padding }});
    {% if net.op[i].node_input | length > 0 %}

--- a/tools/converter.py
+++ b/tools/converter.py
@@ -760,6 +760,7 @@ def convert_model(configs):
            runtime,
            model_name,
            ":".join(subgraphs[0][YAMLKeyword.input_shapes]),
+            ":".join(subgraphs[0][YAMLKeyword.input_ranges]),
            model_config[YAMLKeyword.nnlib_graph_mode],
            embed_model_data,
            model_config[YAMLKeyword.winograd],

--- a/tools/sh_commands.py
+++ b/tools/sh_commands.py
@@ -551,6 +551,7 @@ def gen_model_code(model_codegen_dir,
                   runtime,
                   model_tag,
                   input_shapes,
+                   input_ranges,
                   dsp_mode,
                   embed_model_data,
                   winograd,
@@ -579,6 +580,7 @@ def gen_model_code(model_codegen_dir,
              "--template=%s" % "mace/python/tools",
              "--model_tag=%s" % model_tag,
              "--input_shape=%s" % input_shapes,
+              "--input_range=%s" % input_ranges,
              "--dsp_mode=%s" % dsp_mode,
              "--embed_model_data=%s" % embed_model_data,
              "--winograd=%s" % winograd,