diff --git a/mace/python/tools/converter_tool/transformer.py b/mace/python/tools/converter_tool/transformer.py
index 1083e23545767725e2f4e0d9c394d790fd5d0dd3..0ee51ab546929417d1d857cfbfef67cf522c3891 100644
--- a/mace/python/tools/converter_tool/transformer.py
+++ b/mace/python/tools/converter_tool/transformer.py
@@ -1563,6 +1563,7 @@ class Transformer(base_converter.ConverterInterface):
             else:
                 non_zero = self._option.device == DeviceType.CPU.value
                 quantized_tensor = quantize_util.quantize(tensor.float_data,
+                                                          self._option.device,
                                                           non_zero)
                 tensor.data_type = mace_pb2.DT_UINT8
 
@@ -1587,7 +1588,8 @@ class Transformer(base_converter.ConverterInterface):
 
     def add_quantize_info(self, op, minval, maxval):
         scale, zero, minval, maxval = \
-            quantize_util.adjust_range(minval, maxval, non_zero=False)
+            quantize_util.adjust_range(minval, maxval, self._option.device,
+                                       non_zero=False)
         quantize_info = op.quantize_info.add()
         quantize_info.minval = minval
         quantize_info.maxval = maxval
@@ -1687,8 +1689,9 @@ class Transformer(base_converter.ConverterInterface):
                     min_val, max_val = [float(i) for i in
                                         minmax.strip().split(",")]
                     scale, zero, min_val, max_val = \
-                        quantize_util.adjust_range(
-                            min_val, max_val, non_zero=False)
+                        quantize_util.adjust_range(min_val, max_val,
+                                                   self._option.device,
+                                                   non_zero=False)
                     activation_info = mace_pb2.QuantizeActivationInfo()
                     activation_info.minval = min_val
                     activation_info.maxval = max_val
@@ -1703,9 +1706,8 @@ class Transformer(base_converter.ConverterInterface):
                 mace_check(output in self._quantize_activation_info,
                            "%s does not have quantize activation info"
                            % op)
-                op.quantize_info.extend([
-                    self._quantize_activation_info[output]
-                    for output in op.output])
+                op.quantize_info.append(
+                    self._quantize_activation_info[output])
 
         if not self._option.quantize:
             return False
@@ -1719,6 +1721,7 @@ class Transformer(base_converter.ConverterInterface):
                 scale, zero, minval, maxval = \
                     quantize_util.adjust_range(input_node.range[0],
                                                input_node.range[1],
+                                               self._option.device,
                                                non_zero=False)
                 quantize_info = \
                     mace_pb2.QuantizeActivationInfo()
@@ -1995,7 +1998,7 @@ class Transformer(base_converter.ConverterInterface):
                 if input_tensor in self._consts:
                     const_tensor = self._consts[input_tensor]
                     quantized_tensor = quantize_util.quantize(
-                        const_tensor.float_data, non_zero)
+                        const_tensor.float_data, self._option.device, non_zero)
                     del const_tensor.float_data[:]
                     const_tensor.int32_data.extend(quantized_tensor.data)
                     const_tensor.data_type = mace_pb2.DT_UINT8
diff --git a/mace/python/tools/quantization/quantize_util.py b/mace/python/tools/quantization/quantize_util.py
index 666b94bdf58e6311e50d5351df8b233a60f50922..b037d058940a178b31632bf953aa77d7d160476c 100644
--- a/mace/python/tools/quantization/quantize_util.py
+++ b/mace/python/tools/quantization/quantize_util.py
@@ -1,6 +1,8 @@
 import numpy as np
 import math
 
+from mace.python.tools.converter_tool.base_converter import DeviceType
+
 
 class QuantizedData(object):
     def __init__(self):
@@ -51,7 +53,10 @@ class QuantizedData(object):
         self._maxval = maxval
 
 
-def adjust_range(in_min, in_max, non_zero):
+def adjust_range(in_min, in_max, device, non_zero):
+    if device in [DeviceType.HEXAGON.value, DeviceType.HTA.value]:
+        return adjust_range_for_hexagon(in_min, in_max)
+
     out_max = max(0.0, in_max)
     out_min = min(0.0, in_min)
     if non_zero:
@@ -61,12 +66,33 @@
     if out_min < -eps and out_max > eps:
         zero = -out_min / scale
         zero_int = int(round(zero))
-        if abs(zero - zero_int) > eps:
-            if zero < zero_int or non_zero:
-                zero_int = int(math.ceil(zero))
-                scale = out_max / (255.0 - zero_int)
-            else:
-                scale = -out_min / zero_int
+        if abs(zero - zero_int) > eps and non_zero:
+            zero_int = int(math.ceil(zero))
+    elif out_min > -eps:
+        zero_int = 0
+    else:
+        zero_int = 255
+
+    return scale, zero_int, -zero_int*scale, (255-zero_int)*scale
+
+
+def adjust_range_for_hexagon(in_min, in_max):
+    out_max = max(0.0, in_max)
+    out_min = min(0.0, in_min)
+    scale = (out_max - out_min) / 255.0
+    eps = 1e-6
+    if out_min < -eps and out_max > eps:
+        zero = -out_min / scale
+        zero_int = int(round(zero))
+        # if zero_int <=0 or >= 255, try to avoid divide by 0,
+        # else, try to make adjustment as small as possible
+        ceil = int(math.ceil(zero))
+        keep_max = (ceil - zero) / out_max < (zero + 1 - ceil) / -out_min
+        if zero_int <= 0 or (zero_int < 254 and keep_max):
+            zero_int = ceil
+            scale = out_max / (255.0 - zero_int)
+        else:
+            scale = -out_min / zero_int
     elif out_min > -eps:
         zero_int = 0
     else:
@@ -108,11 +134,11 @@
     return quantized_data
 
 
-def quantize(data, non_zero):
+def quantize(data, device, non_zero):
     np_data = np.array(data).astype(float)
     in_min = np_data.min()
     in_max = np_data.max()
-    scale, zero, out_min, out_max = adjust_range(in_min, in_max,
+    scale, zero, out_min, out_max = adjust_range(in_min, in_max, device,
                                                  non_zero=non_zero)
     output = np.clip((np.round(zero + data / scale).astype(np.int32)),
                      0, 255)
diff --git a/mace/utils/quantize.h b/mace/utils/quantize.h
index 7634833cc1e75763d79901f68b47f46705fa97db..30595046cabffc6d33a57803dcf59d638962a6d4 100644
--- a/mace/utils/quantize.h
+++ b/mace/utils/quantize.h
@@ -57,15 +57,8 @@ inline void AdjustRange(const float in_min_data,
     int32_t quantized_zero_near_int =
         static_cast<int32_t>(roundf(quantized_zero));
     *zero_point = quantized_zero_near_int;
-    if (fabs(quantized_zero - quantized_zero_near_int) > kEps) {
-      if (quantized_zero < quantized_zero_near_int || non_zero) {
-        // keep out_max fixed, and move out_min
-        *zero_point = static_cast<int32_t>(std::ceil(quantized_zero));
-        *scale = out_max / (quantized_max - *zero_point);
-      } else {
-        // keep out_min fixed, and move out_max
-        *scale = out_min / (quantized_min - *zero_point);
-      }
+    if (fabs(quantized_zero - quantized_zero_near_int) > kEps && non_zero) {
+      *zero_point = static_cast<int32_t>(std::ceil(quantized_zero));
     }
   } else if (out_min > -kEps) {
     *zero_point = quantized_min;
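
Note (reviewer sketch, not part of the patch): the new device argument routes
HEXAGON/HTA to adjust_range_for_hexagon(), which keeps the zero point an exact
integer by rescaling whichever endpoint needs the smaller relative adjustment,
while other devices keep the rounded zero point and recompute the range from it.
A rough usage sketch of the updated API, assuming the mace Python package is
importable from the repo root and using an arbitrary example range of [-1.0, 3.0]:

    from mace.python.tools.converter_tool.base_converter import DeviceType
    from mace.python.tools.quantization import quantize_util

    # CPU/GPU path: the zero point is rounded to the nearest integer and
    # out_min/out_max are recomputed from it, so 0.0 stays exactly representable.
    print(quantize_util.adjust_range(-1.0, 3.0, DeviceType.CPU.value,
                                     non_zero=False))
    # -> approximately (0.01569, 64, -1.0039, 2.9961)

    # Hexagon/HTA path: the ideal zero point is 63.75; keeping out_max fixed
    # needs the smaller relative adjustment, so the zero point becomes
    # ceil(63.75) = 64 and the scale is recomputed as 3.0 / 191.
    print(quantize_util.adjust_range(-1.0, 3.0, DeviceType.HEXAGON.value,
                                     non_zero=False))
    # -> approximately (0.015707, 64, -1.00524, 3.0)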