Commit 93df7ac5 authored by 李寅

Merge branch 'quantize' into 'master'

Fix quantize

See merge request !1053
@@ -1570,6 +1570,7 @@ class Transformer(base_converter.ConverterInterface):
             else:
                 non_zero = self._option.device == DeviceType.CPU.value
             quantized_tensor = quantize_util.quantize(tensor.float_data,
+                                                      self._option.device,
                                                       non_zero)
             tensor.data_type = mace_pb2.DT_UINT8
@@ -1594,7 +1595,8 @@ class Transformer(base_converter.ConverterInterface):
     def add_quantize_info(self, op, minval, maxval):
         scale, zero, minval, maxval = \
-            quantize_util.adjust_range(minval, maxval, non_zero=False)
+            quantize_util.adjust_range(minval, maxval, self._option.device,
+                                       non_zero=False)
         quantize_info = op.quantize_info.add()
         quantize_info.minval = minval
         quantize_info.maxval = maxval
@@ -1694,8 +1696,9 @@ class Transformer(base_converter.ConverterInterface):
                 min_val, max_val = [float(i) for i in
                                     minmax.strip().split(",")]
                 scale, zero, min_val, max_val = \
-                    quantize_util.adjust_range(
-                        min_val, max_val, non_zero=False)
+                    quantize_util.adjust_range(min_val, max_val,
+                                               self._option.device,
+                                               non_zero=False)
                 activation_info = mace_pb2.QuantizeActivationInfo()
                 activation_info.minval = min_val
                 activation_info.maxval = max_val
@@ -1710,9 +1713,8 @@ class Transformer(base_converter.ConverterInterface):
                 mace_check(output in self._quantize_activation_info,
                            "%s does not have quantize activation info"
                            % op)
-                op.quantize_info.extend([
-                    self._quantize_activation_info[output]
-                    for output in op.output])
+                op.quantize_info.append(
+                    self._quantize_activation_info[output])

         if not self._option.quantize:
             return False
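
The extend-to-append change just above is the heart of the fix: the surrounding mace_check on `output` shows these lines run inside a loop over op.output, so the old extend re-added the quantize info of every output on each pass, leaving an n-output op with n*n quantize_info entries. A standalone sketch of the difference, with hypothetical output names and info values:

    outputs = ["out0", "out1"]
    info = {"out0": "qi0", "out1": "qi1"}

    old_result, new_result = [], []
    for output in outputs:
        # old: extend with info for *all* outputs on every pass -> duplicates
        old_result.extend([info[o] for o in outputs])
        # new: append only the current output's info
        new_result.append(info[output])

    assert old_result == ["qi0", "qi1", "qi0", "qi1"]
    assert new_result == ["qi0", "qi1"]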
@@ -1726,6 +1728,7 @@ class Transformer(base_converter.ConverterInterface):
             scale, zero, minval, maxval = \
                 quantize_util.adjust_range(input_node.range[0],
                                            input_node.range[1],
+                                           self._option.device,
                                            non_zero=False)
             quantize_info = \
                 mace_pb2.QuantizeActivationInfo()
@@ -2002,7 +2005,7 @@ class Transformer(base_converter.ConverterInterface):
             if input_tensor in self._consts:
                 const_tensor = self._consts[input_tensor]
                 quantized_tensor = quantize_util.quantize(
-                    const_tensor.float_data, non_zero)
+                    const_tensor.float_data, self._option.device, non_zero)
                 del const_tensor.float_data[:]
                 const_tensor.int32_data.extend(quantized_tensor.data)
                 const_tensor.data_type = mace_pb2.DT_UINT8
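
All of the Transformer hunks above make the same mechanical change: self._option.device is threaded into quantize_util so that range adjustment can branch on the target device. A minimal sketch of the dispatch this enables, using a hypothetical stand-in for MACE's DeviceType enum (the real one is imported from mace.python.tools.converter_tool.base_converter in the next file):

    from enum import Enum

    class DeviceType(Enum):  # stand-in; values are assumptions for the sketch
        CPU = 0
        GPU = 2
        HEXAGON = 3
        HTA = 4

    def adjust_range(in_min, in_max, device, non_zero):
        # Hexagon/HTA take a dedicated range adjustment; other devices
        # keep the default path (see quantize_util.py below).
        if device in [DeviceType.HEXAGON.value, DeviceType.HTA.value]:
            return "hexagon path"
        return "default path"

    print(adjust_range(-1.0, 1.0, DeviceType.HEXAGON.value, non_zero=False))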
......
 import numpy as np
 import math
+
+from mace.python.tools.converter_tool.base_converter import DeviceType


 class QuantizedData(object):
     def __init__(self):
@@ -51,7 +53,10 @@ class QuantizedData(object):
         self._maxval = maxval


-def adjust_range(in_min, in_max, non_zero):
+def adjust_range(in_min, in_max, device, non_zero):
+    if device in [DeviceType.HEXAGON.value, DeviceType.HTA.value]:
+        return adjust_range_for_hexagon(in_min, in_max)
+
     out_max = max(0.0, in_max)
     out_min = min(0.0, in_min)
     if non_zero:
@@ -61,9 +66,30 @@ def adjust_range(in_min, in_max, non_zero):
     if out_min < -eps and out_max > eps:
         zero = -out_min / scale
         zero_int = int(round(zero))
-        if abs(zero - zero_int) > eps:
-            if zero < zero_int or non_zero:
-                zero_int = int(math.ceil(zero))
-                scale = out_max / (255.0 - zero_int)
-            else:
-                scale = -out_min / zero_int
+        if abs(zero - zero_int) > eps and non_zero:
+            zero_int = int(math.ceil(zero))
+    elif out_min > -eps:
+        zero_int = 0
+    else:
+        zero_int = 255
+
+    return scale, zero_int, -zero_int*scale, (255-zero_int)*scale
+
+
+def adjust_range_for_hexagon(in_min, in_max):
+    out_max = max(0.0, in_max)
+    out_min = min(0.0, in_min)
+    scale = (out_max - out_min) / 255.0
+    eps = 1e-6
+    if out_min < -eps and out_max > eps:
+        zero = -out_min / scale
+        zero_int = int(round(zero))
+        # if zero_int <= 0 or >= 255, try to avoid divide by 0,
+        # else, try to make the adjustment as small as possible
+        ceil = int(math.ceil(zero))
+        keep_max = (ceil - zero) / out_max < (zero + 1 - ceil) / -out_min
+        if zero_int <= 0 or (zero_int < 254 and keep_max):
+            zero_int = ceil
+            scale = out_max / (255.0 - zero_int)
+        else:
+            scale = -out_min / zero_int
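
adjust_range_for_hexagon forces the zero point to an exact integer and then recomputes scale from one end of the range, keeping that end exactly representable: keep_max picks whichever end needs the relatively smaller adjustment, and the zero_int <= 0 / zero_int < 254 guards avoid a zero denominator, per the in-code comment. A worked example of the arithmetic (illustrative numbers only):

    import math

    in_min, in_max = -1.0, 1.6
    scale = (in_max - in_min) / 255.0           # ~0.010196
    zero = -in_min / scale                      # ~98.077, not an integer
    zero_int = int(round(zero))                 # 98
    ceil_z = int(math.ceil(zero))               # 99
    # relative size of the adjustment at either end:
    keep_max = (ceil_z - zero) / in_max < (zero + 1 - ceil_z) / -in_min  # False
    # keep_max is False, so in_min stays fixed and scale is recomputed:
    scale = -in_min / zero_int                  # 1/98: 0.0 now maps exactly to 98
    out_min, out_max = -zero_int * scale, (255 - zero_int) * scale  # -1.0, ~1.602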
@@ -108,11 +134,11 @@ def quantize_with_scale_and_zero(data, scale, zero):
     return quantized_data


-def quantize(data, non_zero):
+def quantize(data, device, non_zero):
     np_data = np.array(data).astype(float)
     in_min = np_data.min()
     in_max = np_data.max()
-    scale, zero, out_min, out_max = adjust_range(in_min, in_max,
-                                                 non_zero=non_zero)
+    scale, zero, out_min, out_max = adjust_range(in_min, in_max, device,
+                                                 non_zero=non_zero)
     output = np.clip((np.round(zero + data / scale).astype(np.int32)), 0, 255)
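
quantize() then applies the affine map q = round(x / scale + zero), clipped to the uint8 range. Continuing with the numbers from the Hexagon example above (a sketch of the arithmetic, not a call into MACE):

    import numpy as np

    data = np.array([-1.0, 0.0, 1.6])
    scale, zero = 1.0 / 98, 98                  # from the worked example above
    q = np.clip(np.round(zero + data / scale).astype(np.int32), 0, 255)
    # q == [0, 98, 255]; dequantizing, (q - zero) * scale ~= [-1.0, 0.0, 1.602]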
......
@@ -57,15 +57,8 @@ inline void AdjustRange(const float in_min_data,
     int32_t
         quantized_zero_near_int = static_cast<int32_t>(roundf(quantized_zero));
     *zero_point = quantized_zero_near_int;
-    if (fabs(quantized_zero - quantized_zero_near_int) > kEps) {
-      if (quantized_zero < quantized_zero_near_int || non_zero) {
-        // keep out_max fixed, and move out_min
-        *zero_point = static_cast<int32_t>(std::ceil(quantized_zero));
-        *scale = out_max / (quantized_max - *zero_point);
-      } else {
-        // keep out_min fixed, and move out_max
-        *scale = out_min / (quantized_min - *zero_point);
-      }
+    if (fabs(quantized_zero - quantized_zero_near_int) > kEps && non_zero) {
+      *zero_point = static_cast<int32_t>(std::ceil(quantized_zero));
     }
   } else if (out_min > -kEps) {
     *zero_point = quantized_min;
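
The C++ AdjustRange mirrors the Python default path: a fractional zero point is now simply rounded (ceiled only when non_zero must hold) and scale is left untouched, where the removed branch used to recompute scale from one of the endpoints. The consequence, sketched in Python with the same numbers as above:

    in_min, in_max = -1.0, 1.6
    scale = (in_max - in_min) / 255.0
    zero_int = int(round(-in_min / scale))      # 98, scale left untouched
    out_min, out_max = -zero_int * scale, (255 - zero_int) * scale
    # out_min ~= -0.99922, out_max ~= 1.60078: both endpoints drift by a
    # fraction of one step, while the Hexagon path keeps one endpoint exact.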
......