提交 087f7697 编写于 作者: B Bin Li

Fix quantize

上级 b71da971
......@@ -1563,6 +1563,7 @@ class Transformer(base_converter.ConverterInterface):
else:
non_zero = self._option.device == DeviceType.CPU.value
quantized_tensor = quantize_util.quantize(tensor.float_data,
self._option.device,
non_zero)
tensor.data_type = mace_pb2.DT_UINT8
......@@ -1587,7 +1588,8 @@ class Transformer(base_converter.ConverterInterface):
def add_quantize_info(self, op, minval, maxval):
scale, zero, minval, maxval = \
quantize_util.adjust_range(minval, maxval, non_zero=False)
quantize_util.adjust_range(minval, maxval, self._option.device,
non_zero=False)
quantize_info = op.quantize_info.add()
quantize_info.minval = minval
quantize_info.maxval = maxval
......@@ -1687,8 +1689,9 @@ class Transformer(base_converter.ConverterInterface):
min_val, max_val = [float(i) for i in
minmax.strip().split(",")]
scale, zero, min_val, max_val = \
quantize_util.adjust_range(
min_val, max_val, non_zero=False)
quantize_util.adjust_range(min_val, max_val,
self._option.device,
non_zero=False)
activation_info = mace_pb2.QuantizeActivationInfo()
activation_info.minval = min_val
activation_info.maxval = max_val
......@@ -1703,9 +1706,8 @@ class Transformer(base_converter.ConverterInterface):
mace_check(output in self._quantize_activation_info,
"%s does not have quantize activation info"
% op)
op.quantize_info.extend([
self._quantize_activation_info[output]
for output in op.output])
op.quantize_info.append(
self._quantize_activation_info[output])
if not self._option.quantize:
return False
......@@ -1719,6 +1721,7 @@ class Transformer(base_converter.ConverterInterface):
scale, zero, minval, maxval = \
quantize_util.adjust_range(input_node.range[0],
input_node.range[1],
self._option.device,
non_zero=False)
quantize_info = \
mace_pb2.QuantizeActivationInfo()
......@@ -1995,7 +1998,7 @@ class Transformer(base_converter.ConverterInterface):
if input_tensor in self._consts:
const_tensor = self._consts[input_tensor]
quantized_tensor = quantize_util.quantize(
const_tensor.float_data, non_zero)
const_tensor.float_data, self._option.device, non_zero)
del const_tensor.float_data[:]
const_tensor.int32_data.extend(quantized_tensor.data)
const_tensor.data_type = mace_pb2.DT_UINT8
......
import numpy as np
import math
from mace.python.tools.converter_tool.base_converter import DeviceType
class QuantizedData(object):
def __init__(self):
......@@ -51,7 +53,10 @@ class QuantizedData(object):
self._maxval = maxval
def adjust_range(in_min, in_max, non_zero):
def adjust_range(in_min, in_max, device, non_zero):
if device in [DeviceType.HEXAGON.value, DeviceType.HTA.value]:
return adjust_range_for_hexagon(in_min, in_max)
out_max = max(0.0, in_max)
out_min = min(0.0, in_min)
if non_zero:
......@@ -61,12 +66,33 @@ def adjust_range(in_min, in_max, non_zero):
if out_min < -eps and out_max > eps:
zero = -out_min / scale
zero_int = int(round(zero))
if abs(zero - zero_int) > eps:
if zero < zero_int or non_zero:
zero_int = int(math.ceil(zero))
scale = out_max / (255.0 - zero_int)
else:
scale = -out_min / zero_int
if abs(zero - zero_int) > eps and non_zero:
zero_int = int(math.ceil(zero))
elif out_min > -eps:
zero_int = 0
else:
zero_int = 255
return scale, zero_int, -zero_int*scale, (255-zero_int)*scale
def adjust_range_for_hexagon(in_min, in_max):
out_max = max(0.0, in_max)
out_min = min(0.0, in_min)
scale = (out_max - out_min) / 255.0
eps = 1e-6
if out_min < -eps and out_max > eps:
zero = -out_min / scale
zero_int = int(round(zero))
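# If zero_int is at either end of the range (<= 0 or >= 254), take the
# branch whose divisor cannot be 0; otherwise take whichever adjustment
# (moving out_min or moving out_max) changes the range the least.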
ceil = int(math.ceil(zero))
keep_max = (ceil - zero) / out_max < (zero + 1 - ceil) / -out_min
if zero_int <= 0 or (zero_int < 254 and keep_max):
zero_int = ceil
scale = out_max / (255.0 - zero_int)
else:
scale = -out_min / zero_int
elif out_min > -eps:
zero_int = 0
else:
......@@ -108,11 +134,11 @@ def quantize_with_scale_and_zero(data, scale, zero):
return quantized_data
def quantize(data, non_zero):
def quantize(data, device, non_zero):
np_data = np.array(data).astype(float)
in_min = np_data.min()
in_max = np_data.max()
scale, zero, out_min, out_max = adjust_range(in_min, in_max,
scale, zero, out_min, out_max = adjust_range(in_min, in_max, device,
non_zero=non_zero)
output = np.clip((np.round(zero + data / scale).astype(np.int32)), 0, 255)
......
......@@ -57,15 +57,8 @@ inline void AdjustRange(const float in_min_data,
int32_t
quantized_zero_near_int = static_cast<int32_t>(roundf(quantized_zero));
*zero_point = quantized_zero_near_int;
if (fabs(quantized_zero - quantized_zero_near_int) > kEps) {
if (quantized_zero < quantized_zero_near_int || non_zero) {
// keep out_max fixed, and move out_min
*zero_point = static_cast<int32_t>(std::ceil(quantized_zero));
*scale = out_max / (quantized_max - *zero_point);
} else {
// keep out_min fixed, and move out_max
*scale = out_min / (quantized_min - *zero_point);
}
if (fabs(quantized_zero - quantized_zero_near_int) > kEps && non_zero) {
*zero_point = static_cast<int32_t>(std::ceil(quantized_zero));
}
} else if (out_min > -kEps) {
*zero_point = quantized_min;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册