diff --git a/docs/user_guide/quantization_usage.rst b/docs/user_guide/quantization_usage.rst
index 6ce08f5f7eb5ae79ded2df9a8cac3ef781ace53b..205f854fbcb16661eef9fe508968beb4be60f768 100644
--- a/docs/user_guide/quantization_usage.rst
+++ b/docs/user_guide/quantization_usage.rst
@@ -6,9 +6,9 @@ MACE supports two kinds of quantization mechanisms, i.e.,
 
 * **Quantization-aware training (Recommend)**
 
 After pre-training model using float point, insert simulated quantization operations into the model. Fine tune the new model.
-Refer to `Tensorflow quantization-aware training `__.
+Refer to `Tensorflow quantization-aware training `__.
 
-* **Post training quantization**
+* **Post-training quantization**
 
 After pre-training model using float point, estimate output range of each activation layer using sample inputs.
@@ -28,7 +28,7 @@ models, e.g., MobileNet. The only thing you need to make it run using MACE is to
 2. `quantize`: set `quantize` to be 1.
 
-Post training quantization
+Post-training quantization
 ---------------------------
 
 MACE supports post-training quantization if you want to take a chance to quantize model directly without fine tuning.
 This method requires developer to calculate tensor range of each activation layer statistically using sample inputs.
@@ -84,6 +84,16 @@ MACE provides tools to do statistics with following steps(using `inception-v3` for example):
 
 `quantize` to `1` and `quantize_range_file` to the overall_range file path in yaml config).
 
+Mixing usage
+---------------------------
+As `quantization-aware training` is still evolving, there are some operations that are not supported,
+which leaves some activation layers without tensor range. In this case, `post-training quantization`
+can be used to calculate these missing ranges. To mix the usage, just get a `quantization-aware training`
+model and then go through all the steps of `post-training quantization`. MACE will use the tensor ranges
+from the `overall_range` file of `post-training quantization` if the ranges are missing from the
+`quantization-aware training` model.
+
+
 Supported devices
 -----------------
 MACE supports running quantized models on ARM CPU and other acceleration devices, e.g., Qualcomm Hexagon DSP, MediaTek APU.
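
A note on the range file referenced by the new `Mixing usage` section: judging from the parsing code in the transformer.py change below, the `overall_range` file named by `quantize_range_file` holds one `tensor_name@@min,max` entry per line. The Python sketch here writes such a file from collected activation statistics; the tensor names and ranges are hypothetical placeholders, not the output of any real MACE tool.

# Sketch: emit an overall_range file in the tensor_name@@min,max format
# that the converter splits on "@@" and then "," (see transformer.py below).
# The statistics are made-up placeholders for illustration only.
activation_ranges = {
    "InceptionV3/Conv2d_1a_3x3/Relu:0": (0.0, 6.0),
    "InceptionV3/Logits/SpatialSqueeze:0": (-12.3, 15.7),
}

with open("overall_range", "w") as f:
    for tensor_name, (min_val, max_val) in activation_ranges.items():
        f.write("%s@@%f,%f\n" % (tensor_name, min_val, max_val))
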
diff --git a/tools/python/transform/transformer.py b/tools/python/transform/transformer.py
index dd5f86eeb3eef9b587375cac16846ec51af982f2..2c8901a918dde7fef38357ad187d72e9892f1cfa 100644
--- a/tools/python/transform/transformer.py
+++ b/tools/python/transform/transformer.py
@@ -1758,20 +1758,14 @@ class Transformer(base_converter.ConverterInterface):
             quantize_info.zero_point = info.zero_point
 
     def transform_fake_quantize(self):
-        if not self._option.quantize:
-            return False
-
         # Quantize info from fixpoint fine tune
         print("Transform fake quantize")
-        range_file = self._option.quantize_range_file
-        if range_file:
-            return
 
         net = self._model
         for op in net.op:
             if op.type == 'FakeQuantWithMinMaxVars' or \
                     op.type == 'FakeQuantWithMinMaxArgs':
-                if op.input[0] not in self._consts:
+                if self._option.quantize and op.input[0] not in self._consts:
                     producer_op = self._producer[op.input[0]]
                     minval = ConverterUtil.get_arg(op, 'min').f
                     maxval = ConverterUtil.get_arg(op, 'max').f
@@ -1842,6 +1836,7 @@ class Transformer(base_converter.ConverterInterface):
         range_file = self._option.quantize_range_file
         if range_file:
             print("Add quantize tensor range")
+            post_quantize_info = {}
             with open(range_file) as f:
                 for line in f:
                     tensor_name, minmax = line.split("@@")[:2]
@@ -1856,17 +1851,21 @@ class Transformer(base_converter.ConverterInterface):
                     activation_info.maxval = max_val
                     activation_info.scale = scale
                     activation_info.zero_point = zero
-                    self._quantize_activation_info[tensor_name] = activation_info  # noqa
+                    if tensor_name not in self._quantize_activation_info:
+                        post_quantize_info[tensor_name] = activation_info
 
             for op in self._model.op:
                 if op.name.find(MaceKeyword.mace_output_node_name) >= 0:
                     continue
                 for output in op.output:
-                    mace_check(output in self._quantize_activation_info,
-                               "%s does not have quantize activation info"
-                               % op)
-                    op.quantize_info.extend([
-                        self._quantize_activation_info[output]])
+                    # Prefer quantize info from quantization-aware training
+                    if output not in self._quantize_activation_info:
+                        mace_check(output in post_quantize_info,
+                                   "%s does not have quantize activation info"
+                                   % op)
+                        op.quantize_info.extend([post_quantize_info[output]])
+                        self._quantize_activation_info[output] = \
+                            post_quantize_info[output]
 
         if not self._option.quantize:
             return False
@@ -1979,6 +1978,7 @@ class Transformer(base_converter.ConverterInterface):
                     maxval = producer_op0.quantize_info[0].maxval \
                         - producer_op1.quantize_info[0].minval
                 else:
+                    print(op)
                     mace_check(False, "Quantized Elementwise only support:"
                                       " SUM and SUB without ranges now.")
                 quantize_info = \
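
To make the intent of the transformer.py change easier to see in isolation: quantize info recorded during quantization-aware training keeps priority, and entries read from the post-training `overall_range` file only fill in activations that have no range yet. The sketch below restates that fallback rule with plain dictionaries; `resolve_range` and the sample tensor names are illustrative and are not part of the MACE converter API.

# Sketch of the fallback policy implemented above: prefer ranges learned
# during quantization-aware training, fall back to post-training statistics,
# and fail only if neither source knows the tensor.
def resolve_range(output_name, qat_ranges, post_ranges):
    """Return (minval, maxval) for one activation tensor.

    qat_ranges: ranges already present in the model (from fake-quant ops).
    post_ranges: ranges read from the overall_range file.
    """
    if output_name in qat_ranges:      # quantization-aware training wins
        return qat_ranges[output_name]
    if output_name in post_ranges:     # otherwise use post-training stats
        return post_ranges[output_name]
    raise ValueError("%s does not have quantize activation info" % output_name)


# Hypothetical usage: only "conv2/Relu" falls back to the range file.
qat = {"conv1/Relu": (0.0, 6.0)}
post = {"conv1/Relu": (0.0, 5.9), "conv2/Relu": (-1.2, 3.4)}
print(resolve_range("conv1/Relu", qat, post))   # (0.0, 6.0)  from QAT
print(resolve_range("conv2/Relu", qat, post))   # (-1.2, 3.4) from range file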