From 7c4f1a3851c1fdbea4aa5ca5f15f19dcc7bf4476 Mon Sep 17 00:00:00 2001
From: Megvii Engine Team <megengine@megvii.com>
Date: Sat, 9 May 2020 14:55:30 +0800
Subject: [PATCH] feat(mge/quantization): add calibration support

GitOrigin-RevId: f16fbba2b7cbc6138c4382fcb96b70f5eb71074c
---
 python_module/megengine/module/module.py      | 17 +++++++--------
 .../megengine/module/quantized/concat.py      | 11 +++-------
 .../module/quantized/conv_bn_relu.py          | 11 ++++------
 .../megengine/module/quantized/elemwise.py    | 11 +++-------
 .../module/quantized/quant_dequant.py         | 14 ++++---------
 .../megengine/quantization/__init__.py        |  1 +
 .../megengine/quantization/observer.py        |  2 +-
 .../megengine/quantization/quantize.py        | 21 +++++++++++++++++++
 8 files changed, 45 insertions(+), 43 deletions(-)

diff --git a/python_module/megengine/module/module.py b/python_module/megengine/module/module.py
index 4ee9f62b..60e77dca 100644
--- a/python_module/megengine/module/module.py
+++ b/python_module/megengine/module/module.py
@@ -496,8 +496,11 @@ class QATModule(Module):
         self, target: Tensor, fq: "FakeQuantize", obs: "Observer"
     ):
         oup = self.apply_observer(target, obs)
-        scale, zero_point = obs.get_qparams()
-        return fq(oup, scale, zero_point)
+        if self.quantizing == self.QATMode.CALIBRATION:
+            return oup
+        else:
+            scale, zero_point = obs.get_qparams()
+            return fq(oup, scale, zero_point)
 
     def set_qat_mode(self, mode: QATMode):
         r"""
@@ -524,11 +527,7 @@ class QATModule(Module):
         """
 
     def __call__(self, *args, **kwargs):
-        if self.quantizing == self.QATMode.QAT:
-            return self.forward_qat(*args, **kwargs)
-        elif self.quantizing == self.QATMode.CALIBRATION:
-            # TODO implement the CALIBRATION
-            assert False
-            return None
-        else:
+        if self.quantizing == self.QATMode.DISABLED:
             return self.forward(*args, **kwargs)
+        else:
+            return self.forward_qat(*args, **kwargs)
diff --git a/python_module/megengine/module/quantized/concat.py b/python_module/megengine/module/quantized/concat.py
index 62a7778a..f3f266a1 100644
--- a/python_module/megengine/module/quantized/concat.py
+++ b/python_module/megengine/module/quantized/concat.py
@@ -20,11 +20,9 @@ class Concat(Module):
     A :class:`~.Module` to do quantized concat, inference only.
     """
 
-    def __init__(self):
+    def __init__(self, dtype=None):
         super().__init__()
-        self.scale = 1.0
-        self.zero_point = 0.0
-        self.output_dtype = mgb.dtype.qint8(self.scale)
+        self.output_dtype = dtype
 
     def forward(self, inps: Iterable[Tensor], axis: int = 0):
         if self.training:
@@ -39,7 +37,4 @@ def to_quantized(float_module):
     Replace :class:`~.module.QATModule`'s ``to_quantized`` method.
     implemented here to avoid circular import.
     """
-    qmod = Concat()
-    qmod.output_dtype = float_module.act_observer.get_dtype()
-    qmod.scale, qmod.zero_point = float_module.act_observer.get_qparams()
-    return qmod
+    return Concat(float_module.act_observer.get_dtype())
diff --git a/python_module/megengine/module/quantized/conv_bn_relu.py b/python_module/megengine/module/quantized/conv_bn_relu.py
index dfc502a7..18eddaa8 100644
--- a/python_module/megengine/module/quantized/conv_bn_relu.py
+++ b/python_module/megengine/module/quantized/conv_bn_relu.py
@@ -34,6 +34,7 @@ class _ConvBnActivation2d(Conv2d):
         groups: int = 1,
         conv_mode: str = "CROSS_CORRELATION",
         compute_mode: str = "DEFAULT",
+        dtype=None,
     ):
         super().__init__(
             in_channels,
@@ -47,11 +48,7 @@ class _ConvBnActivation2d(Conv2d):
             conv_mode,
             compute_mode,
         )
-        self.scale = 1.0
-        self.zero_point = 0.0
-        self.output_dtype = mgb.dtype.qint8(self.scale)
-        self.weight = self.weight.astype(self.output_dtype)
-        self.bias = self.bias.astype(mgb.dtype.qint32(self.scale))
+        self.output_dtype = dtype
 
     def calc_conv_quantized(self, inp, nonlinear_mode="IDENTITY"):
         inp_scale = mgb.dtype.get_scale(inp.dtype)
@@ -87,6 +84,7 @@ class ConvBnRelu2d(_ConvBnActivation2d):
 
 
 def to_quantized(quantized_class, float_module):
+    output_dtype = float_module.act_observer.get_dtype()
     qconv = quantized_class(
         float_module.conv.in_channels,
         float_module.conv.out_channels,
@@ -95,15 +93,14 @@ def to_quantized(quantized_class, float_module):
         float_module.conv.padding,
         float_module.conv.dilation,
         float_module.conv.groups,
+        dtype=output_dtype,
     )
     w_fold, b_fold = float_module.fold_weight_bias(
         float_module.bn.running_mean, float_module.bn.running_var
     )
     weight = w_fold.astype(float_module.weight_observer.get_dtype())
-    qconv.output_dtype = float_module.act_observer.get_dtype()
     qconv.weight = Parameter(weight.numpy())
     qconv.bias = Parameter(b_fold.numpy())
-    qconv.scale, qconv.zero_point = float_module.act_observer.get_qparams()
 
     return qconv
 
diff --git a/python_module/megengine/module/quantized/elemwise.py b/python_module/megengine/module/quantized/elemwise.py
index 9a03ac9a..47f30e47 100644
--- a/python_module/megengine/module/quantized/elemwise.py
+++ b/python_module/megengine/module/quantized/elemwise.py
@@ -34,12 +34,10 @@ class Elemwise(Module):
 
     _elemwise_multi_type_mode = mgb.opr_param_defs.ElemwiseMultiType.Mode
 
-    def __init__(self, method):
+    def __init__(self, method, dtype=None):
         super().__init__()
         self.method = self._elemwise_multi_type_mode.convert("Q" + method)
-        self.scale = 1.0
-        self.zero_point = 0.0
-        self.output_dtype = mgb.dtype.qint8(self.scale)
+        self.output_dtype = dtype
 
     def forward(self, *inps):
         if self.training:
@@ -53,7 +51,4 @@ def to_quantized(float_module):
     Replace :class:`~.module.QATModule`'s ``to_quantized`` method.
     implemented here to avoid circular import.
     """
-    qmod = Elemwise(float_module.method.name)
-    qmod.output_dtype = float_module.act_observer.get_dtype()
-    qmod.scale, qmod.zero_point = float_module.act_observer.get_qparams()
-    return qmod
+    return Elemwise(float_module.method.name, float_module.act_observer.get_dtype())
diff --git a/python_module/megengine/module/quantized/quant_dequant.py b/python_module/megengine/module/quantized/quant_dequant.py
index 5faf9238..5a91b6fd 100644
--- a/python_module/megengine/module/quantized/quant_dequant.py
+++ b/python_module/megengine/module/quantized/quant_dequant.py
@@ -16,11 +16,9 @@ class QuantStub(Module):
     A helper quantize operation on input and inference only.
     """
 
-    def __init__(self):
+    def __init__(self, dtype=None):
         super().__init__()
-        self.scale = 1.0
-        self.zero_point = 0.0
-        self.output_dtype = mgb.dtype.qint8(self.scale)
+        self.output_dtype = dtype
 
     def forward(self, inp):
         if self.training:
@@ -45,10 +43,7 @@ def to_quantized(float_module):
     Replace :class:`~.module.QATModule`'s ``to_quantized`` method.
     implemented here to avoid circular import.
     """
-    qmod = QuantStub()
-    qmod.output_dtype = float_module.act_observer.get_dtype()
-    qmod.scale, qmod.zero_point = float_module.act_observer.get_qparams()
-    return qmod
+    return QuantStub(float_module.act_observer.get_dtype())
 
 
 @register_method_to_class(Float.DequantStub)
@@ -57,5 +52,4 @@ def to_quantized(float_module):
     Replace :class:`~.module.QATModule`'s ``to_quantized`` method.
     implemented here to avoid circular import.
     """
-    qmod = DequantStub()
-    return qmod
+    return DequantStub()
diff --git a/python_module/megengine/quantization/__init__.py b/python_module/megengine/quantization/__init__.py
index 46145bd8..9d490be8 100644
--- a/python_module/megengine/quantization/__init__.py
+++ b/python_module/megengine/quantization/__init__.py
@@ -14,5 +14,6 @@ from .quantize import (
     enable_fake_quant,
     enable_observer,
     quantize,
+    quantize_calibration,
     quantize_qat,
 )
diff --git a/python_module/megengine/quantization/observer.py b/python_module/megengine/quantization/observer.py
index 3c4484e6..b6799e79 100644
--- a/python_module/megengine/quantization/observer.py
+++ b/python_module/megengine/quantization/observer.py
@@ -11,7 +11,7 @@ import numpy as np
 
 from .. import functional as F
 from .._internal.dtype import _metadata_dict, get_quantized_dtype
-from ..core import Buffer, Function, ones, tensor, zeros
+from ..core import Buffer, Function, tensor
 from ..module import Module
 
 
diff --git a/python_module/megengine/quantization/quantize.py b/python_module/megengine/quantization/quantize.py
index 1ce5c953..1bfba352 100644
--- a/python_module/megengine/quantization/quantize.py
+++ b/python_module/megengine/quantization/quantize.py
@@ -34,6 +34,8 @@ def quantize(module: Module, inplace=True):
         else:
             setattr(parent, key.split(".")[-1], submodule.to_quantized())
 
+    return module
+
 
 def quantize_qat(module: Module, qconfig: QConfig = ema_fakequant_qconfig):
     r"""
@@ -53,6 +55,25 @@ def quantize_qat(module: Module, qconfig: QConfig = ema_fakequant_qconfig):
     module.apply(fn)
 
 
+def quantize_calibration(module: Module, qconfig: QConfig = ema_fakequant_qconfig):
+    r"""
+    Recursively convert `module` to `calibration` mode through :meth:`~.Module.apply`
+    and set qconfig accordingly.
+
+    :param module: root module to convert recursively.
+    :param qconfig: an instance of :class:`~.QConfig` to be set as submodules' qconfig.
+        default is :any:`~.qconfig.ema_fakequant_qconfig`.
+    """
+
+    def fn(mod: Module):
+        if isinstance(mod, QATModule):
+            mod.set_qat_mode(QATModule.QATMode.CALIBRATION)
+            mod.set_qconfig(qconfig)
+
+    module.apply(fn)
+    enable_observer(module)
+
+
 def disable_fake_quant(module: Module):
     r"""
     Recursively disable `module` fake quantization in QATModule through :meth:`~.Module.apply`
-- 
GitLab