Unverified commit b5809912, authored by whs, committed by GitHub

Add stub for quantization (#50510)

Parent 2451841f
@@ -24,5 +24,6 @@ from .functional_layers import flatten  # noqa: F401
 from .functional_layers import matmul  # noqa: F401
 from .quant_layers import QuantStub  # noqa: F401
 from . import qat
+from .stub import Stub
-__all__ = []
+__all__ = ["Stub"]
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Define stub used in quantization."""
from paddle.nn import Layer

class Stub(Layer):
    r"""
    The stub is used as a placeholder that will be replaced by an observer before PTQ or QAT.
    It is hard to assign a quantization configuration to a functional API called in the
    forward of a layer. Instead, we can create a stub, add it to the sublayers of the layer,
    and call the stub before the functional API in the forward. The observer held by the
    stub will observe or quantize the inputs of the functional API.

    Args:
        observer(QuanterFactory) - The factory that configures the observer to be inserted.
            If 'observer' is None, a global configuration will be used to create the observer.

    Examples:
        .. code-block:: python

            import paddle
            from paddle.nn.quant import Stub
            from paddle.quantization.quanters import FakeQuanterWithAbsMaxObserver
            from paddle.nn import Conv2D
            from paddle.quantization import QAT, QuantConfig

            quanter = FakeQuanterWithAbsMaxObserver(moving_rate=0.9)

            class Model(paddle.nn.Layer):
                def __init__(self, num_classes=10):
                    super(Model, self).__init__()
                    self.conv = Conv2D(3, 6, 3, stride=1, padding=1)
                    self.quant = Stub(quanter)

                def forward(self, inputs):
                    out = self.conv(inputs)
                    out = self.quant(out)
                    return paddle.nn.functional.relu(out)

            model = Model()
            q_config = QuantConfig(activation=quanter, weight=quanter)
            qat = QAT(q_config)
            quant_model = qat.quantize(model)
            print(quant_model)
    """

    def __init__(self, observer=None):
        super(Stub, self).__init__()
        self._observer = observer

    def forward(self, input):
        return input

class QuanterStub(Layer):
    r"""
    It is an identity layer with an observer that observes the input.
    Before QAT or PTQ, each stub in the model will be replaced with an instance of QuanterStub.
    The user should not use this class directly.

    Args:
        layer(Stub) - The stub layer holding an observer factory. If the observer of the
            stub layer is None, 'q_config' will be used to create an observer instance.
        q_config(QuantConfig) - The quantization configuration for the current stub layer.
    """

    def __init__(self, layer: Stub, q_config):
        super(QuanterStub, self).__init__()
        self._observer = None
        if layer._observer is not None:
            self._observer = layer._observer._instance(layer)
        elif q_config.activation is not None:
            self._observer = q_config.activation._instance(layer)

    def forward(self, input):
        return self._observer(input) if self._observer is not None else input
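
A minimal sketch of the substitution that quantization performs for every Stub in a model,
constructing QuanterStub by hand purely for illustration (in normal use `qat.quantize` does
this through the QAT layer mappings, and QuanterStub is never created directly):

import paddle
from paddle.nn.quant import Stub
from paddle.nn.quant.stub import QuanterStub
from paddle.quantization import QuantConfig
from paddle.quantization.quanters import FakeQuanterWithAbsMaxObserver

# A stub with an explicit quanter, plus the config QAT would receive.
quanter = FakeQuanterWithAbsMaxObserver(moving_rate=0.9)
q_config = QuantConfig(activation=quanter, weight=quanter)
stub = Stub(quanter)

# The swap: the stub's observer factory is instantiated and wrapped.
replaced = QuanterStub(stub, q_config)
x = paddle.rand([1, 3, 8, 8])
y = replaced(x)  # x is observed/fake-quantized and returned with the same shape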
@@ -24,6 +24,7 @@ from .wrapper import ObserveWrapper
 # TODO: Implement quanted layer and fill the mapping dict
 DEFAULT_QAT_LAYER_MAPPINGS: Dict[Layer, Layer] = {
+    nn.quant.Stub: nn.quant.stub.QuanterStub,
     nn.Linear: nn.quant.qat.QuantedLinear,
     nn.Conv2D: nn.quant.qat.QuantedConv2D,
 }
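
A hedged sketch of how a conversion pass can consult this mapping when rewriting a model;
`swap_layer` is a hypothetical helper shown only for illustration, and the real replacement
logic in paddle.quantization may differ:

def swap_layer(layer, q_config):
    # Look up the quantization-aware counterpart registered for this layer type.
    quanted_cls = DEFAULT_QAT_LAYER_MAPPINGS.get(type(layer))
    if quanted_cls is None:
        return layer  # no mapping registered; keep the layer unchanged
    # e.g. nn.quant.Stub -> QuanterStub, nn.Linear -> QuantedLinear
    return quanted_cls(layer, q_config)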
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import paddle
from paddle.nn import Conv2D
from paddle.nn.quant import Stub
from paddle.quantization import QAT, QuantConfig
from paddle.quantization.quanters import FakeQuanterWithAbsMaxObserver
from paddle.quantization.quanters.abs_max import (
    FakeQuanterWithAbsMaxObserverLayer,
)

quanter = FakeQuanterWithAbsMaxObserver(moving_rate=0.9)

class Model(paddle.nn.Layer):
    def __init__(self, num_classes=10):
        super(Model, self).__init__()
        self.quant_in = Stub()
        self.conv = Conv2D(3, 6, 3, stride=1, padding=1)
        self.quant = Stub(quanter)
        self.quant_out = Stub()

    def forward(self, inputs):
        out = self.conv(inputs)
        out = self.quant(out)
        out = paddle.nn.functional.relu(out)
        return self.quant_out(out)

class TestStub(unittest.TestCase):
    def test_stub(self):
        model = Model()
        q_config = QuantConfig(activation=quanter, weight=quanter)
        qat = QAT(q_config)
        q_config.add_layer_config(model.quant_in, activation=None, weight=None)
        quant_model = qat.quantize(model)
        image = paddle.rand([1, 3, 32, 32], dtype="float32")
        out = model(image)
        out = quant_model(image)
        out.backward()
        quanter_count = 0
        for _layer in quant_model.sublayers(True):
            if isinstance(_layer, FakeQuanterWithAbsMaxObserverLayer):
                quanter_count += 1
        self.assertEqual(quanter_count, 5)


if __name__ == '__main__':
    unittest.main()
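
The docstrings above mention PTQ as well as QAT. A minimal sketch of the analogous
post-training flow, under the assumption that `PTQ` and `AbsmaxObserver` are available
in paddle.quantization at this point (neither is exercised by this diff):

from paddle.quantization import PTQ, QuantConfig
from paddle.quantization.observers import AbsmaxObserver  # assumed observer class

observer = AbsmaxObserver()
ptq_config = QuantConfig(activation=observer, weight=observer)
# Stubs are replaced by observing QuanterStubs, then statistics are collected.
ptq_model = PTQ(ptq_config).quantize(Model())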