diff --git a/python/paddle/nn/quant/__init__.py b/python/paddle/nn/quant/__init__.py
index f1c5f7590c657c5b7e1f80ed759d70baf73b9150..cd221dd29bcfdf0af57743b466f9d779224c0172 100644
--- a/python/paddle/nn/quant/__init__.py
+++ b/python/paddle/nn/quant/__init__.py
@@ -24,5 +24,6 @@ from .functional_layers import flatten  # noqa: F401
 from .functional_layers import matmul  # noqa: F401
 from .quant_layers import QuantStub  # noqa: F401
 from . import qat
+from .stub import Stub
 
-__all__ = []
+__all__ = ["Stub"]
diff --git a/python/paddle/nn/quant/stub.py b/python/paddle/nn/quant/stub.py
new file mode 100644
index 0000000000000000000000000000000000000000..74deb8aa75d10a7188c7bee9aa40586b4b53a747
--- /dev/null
+++ b/python/paddle/nn/quant/stub.py
@@ -0,0 +1,89 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Define stubs used in quantization."""
+
+from paddle.nn import Layer
+
+
+class Stub(Layer):
+    r"""
+    A stub acts as a placeholder that will be replaced by an observer before PTQ or QAT.
+    It is hard to assign a quantization configuration to a functional API called in the
+    forward of a layer. Instead, create a stub, register it as a sublayer of the layer,
+    and call it before the functional API in the forward. The observer held by the stub
+    will observe or quantize the inputs of the functional API.
+
+    Args:
+        observer(QuanterFactory): The factory of the observer to be inserted. If
+            `observer` is None, the global configuration will be used to create it.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+            from paddle.nn import Conv2D
+            from paddle.nn.quant import Stub
+            from paddle.quantization import QAT, QuantConfig
+            from paddle.quantization.quanters import FakeQuanterWithAbsMaxObserver
+
+            quanter = FakeQuanterWithAbsMaxObserver(moving_rate=0.9)
+
+            class Model(paddle.nn.Layer):
+                def __init__(self, num_classes=10):
+                    super(Model, self).__init__()
+                    self.conv = Conv2D(3, 6, 3, stride=1, padding=1)
+                    self.quant = Stub(quanter)
+
+                def forward(self, inputs):
+                    out = self.conv(inputs)
+                    out = self.quant(out)
+                    return paddle.nn.functional.relu(out)
+
+            model = Model()
+            q_config = QuantConfig(activation=quanter, weight=quanter)
+            qat = QAT(q_config)
+            quant_model = qat.quantize(model)
+            print(quant_model)
+    """
+
+    def __init__(self, observer=None):
+        super(Stub, self).__init__()
+        self._observer = observer
+
+    def forward(self, input):
+        return input
+
+
+class QuanterStub(Layer):
+    r"""
+    An identity layer that holds an observer observing the input.
+    Before QAT or PTQ, every stub in the model is replaced with an instance of
+    QuanterStub. Users should not use this class directly.
+
+    Args:
+        layer(Stub): The stub layer holding an observer factory. If the stub layer's
+            observer is None, `q_config` will be used to create an observer instance.
+        q_config(QuantConfig): The quantization configuration for the current stub layer.
+ """ + + def __init__(self, layer: Stub, q_config): + super(QuanterStub, self).__init__() + self._observer = None + if layer._observer is not None: + self._observer = layer._observer._instance(layer) + elif q_config.activation is not None: + self._observer = q_config.activation._instance(layer) + + def forward(self, input): + return self._observer(input) if self._observer is not None else input diff --git a/python/paddle/quantization/config.py b/python/paddle/quantization/config.py index 27624908314d1a59e0343c90ac0b237f71a6248a..8412c7fba90d63c50f47edea01fc85469c12d627 100644 --- a/python/paddle/quantization/config.py +++ b/python/paddle/quantization/config.py @@ -24,6 +24,7 @@ from .wrapper import ObserveWrapper # TODO: Implement quanted layer and fill the mapping dict DEFAULT_QAT_LAYER_MAPPINGS: Dict[Layer, Layer] = { + nn.quant.Stub: nn.quant.stub.QuanterStub, nn.Linear: nn.quant.qat.QuantedLinear, nn.Conv2D: nn.quant.qat.QuantedConv2D, } diff --git a/python/paddle/tests/quantization/test_stub.py b/python/paddle/tests/quantization/test_stub.py new file mode 100644 index 0000000000000000000000000000000000000000..6f0337d1ac48109e1976ed2ac7137a56ca0b5fb4 --- /dev/null +++ b/python/paddle/tests/quantization/test_stub.py @@ -0,0 +1,64 @@ +# copyright (c) 2023 paddlepaddle authors. all rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import paddle +from paddle.nn import Conv2D +from paddle.nn.quant import Stub +from paddle.quantization import QAT, QuantConfig +from paddle.quantization.quanters import FakeQuanterWithAbsMaxObserver +from paddle.quantization.quanters.abs_max import ( + FakeQuanterWithAbsMaxObserverLayer, +) + +quanter = FakeQuanterWithAbsMaxObserver(moving_rate=0.9) + + +class Model(paddle.nn.Layer): + def __init__(self, num_classes=10): + super(Model, self).__init__() + self.quant_in = Stub() + self.conv = Conv2D(3, 6, 3, stride=1, padding=1) + self.quant = Stub(quanter) + self.quant_out = Stub() + + def forward(self, inputs): + out = self.conv(inputs) + out = self.quant(out) + out = paddle.nn.functional.relu(out) + return self.quant_out(out) + + +class TestStub(unittest.TestCase): + def test_stub(self): + model = Model() + q_config = QuantConfig(activation=quanter, weight=quanter) + qat = QAT(q_config) + q_config.add_layer_config(model.quant_in, activation=None, weight=None) + quant_model = qat.quantize(model) + image = paddle.rand([1, 3, 32, 32], dtype="float32") + out = model(image) + out = quant_model(image) + out.backward() + + quanter_count = 0 + for _layer in quant_model.sublayers(True): + if isinstance(_layer, FakeQuanterWithAbsMaxObserverLayer): + quanter_count += 1 + self.assertEqual(quanter_count, 5) + + +if __name__ == '__main__': + unittest.main()