Unverified commit b5809912, authored by whs, committed by GitHub

Add stub for quantization (#50510)

Parent 2451841f
@@ -24,5 +24,6 @@ from .functional_layers import flatten  # noqa: F401
 from .functional_layers import matmul  # noqa: F401
 from .quant_layers import QuantStub  # noqa: F401
 from . import qat
+from .stub import Stub
-__all__ = []
+__all__ = ["Stub"]
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Define stub used in quantization."""
from paddle.nn import Layer

class Stub(Layer):
    r"""
    The stub is used as a placeholder that will be replaced by an observer before PTQ or QAT.
    It is hard to assign a quantization configuration to a functional API called in the
    forward of a layer. Instead, we can create a stub, add it to the sublayers of the layer,
    and call the stub before the functional API in the forward. The observer held by the
    stub will observe or quantize the inputs of the functional API.

    Args:
        observer(QuanterFactory) - The factory that configures the observer to be inserted.
            If 'observer' is None, a global configuration will be used to create the observer.

    Examples:
        .. code-block:: python

            import paddle
            from paddle.nn.quant import Stub
            from paddle.quantization.quanters import FakeQuanterWithAbsMaxObserver
            from paddle.nn import Conv2D
            from paddle.quantization import QAT, QuantConfig

            quanter = FakeQuanterWithAbsMaxObserver(moving_rate=0.9)

            class Model(paddle.nn.Layer):
                def __init__(self, num_classes=10):
                    super(Model, self).__init__()
                    self.conv = Conv2D(3, 6, 3, stride=1, padding=1)
                    self.quant = Stub(quanter)

                def forward(self, inputs):
                    out = self.conv(inputs)
                    out = self.quant(out)
                    return paddle.nn.functional.relu(out)

            model = Model()
            q_config = QuantConfig(activation=quanter, weight=quanter)
            qat = QAT(q_config)
            quant_model = qat.quantize(model)
            print(quant_model)
    """

    def __init__(self, observer=None):
        super(Stub, self).__init__()
        self._observer = observer

    def forward(self, input):
        return input

class QuanterStub(Layer):
    r"""
    It is an identity layer with an observer that observes the input.
    Before QAT or PTQ, each stub in the model will be replaced with an instance of QuanterStub.
    The user should not use this class directly.

    Args:
        layer(Stub) - The stub layer holding an observer factory. If the observer of the
            stub layer is None, 'q_config' will be used to create an observer instance.
        q_config(QuantConfig) - The quantization configuration for the current stub layer.
    """

    def __init__(self, layer: Stub, q_config):
        super(QuanterStub, self).__init__()
        self._observer = None
        if layer._observer is not None:
            self._observer = layer._observer._instance(layer)
        elif q_config.activation is not None:
            self._observer = q_config.activation._instance(layer)

    def forward(self, input):
        return self._observer(input) if self._observer is not None else input
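
A minimal sketch of the substitution that quantization performs for every Stub in a model,
constructing QuanterStub by hand purely for illustration (in normal use `qat.quantize` does
this through the QAT layer mappings, and QuanterStub is never created directly):

import paddle
from paddle.nn.quant import Stub
from paddle.nn.quant.stub import QuanterStub
from paddle.quantization import QuantConfig
from paddle.quantization.quanters import FakeQuanterWithAbsMaxObserver

# A stub with an explicit quanter, plus the config QAT would receive.
quanter = FakeQuanterWithAbsMaxObserver(moving_rate=0.9)
q_config = QuantConfig(activation=quanter, weight=quanter)
stub = Stub(quanter)

# The swap: the stub's observer factory is instantiated and wrapped.
replaced = QuanterStub(stub, q_config)
x = paddle.rand([1, 3, 8, 8])
y = replaced(x)  # x is observed/fake-quantized and returned with the same shape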
@@ -24,6 +24,7 @@ from .wrapper import ObserveWrapper
 # TODO: Implement quanted layer and fill the mapping dict
 DEFAULT_QAT_LAYER_MAPPINGS: Dict[Layer, Layer] = {
+    nn.quant.Stub: nn.quant.stub.QuanterStub,
     nn.Linear: nn.quant.qat.QuantedLinear,
     nn.Conv2D: nn.quant.qat.QuantedConv2D,
 }
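
A hedged sketch of how a conversion pass can consult this mapping when rewriting a model;
`swap_layer` is a hypothetical helper shown only for illustration, and the real replacement
logic in paddle.quantization may differ:

def swap_layer(layer, q_config):
    # Look up the quantization-aware counterpart registered for this layer type.
    quanted_cls = DEFAULT_QAT_LAYER_MAPPINGS.get(type(layer))
    if quanted_cls is None:
        return layer  # no mapping registered; keep the layer unchanged
    # e.g. nn.quant.Stub -> QuanterStub, nn.Linear -> QuantedLinear
    return quanted_cls(layer, q_config)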
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import paddle
from paddle.nn import Conv2D
from paddle.nn.quant import Stub
from paddle.quantization import QAT, QuantConfig
from paddle.quantization.quanters import FakeQuanterWithAbsMaxObserver
from paddle.quantization.quanters.abs_max import (
    FakeQuanterWithAbsMaxObserverLayer,
)

quanter = FakeQuanterWithAbsMaxObserver(moving_rate=0.9)

class Model(paddle.nn.Layer):
    def __init__(self, num_classes=10):
        super(Model, self).__init__()
        self.quant_in = Stub()
        self.conv = Conv2D(3, 6, 3, stride=1, padding=1)
        self.quant = Stub(quanter)
        self.quant_out = Stub()

    def forward(self, inputs):
        out = self.conv(inputs)
        out = self.quant(out)
        out = paddle.nn.functional.relu(out)
        return self.quant_out(out)

class TestStub(unittest.TestCase):
    def test_stub(self):
        model = Model()
        q_config = QuantConfig(activation=quanter, weight=quanter)
        qat = QAT(q_config)
        q_config.add_layer_config(model.quant_in, activation=None, weight=None)
        quant_model = qat.quantize(model)
        image = paddle.rand([1, 3, 32, 32], dtype="float32")
        out = model(image)
        out = quant_model(image)
        out.backward()
        quanter_count = 0
        for _layer in quant_model.sublayers(True):
            if isinstance(_layer, FakeQuanterWithAbsMaxObserverLayer):
                quanter_count += 1
        self.assertEqual(quanter_count, 5)


if __name__ == '__main__':
    unittest.main()
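
The docstrings above mention PTQ as well as QAT. A minimal sketch of the analogous
post-training flow, under the assumption that `PTQ` and `AbsmaxObserver` are available
in paddle.quantization at this point (neither is exercised by this diff):

from paddle.quantization import PTQ, QuantConfig
from paddle.quantization.observers import AbsmaxObserver  # assumed observer class

observer = AbsmaxObserver()
ptq_config = QuantConfig(activation=observer, weight=observer)
# Stubs are replaced by observing QuanterStubs, then statistics are collected.
ptq_model = PTQ(ptq_config).quantize(Model())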