ptq.py 2.8 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import copy

import paddle.distributed.fleet as fleet
from paddle.nn import Layer

from .config import QuantConfig
from .quantize import Quantization


class PTQ(Quantization):
    """
    Applying post training quantization to the model.

    The quantization behavior is driven by the ``QuantConfig`` passed to the
    constructor; ``quantize`` prepares a model by converting its layers to
    quant layers and inserting activation observers.
    """

    def __init__(self, config: QuantConfig):
        """
        Args:
            config (QuantConfig): The quantization configuration forwarded
                to the ``Quantization`` base class.
        """
        super().__init__(config)

    def _is_parallel_training(self):
        """
        Report whether the current job is considered parallel training.

        Returns:
            bool: ``True`` when ``fleet.worker_num()`` is greater than 2,
            ``False`` otherwise or when the query raises.
        """
        try:
            # NOTE(review): the threshold is `> 2`, so a 2-worker job is NOT
            # treated as parallel training. This looks like it may have been
            # intended as `> 1` -- confirm before changing, since it alters
            # which jobs are rejected by `quantize(inplace=False)`.
            return fleet.worker_num() > 2
        except Exception:  # fleet is not initialized
            return False

    def quantize(self, model: Layer, inplace=False):
        r"""
        Create a model for post-training quantization.

        The quantization configuration will be propagated in the model.
        And it will insert observers into the model to collect and compute
        quantization parameters.

        Args:
            model (Layer): The model to be quantized. It must already be in
                evaluation mode (``model.training`` is ``False``).
            inplace (bool): Whether to modify the model in-place. When
                ``False`` (the default), a deep copy of ``model`` is
                prepared and the original is left untouched.

        Returns:
            Layer: The prepared model for post-training quantization. This is
            ``model`` itself when ``inplace`` is ``True``, otherwise the copy.

        Raises:
            AssertionError: If ``inplace`` is ``False`` while parallel
                training is detected, or if ``model`` is in training mode.

        Examples:
            .. code-block:: python

                from paddle.quantization import PTQ, QuantConfig
                from paddle.quantization.observers import AbsmaxObserver
                from paddle.vision.models import LeNet

                observer = AbsmaxObserver()
                q_config = QuantConfig(activation=observer, weight=observer)
                ptq = PTQ(q_config)
                model = LeNet()
                model.eval()
                quant_model = ptq.quantize(model)
                print(quant_model)
        """
        # Validate everything up front so that a rejected model is never
        # deep-copied for nothing. The order of the two checks is preserved.
        if not inplace:
            # Only the copying path (inplace=False) is guarded against
            # parallel training.
            assert (
                not self._is_parallel_training()
            ), "'inplace=False' is not compatible with parallel training."
        # The check is intentionally on the caller's `model`, not on the
        # copy: the caller must have switched to evaluation mode explicitly.
        assert (
            not model.training
        ), "Post-Training Quantization should not work on training models. Please set evaluation mode by model.eval()."

        if inplace:
            _model = model
        else:
            _model = copy.deepcopy(model)
            _model.eval()

        self._config._specify(_model)
        self._convert_to_quant_layers(_model, self._config)
        self._insert_activation_observers(_model, self._config)
        return _model