# Copyright (c) 2021  PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import copy
import logging

import paddle
from ...common import get_logger

_logger = get_logger(__name__, level=logging.INFO)

WEIGHT_QUANTIZATION_TYPES = [
    'abs_max', 'channel_wise_abs_max', 'range_abs_max', 'moving_average_abs_max'
]

ACTIVATION_QUANTIZATION_TYPES = [
    'abs_max', 'range_abs_max', 'moving_average_abs_max'
]

BUILT_IN_PREPROCESS_TYPES = ['PACT']

VALID_DTYPES = ['int8']

__all__ = ['QAT']

_quant_config_default = {
    # weight preprocess type, default is None and no preprocessing is performed. 
    'weight_preprocess_type': None,
    # activation preprocess type, default is None and no preprocessing is performed.
    'activation_preprocess_type': None,
    # weight quantize type, default is 'channel_wise_abs_max'
    'weight_quantize_type': 'channel_wise_abs_max',
    # activation quantize type, default is 'moving_average_abs_max'
    'activation_quantize_type': 'moving_average_abs_max',
    # weight quantize bit num, default is 8
    'weight_bits': 8,
    # activation quantize bit num, default is 8
    'activation_bits': 8,
    # data type after quantization, default is 'int8' (currently only 'int8' is supported)
    'dtype': 'int8',
    # window size for 'range_abs_max' quantization. default is 10000
    'window_size': 10000,
    # The decay coefficient of moving average, default is 0.9
    'moving_rate': 0.9,
    # for dygraph quantization, layers whose type is in quantizable_layer_type will be quantized
    'quantizable_layer_type': ['Conv2D', 'Linear'],
    # whether fuse conv and bn before QAT
    'fuse_conv_bn': False,
    # Whether to export the quantized model with format of ONNX. Default is True.
    'onnx_format': True,
}
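
# An illustrative sketch (assumption, not used by this module): a config passed to
# QAT(config=...) only needs the keys that differ from the defaults above, e.g.
#
#     {
#         'activation_preprocess_type': 'PACT',
#         'quantizable_layer_type': ['Conv2D', 'Linear'],
#     }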


def _parse_configs(user_config):
    """
    Check whether the user's config is valid and merge it with the default config.
    Args:
        user_config(dict): user's config.
    Return:
        configs(dict): the final config that will be used.
    """

    configs = copy.deepcopy(_quant_config_default)
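    # user-supplied keys override these defaults; every key missing from user_config
    # keeps its default value (e.g. passing {'weight_bits': 4} only changes weight_bits).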
    configs.update(user_config)

    # check if configs is valid
    weight_types = WEIGHT_QUANTIZATION_TYPES
    activation_types = ACTIVATION_QUANTIZATION_TYPES

    assert configs['weight_preprocess_type'] in BUILT_IN_PREPROCESS_TYPES or configs['weight_preprocess_type'] is None, \
        "Unknown weight_preprocess_type: {}. only supports {} ".format(configs['weight_preprocess_type'],
                BUILT_IN_PREPROCESS_TYPES)

    assert configs['activation_preprocess_type'] in BUILT_IN_PREPROCESS_TYPES or configs['activation_preprocess_type'] is None, \
        "Unknown activation_preprocess_type: {}. only supports {}".format(configs['activation_preprocess_type'],
                BUILT_IN_PREPROCESS_TYPES)

    assert configs['weight_quantize_type'] in WEIGHT_QUANTIZATION_TYPES, \
        "Unknown weight_quantize_type: {}. only supports {} ".format(configs['weight_quantize_type'],
                WEIGHT_QUANTIZATION_TYPES)

    assert configs['activation_quantize_type'] in ACTIVATION_QUANTIZATION_TYPES, \
        "Unknown activation_quantize_type: {}. only supports {}".format(configs['activation_quantize_type'],
                ACTIVATION_QUANTIZATION_TYPES)

    assert isinstance(configs['weight_bits'], int), \
        "weight_bits must be int value."

    assert (configs['weight_bits'] >= 1 and configs['weight_bits'] <= 16), \
        "weight_bits should be between 1 and 16."

    assert isinstance(configs['activation_bits'], int), \
        "activation_bits must be int value."

    assert (configs['activation_bits'] >= 1 and configs['activation_bits'] <= 16), \
        "activation_bits should be between 1 and 16."

    assert isinstance(configs['dtype'], str), \
        "dtype must be a str."

    assert (configs['dtype'] in VALID_DTYPES), \
        "dtype can only be " + " ".join(VALID_DTYPES)

    assert isinstance(configs['window_size'], int), \
        "window_size must be int value, window size for 'range_abs_max' quantization, default is 10000."

    assert isinstance(configs['moving_rate'], float), \
        "moving_rate must be float value, The decay coefficient of moving average, default is 0.9."

    assert isinstance(configs['quantizable_layer_type'], list), \
        "quantizable_layer_type must be a list"

    return configs


class PACT(paddle.nn.Layer):
    def __init__(self):
        super(PACT, self).__init__()
        alpha_attr = paddle.ParamAttr(
            name=self.full_name() + ".pact",
            initializer=paddle.nn.initializer.Constant(value=100),
            learning_rate=1000.0)

        self.alpha = self.create_parameter(
            shape=[1], attr=alpha_attr, dtype='float32')

    def forward(self, x):
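        # PACT clips the activation to the learnable range [-alpha, alpha]; the two
        # ReLU terms below are an equivalent reformulation of clip(x, -alpha, alpha)
        # that lets gradients flow into alpha.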
        out_left = paddle.nn.functional.relu(x - self.alpha)
        out_right = paddle.nn.functional.relu(-self.alpha - x)
        x = x - out_left + out_right
        return x


class QAT(object):
    """
    Quant Aware Training (QAT): adds fake-quant logic to the given quantizable layers,
    i.e. inserts quant/dequant computation for both activation inputs and weight inputs.
    """

    def __init__(self,
                 config=None,
                 weight_preprocess=None,
                 act_preprocess=None,
                 weight_quantize=None,
                 act_quantize=None):
        """
        Args:
            config(dict, optional): Configs for quantization. If None, the default
                    config will be used. Default: None.
            weight_preprocess(class, optional): Defines how to preprocess weights
                    before quantization. Useful for quickly testing whether a custom
                    preprocess method works. The layer's input is the non-quantized
                    weight and its output is the processed weight to be quantized.
                    If None, the preprocess method defined by 'weight_preprocess_type'
                    will be used. Default: None.
            act_preprocess(class, optional): Defines how to preprocess activations
                    before quantization. Useful for quickly testing whether a custom
                    preprocess method works. The layer's input is the non-quantized
                    activation and its output is the processed activation to be
                    quantized. If None, the preprocess method defined by
                    'activation_preprocess_type' will be used. Default: None.
            weight_quantize(class, optional): Defines how to quantize weights. Useful
                    for quickly testing whether a custom quantization method works.
                    The layer should implement both quantization and dequantization,
                    i.e. its input is the non-quantized weight and its output is the
                    dequantized weight. If None, the quantize op defined by
                    'weight_quantize_type' will be used. Default: None.
            act_quantize(class, optional): Defines how to quantize activations. Useful
                    for quickly testing whether a custom quantization method works.
                    The layer should implement both quantization and dequantization,
                    i.e. its input is the non-quantized activation and its output is
                    the dequantized activation. If None, the quantize op defined by
                    'activation_quantize_type' will be used. Default: None.
        """
        if config is None:
            config = _quant_config_default
        else:
            assert isinstance(config, dict), "config must be dict"
            config = _parse_configs(config)
        self.config = config

        self.weight_preprocess = PACT if self.config[
            'weight_preprocess_type'] == 'PACT' else None
        self.act_preprocess = PACT if self.config[
            'activation_preprocess_type'] == 'PACT' else None

        self.weight_preprocess = weight_preprocess if weight_preprocess is not None \
            else self.weight_preprocess
        self.act_preprocess = act_preprocess if act_preprocess is not None \
            else self.act_preprocess
        self.weight_quantize = weight_quantize
        self.act_quantize = act_quantize

        # TODO: remove try-except when the version is stable
        try:
            self.imperative_qat = paddle.quantization.ImperativeQuantAware(
                weight_bits=self.config['weight_bits'],
                activation_bits=self.config['activation_bits'],
                weight_quantize_type=self.config['weight_quantize_type'],
                activation_quantize_type=self.config[
                    'activation_quantize_type'],
                moving_rate=self.config['moving_rate'],
                quantizable_layer_type=self.config['quantizable_layer_type'],
                fuse_conv_bn=self.config[
                    'fuse_conv_bn'],  # support Paddle > 2.3
                weight_preprocess_layer=self.weight_preprocess,
                act_preprocess_layer=self.act_preprocess,
                weight_quantize_layer=self.weight_quantize,
                act_quantize_layer=self.act_quantize,
                onnx_format=self.config['onnx_format'],  # support Paddle >= 2.4
            )
        except Exception:
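            # Fallback for older Paddle versions whose ImperativeQuantAware does not
            # accept the fuse_conv_bn / onnx_format arguments (see the comments above).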
            self.imperative_qat = paddle.quantization.ImperativeQuantAware(
                weight_bits=self.config['weight_bits'],
                activation_bits=self.config['activation_bits'],
                weight_quantize_type=self.config['weight_quantize_type'],
                activation_quantize_type=self.config[
                    'activation_quantize_type'],
                moving_rate=self.config['moving_rate'],
                quantizable_layer_type=self.config['quantizable_layer_type'],
                weight_preprocess_layer=self.weight_preprocess,
                act_preprocess_layer=self.act_preprocess,
                weight_quantize_layer=self.weight_quantize,
                act_quantize_layer=self.act_quantize)

    def quantize(self, model, inplace=True):
        """
        Quantize the input model.

        Args:
            model(paddle.nn.Layer): The model to be quantized.
            inplace(bool): Whether to apply quantization to the input model in place.
                           Default: True.
        Returns:
            quantized_model(paddle.nn.Layer): The quantized model.
        """
        assert isinstance(model, paddle.nn.Layer), \
            "The model must be the instance of paddle.nn.Layer."
        if self.weight_preprocess is not None or self.act_preprocess is not None:
            self._model = copy.deepcopy(model)

        if inplace:
            quantize_model = self.imperative_qat.quantize(model)
            quant_model = quantize_model if quantize_model is not None else model
        else:
            quant_model = copy.deepcopy(model)
            quantize_model = self.imperative_qat.quantize(quant_model)
            if quantize_model is not None:
                quant_model = quantize_model

        return quant_model

    def save_quantized_model(self, model, path, input_spec=None):
        """
        Save the quantized inference model.

        Args:
            model (Layer): The model to be saved.
            path (str): The path prefix to save the model. The format is
                ``dirname/file_prefix`` or ``file_prefix``.
            input_spec (list[InputSpec|Tensor], optional): Describes the input
                of the saved model's forward method, which can be described by
                InputSpec or example Tensor. If None, all input variables of 
                the original Layer's forward method would be the inputs of
                the saved model. Default: None.

        Returns:
            None
        """
        if self.weight_preprocess is not None or self.act_preprocess is not None:
            training = model.training
            model = self._remove_preprocess(model)
            if training:
                model.train()
            else:
                model.eval()

        self.imperative_qat.save_quantized_model(
            layer=model, path=path, input_spec=input_spec)

    def _remove_preprocess(self, model):
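        # Rebuild the quant-aware wrapper without the PACT preprocess layers, then
        # re-quantize the fp32 copy kept in self._model and load the trained weights
        # back, so the exported inference model contains no preprocess ops.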
        state_dict = model.state_dict()
        try:
            self.imperative_qat = paddle.quantization.ImperativeQuantAware(
                weight_bits=self.config['weight_bits'],
                activation_bits=self.config['activation_bits'],
                weight_quantize_type=self.config['weight_quantize_type'],
                activation_quantize_type=self.config[
                    'activation_quantize_type'],
                moving_rate=self.config['moving_rate'],
                quantizable_layer_type=self.config['quantizable_layer_type'],
                onnx_format=self.config['onnx_format'],  # support Paddle >= 2.4
            )
        except Exception:
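            # Fallback for older Paddle versions without onnx_format support.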
            self.imperative_qat = paddle.quantization.ImperativeQuantAware(
                weight_bits=self.config['weight_bits'],
                activation_bits=self.config['activation_bits'],
                weight_quantize_type=self.config['weight_quantize_type'],
                activation_quantize_type=self.config[
                    'activation_quantize_type'],
                moving_rate=self.config['moving_rate'],
                quantizable_layer_type=self.config['quantizable_layer_type'])

        paddle.disable_static()
        if hasattr(model, "_layers"):
            model = model._layers
        model = self._model
        self.imperative_qat.quantize(model)
        model.set_state_dict(state_dict)
        paddle.enable_static()

        return model
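

# A minimal usage sketch (illustration only, not part of this module). `net` is
# assumed to be any paddle.nn.Layer and `train(...)` a user-defined fine-tuning
# loop; the input shape is likewise just an example.
#
#     quanter = QAT(config={'activation_preprocess_type': 'PACT'})
#     net = quanter.quantize(net)          # insert fake-quant (and PACT) layers
#     train(net)                           # fine-tune with quantization in the loop
#     quanter.save_quantized_model(
#         net, './quant_model',
#         input_spec=[paddle.static.InputSpec(
#             shape=[None, 3, 224, 224], dtype='float32')])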