# prune.py

# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
from paddle.utils import try_import

from ppdet.core.workspace import register, serializable
from ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)


def print_prune_params(model):
    """Log the name and shape of every entry in ``model.state_dict()``.

    Args:
        model: object exposing ``state_dict()``; each value in the returned
            mapping must provide ``name`` and ``shape`` attributes.
    """
    for tensor in model.state_dict().values():
        message = 'Parameter name: {}, shape: {}'.format(tensor.name,
                                                         tensor.shape)
        logger.info(message)


@register
@serializable
class Pruner(object):
    """Callable that applies PaddleSlim filter pruning to a model.

    Args:
        criterion (str): pruning criterion, either 'l1_norm' or 'fpgm'.
        pruned_params (list): names of the parameters to prune.
        pruned_ratios (list): pruning ratios, matched to ``pruned_params``
            by position; each entry is converted with ``float()``.
        print_params (bool): when True, log every parameter name and shape
            of the model before pruning.
    """

    def __init__(self,
                 criterion,
                 pruned_params,
                 pruned_ratios,
                 print_params=False):
        super(Pruner, self).__init__()
        supported_criteria = ['l1_norm', 'fpgm']
        assert criterion in supported_criteria, \
            "unsupported prune criterion: {}".format(criterion)
        self.print_params = print_params
        self.pruned_ratios = pruned_ratios
        self.pruned_params = pruned_params
        self.criterion = criterion

    def __call__(self, model):
        """Prune ``model`` and return it.

        The model is switched to eval mode, its FLOPs are logged before and
        after pruning, and the configured parameters are pruned with the
        configured ratios.

        Args:
            model: the network to prune.

        Returns:
            The same ``model`` object that was passed in.
        """
        # FIXME: adapt to network graph when Training and inference are
        # inconsistent, now only supports prune inference network graph.
        model.eval()
        paddleslim = try_import('paddleslim')
        from paddleslim.analysis import dygraph_flops as flops

        # Dummy 640x640 single-image batch used both for FLOPs counting and
        # for constructing the pruner.
        image = paddle.ones(shape=[1, 3, 640, 640], dtype='float32')
        im_shape = paddle.full([1, 2], 640, dtype='float32')
        scale_factor = paddle.ones(shape=[1, 2], dtype='float32')
        input_spec = [{
            "image": image,
            "im_shape": im_shape,
            "scale_factor": scale_factor
        }]

        if self.print_params:
            print_prune_params(model)

        giga = 1000**3
        flops_before = flops(model, input_spec) / giga
        logger.info("FLOPs before pruning: {}GFLOPs".format(flops_before))

        if self.criterion == 'fpgm':
            pruner = paddleslim.dygraph.FPGMFilterPruner(model, input_spec)
        elif self.criterion == 'l1_norm':
            pruner = paddleslim.dygraph.L1NormFilterPruner(model, input_spec)

        logger.info("pruned params: {}".format(self.pruned_params))
        ratio_values = [float(value) for value in self.pruned_ratios]
        # Index by position (rather than zip) so that a ratio list shorter
        # than the parameter list still raises IndexError.
        ratios = {
            name: ratio_values[idx]
            for idx, name in enumerate(self.pruned_params)
        }
        # The second argument is the list of axes handed to PaddleSlim.
        pruner.prune_vars(ratios, [0])

        flops_after = flops(model, input_spec) / giga
        reduction = (flops_before - flops_after) / flops_before
        logger.info("FLOPs after pruning: {}GFLOPs; pruned ratio: {}".format(
            flops_after, reduction))

        return model


@register
@serializable
class PrunerQAT(object):
    """Callable that prunes a model and then applies quantization-aware
    training (QAT) wrapping via PaddleSlim.

    Args:
        criterion (str): pruning criterion, either 'l1_norm' or 'fpgm'.
        pruned_params (list): names of the parameters to prune.
        pruned_ratios (list): pruning ratios, matched to ``pruned_params``
            by position; each entry is converted with ``float()``.
        print_prune_params (bool): when True, log every parameter name and
            shape of the model before pruning.
        quant_config: configuration passed as ``config`` to
            ``paddleslim.dygraph.quant.QAT``.
        print_qat_model (bool): when True, log the model after quantization.
    """

    def __init__(self, criterion, pruned_params, pruned_ratios,
                 print_prune_params, quant_config, print_qat_model):
        super(PrunerQAT, self).__init__()
        assert criterion in ['l1_norm', 'fpgm'], \
            "unsupported prune criterion: {}".format(criterion)
        # Pruner hyperparameter
        self.criterion = criterion
        self.pruned_params = pruned_params
        self.pruned_ratios = pruned_ratios
        self.print_prune_params = print_prune_params
        # QAT hyperparameter
        self.quant_config = quant_config
        self.print_qat_model = print_qat_model

    def __call__(self, model):
        """Prune ``model``, quantize it, and return it.

        The model is switched to eval mode, its FLOPs are logged before and
        after pruning, then a QAT quanter is created, stored on
        ``self.quanter`` and applied to the model.

        Args:
            model: the network to prune and quantize.

        Returns:
            The same ``model`` object that was passed in.
        """
        # FIXME: adapt to network graph when Training and inference are
        # inconsistent, now only supports prune inference network graph.
        model.eval()
        paddleslim = try_import('paddleslim')
        from paddleslim.analysis import dygraph_flops as flops
        # Dummy 640x640 single-image batch used both for FLOPs counting and
        # for constructing the pruner.
        input_spec = [{
            "image": paddle.ones(
                shape=[1, 3, 640, 640], dtype='float32'),
            "im_shape": paddle.full(
                [1, 2], 640, dtype='float32'),
            "scale_factor": paddle.ones(
                shape=[1, 2], dtype='float32')
        }]
        if self.print_prune_params:
            # Inside this method the bare name resolves to the module-level
            # function, not to the boolean attribute of the same name.
            print_prune_params(model)

        # Scale by 1000**3 so the value matches the "GFLOPs" unit in the log
        # message (dividing by 1000 mislabelled the number by a factor of
        # 10**6).
        ori_flops = flops(model, input_spec) / (1000**3)
        logger.info("FLOPs before pruning: {}GFLOPs".format(ori_flops))
        if self.criterion == 'fpgm':
            pruner = paddleslim.dygraph.FPGMFilterPruner(model, input_spec)
        elif self.criterion == 'l1_norm':
            pruner = paddleslim.dygraph.L1NormFilterPruner(model, input_spec)

        logger.info("pruned params: {}".format(self.pruned_params))
        pruned_ratios = [float(n) for n in self.pruned_ratios]
        ratios = {}
        for i, param in enumerate(self.pruned_params):
            ratios[param] = pruned_ratios[i]
        # The second argument is the list of axes handed to PaddleSlim.
        pruner.prune_vars(ratios, [0])
        pruned_flops = flops(model, input_spec) / (1000**3)
        logger.info("FLOPs after pruning: {}GFLOPs; pruned ratio: {}".format(
            pruned_flops, (ori_flops - pruned_flops) / ori_flops))

        # Kept on the instance because save_quantized_model() needs it later.
        self.quanter = paddleslim.dygraph.quant.QAT(config=self.quant_config)

        self.quanter.quantize(model)

        if self.print_qat_model:
            logger.info("Quantized model:")
            logger.info(model)

        return model

    def save_quantized_model(self, layer, path, input_spec=None, **config):
        """Forward to ``self.quanter.save_quantized_model``.

        ``self.quanter`` is only created inside ``__call__``, so this method
        must be used after the instance has been called on a model.

        Args:
            layer: passed to the quanter as ``model``.
            path: passed to the quanter as ``path``.
            input_spec: passed to the quanter as ``input_spec``.
            **config: extra keyword arguments forwarded unchanged.
        """
        self.quanter.save_quantized_model(
            model=layer, path=path, input_spec=input_spec, **config)