From 549df29046dc4bebe283aaeb926aa87121a721f3 Mon Sep 17 00:00:00 2001
From: Guanghua Yu <742925032@qq.com>
Date: Fri, 20 May 2022 10:26:03 +0800
Subject: [PATCH] add yolov3 PrunerQAT joint compression strategy (#6008)

* add yolov3 PrunerQAT joint compression strategy

* update readme
---
 configs/slim/README.md                        |  1 +
 .../yolov3_mobilenetv1_prune_qat.yml          | 19 ++++
 ppdet/engine/trainer.py                       |  2 +-
 ppdet/slim/__init__.py                        |  2 +-
 ppdet/slim/prune.py                           | 66 +++++++++++++++++++
 5 files changed, 88 insertions(+), 2 deletions(-)
 create mode 100644 configs/slim/extensions/yolov3_mobilenetv1_prune_qat.yml

diff --git a/configs/slim/README.md b/configs/slim/README.md
index caba1e8b2..4eabd73b5 100755
--- a/configs/slim/README.md
+++ b/configs/slim/README.md
@@ -179,3 +179,4 @@ python3.7 tools/post_quant.py -c configs/ppyolo/ppyolo_mbv3_large_coco.yml --sli
 | ------------------ | ------------ | -------- | :---------: |:---------: |:---------: | :---------: |:----------------------------------------------------------: | :----------------------------------------------------------: | :----------------------------------------------------------: |
 | YOLOv3-MobileNetV1 | baseline | 608 | 24.65 | 94.2 | 332.0ms | 29.4 | [下载链接](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml) | - |
 | YOLOv3-MobileNetV1 | 蒸馏+剪裁 | 608 | 7.54(-69.4%) | 30.9(-67.2%) | 166.1ms | 28.4(-1.0) | [下载链接](https://paddledet.bj.bcebos.com/models/slim/yolov3_mobilenet_v1_coco_distill_prune.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml) | [slim配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/slim/extensions/yolov3_mobilenet_v1_coco_distill_prune.yml) |
+| YOLOv3-MobileNetV1 | 剪裁+量化 | 608 | - | - | - | - | - | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_mobilenet_v1_270e_voc.yml) | [slim配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/slim/extensions/yolov3_mobilenetv1_prune_qat.yml) |
diff --git a/configs/slim/extensions/yolov3_mobilenetv1_prune_qat.yml b/configs/slim/extensions/yolov3_mobilenetv1_prune_qat.yml
new file mode 100644
index 000000000..ff17ea0b4
--- /dev/null
+++ b/configs/slim/extensions/yolov3_mobilenetv1_prune_qat.yml
@@ -0,0 +1,19 @@
+# Weights of yolov3_mobilenet_v1_voc
+pretrain_weights: https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_voc.pdparams
+slim: PrunerQAT
+
+PrunerQAT:
+  criterion: fpgm
+  pruned_params: ['conv2d_27.w_0', 'conv2d_28.w_0', 'conv2d_29.w_0',
+                  'conv2d_30.w_0', 'conv2d_31.w_0', 'conv2d_32.w_0',
+                  'conv2d_34.w_0', 'conv2d_35.w_0', 'conv2d_36.w_0',
+                  'conv2d_37.w_0', 'conv2d_38.w_0', 'conv2d_39.w_0',
+                  'conv2d_41.w_0', 'conv2d_42.w_0', 'conv2d_43.w_0',
+                  'conv2d_44.w_0', 'conv2d_45.w_0', 'conv2d_46.w_0']
+  pruned_ratios: [0.1,0.2,0.2,0.2,0.2,0.1,0.2,0.3,0.3,0.3,0.2,0.1,0.3,0.4,0.4,0.4,0.4,0.3]
+  print_prune_params: False
+  quant_config: {
+    'weight_quantize_type': 'channel_wise_abs_max', 'activation_quantize_type': 'moving_average_abs_max',
+    'weight_bits': 8, 'activation_bits': 8, 'dtype': 'int8', 'window_size': 10000, 'moving_rate': 0.9,
+    'quantizable_layer_type': ['Conv2D', 'Linear']}
+  print_qat_model: True
diff --git a/ppdet/engine/trainer.py b/ppdet/engine/trainer.py
index 0d8d19458..d3c4d7d9c 100644
--- a/ppdet/engine/trainer.py
+++ b/ppdet/engine/trainer.py
@@ -785,7 +785,7 @@ class Trainer(object):
             save_dir)
 
         # dy2st and save model
-        if 'slim' not in self.cfg or self.cfg['slim_type'] != 'QAT':
+        if 'slim' not in self.cfg or 'QAT' not in self.cfg['slim_type']:
             paddle.jit.save(
                 static_model,
                 os.path.join(save_dir, 'model'),
diff --git a/ppdet/slim/__init__.py b/ppdet/slim/__init__.py
index 8b343eb60..5347d6046 100644
--- a/ppdet/slim/__init__.py
+++ b/ppdet/slim/__init__.py
@@ -82,7 +82,7 @@ def build_slim_model(cfg, slim_cfg, mode='train'):
         slim = create(cfg.slim)
         cfg['slim_type'] = cfg.slim
         # TODO: fix quant export model in framework.
-        if mode == 'test' and slim_load_cfg['slim'] == 'QAT':
+        if mode == 'test' and 'QAT' in slim_load_cfg['slim']:
             slim.quant_config['activation_preprocess_type'] = None
         cfg['model'] = slim(model)
         cfg['slim'] = slim
diff --git a/ppdet/slim/prune.py b/ppdet/slim/prune.py
index 70d3de369..28ffb7588 100644
--- a/ppdet/slim/prune.py
+++ b/ppdet/slim/prune.py
@@ -83,3 +83,69 @@ class Pruner(object):
                 pruned_flops, (ori_flops - pruned_flops) / ori_flops))
 
         return model
+
+
+@register
+@serializable
+class PrunerQAT(object):
+    def __init__(self, criterion, pruned_params, pruned_ratios,
+                 print_prune_params, quant_config, print_qat_model):
+        super(PrunerQAT, self).__init__()
+        assert criterion in ['l1_norm', 'fpgm'], \
+            "unsupported prune criterion: {}".format(criterion)
+        # Pruner hyperparameter
+        self.criterion = criterion
+        self.pruned_params = pruned_params
+        self.pruned_ratios = pruned_ratios
+        self.print_prune_params = print_prune_params
+        # QAT hyperparameter
+        self.quant_config = quant_config
+        self.print_qat_model = print_qat_model
+
+    def __call__(self, model):
+        # FIXME: adapt to network graph when Training and inference are
+        # inconsistent, now only supports prune inference network graph.
+        model.eval()
+        paddleslim = try_import('paddleslim')
+        from paddleslim.analysis import dygraph_flops as flops
+        input_spec = [{
+            "image": paddle.ones(
+                shape=[1, 3, 640, 640], dtype='float32'),
+            "im_shape": paddle.full(
+                [1, 2], 640, dtype='float32'),
+            "scale_factor": paddle.ones(
+                shape=[1, 2], dtype='float32')
+        }]
+        if self.print_prune_params:
+            print_prune_params(model)
+
+        ori_flops = flops(model, input_spec) / 1000
+        logger.info("FLOPs before pruning: {}GFLOPs".format(ori_flops))
+        if self.criterion == 'fpgm':
+            pruner = paddleslim.dygraph.FPGMFilterPruner(model, input_spec)
+        elif self.criterion == 'l1_norm':
+            pruner = paddleslim.dygraph.L1NormFilterPruner(model, input_spec)
+
+        logger.info("pruned params: {}".format(self.pruned_params))
+        pruned_ratios = [float(n) for n in self.pruned_ratios]
+        ratios = {}
+        for i, param in enumerate(self.pruned_params):
+            ratios[param] = pruned_ratios[i]
+        pruner.prune_vars(ratios, [0])
+        pruned_flops = flops(model, input_spec) / 1000
+        logger.info("FLOPs after pruning: {}GFLOPs; pruned ratio: {}".format(
+            pruned_flops, (ori_flops - pruned_flops) / ori_flops))
+
+        self.quanter = paddleslim.dygraph.quant.QAT(config=self.quant_config)
+
+        self.quanter.quantize(model)
+
+        if self.print_qat_model:
+            logger.info("Quantized model:")
+            logger.info(model)
+
+        return model
+
+    def save_quantized_model(self, layer, path, input_spec=None, **config):
+        self.quanter.save_quantized_model(
+            model=layer, path=path, input_spec=input_spec, **config)
-- 
GitLab
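
For reference, a minimal usage sketch (an assumption, not part of the patch) following the tools/train.py / tools/export_model.py --slim_config workflow documented in configs/slim/README.md; the weights path passed via -o is illustrative:

    # joint prune + QAT training of the VOC YOLOv3-MobileNetV1 baseline
    python tools/train.py -c configs/yolov3/yolov3_mobilenet_v1_270e_voc.yml \
        --slim_config configs/slim/extensions/yolov3_mobilenetv1_prune_qat.yml

    # export; because cfg['slim_type'] now contains 'QAT', Trainer.export saves
    # the model through PrunerQAT.save_quantized_model instead of paddle.jit.save
    python tools/export_model.py -c configs/yolov3/yolov3_mobilenet_v1_270e_voc.yml \
        --slim_config configs/slim/extensions/yolov3_mobilenetv1_prune_qat.yml \
        -o weights=output/yolov3_mobilenetv1_prune_qat/model_final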