Unverified commit 549df290, authored by Guanghua Yu and committed by GitHub

add yolov3 PrunerQAT joint compression strategy (#6008)

* add yolov3 PrunerQAT joint compression strategy

* update readme
Parent commit: 73ec9173
@@ -179,3 +179,4 @@ python3.7 tools/post_quant.py -c configs/ppyolo/ppyolo_mbv3_large_coco.yml --sli
| Model | Compression strategy | Input size | GFLOPs | Model size (MB) | Latency | Box AP | Download | Config | Slim config |
| ------------------ | ------------ | -------- | :---------: |:---------: |:---------: | :---------: |:----------------------------------------------------------: | :----------------------------------------------------------: | :----------------------------------------------------------: |
| YOLOv3-MobileNetV1 | baseline | 608 | 24.65 | 94.2 | 332.0ms | 29.4 | [Download](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_coco.pdparams) | [Config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml) | - |
| YOLOv3-MobileNetV1 | Distillation + pruning | 608 | 7.54 (-69.4%) | 30.9 (-67.2%) | 166.1ms | 28.4 (-1.0) | [Download](https://paddledet.bj.bcebos.com/models/slim/yolov3_mobilenet_v1_coco_distill_prune.pdparams) | [Config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml) | [Slim config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/slim/extensions/yolov3_mobilenet_v1_coco_distill_prune.yml) |
| YOLOv3-MobileNetV1 | Pruning + quantization | 608 | - | - | - | - | - | [Config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_mobilenet_v1_270e_voc.yml) | [Slim config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/slim/extensions/yolov3_mobilenetv1_prune_qat.yml) |
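
The pruning + quantization row is trained with the new slim config; its metrics are still pending (marked "-"). A typical launch, assuming the standard PaddleDetection `tools/train.py` / `tools/export_model.py` entry points (the output weights path below is illustrative):

```bash
# Hedged sketch: train with the joint prune+QAT slim config, then export
# the quantized inference model. The weights path is illustrative.
python tools/train.py \
    -c configs/yolov3/yolov3_mobilenet_v1_270e_voc.yml \
    --slim_config configs/slim/extensions/yolov3_mobilenetv1_prune_qat.yml

python tools/export_model.py \
    -c configs/yolov3/yolov3_mobilenet_v1_270e_voc.yml \
    --slim_config configs/slim/extensions/yolov3_mobilenetv1_prune_qat.yml \
    -o weights=output/yolov3_mobilenetv1_prune_qat/model_final
```
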
configs/slim/extensions/yolov3_mobilenetv1_prune_qat.yml (new file):

# Weights of yolov3_mobilenet_v1_voc
pretrain_weights: https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_voc.pdparams
slim: PrunerQAT

PrunerQAT:
  criterion: fpgm
  pruned_params: ['conv2d_27.w_0', 'conv2d_28.w_0', 'conv2d_29.w_0',
                  'conv2d_30.w_0', 'conv2d_31.w_0', 'conv2d_32.w_0',
                  'conv2d_34.w_0', 'conv2d_35.w_0', 'conv2d_36.w_0',
                  'conv2d_37.w_0', 'conv2d_38.w_0', 'conv2d_39.w_0',
                  'conv2d_41.w_0', 'conv2d_42.w_0', 'conv2d_43.w_0',
                  'conv2d_44.w_0', 'conv2d_45.w_0', 'conv2d_46.w_0']
  pruned_ratios: [0.1, 0.2, 0.2, 0.2, 0.2, 0.1, 0.2, 0.3, 0.3, 0.3, 0.2, 0.1, 0.3, 0.4, 0.4, 0.4, 0.4, 0.3]
  print_prune_params: False
  quant_config: {
    'weight_quantize_type': 'channel_wise_abs_max',
    'activation_quantize_type': 'moving_average_abs_max',
    'weight_bits': 8, 'activation_bits': 8, 'dtype': 'int8',
    'window_size': 10000, 'moving_rate': 0.9,
    'quantizable_layer_type': ['Conv2D', 'Linear']}
  print_qat_model: True
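
`pruned_params` and `pruned_ratios` are paired positionally: the i-th ratio is applied to the i-th parameter, so the two lists must have equal length (18 entries each here). A minimal sketch of the pairing that `PrunerQAT` builds internally (lists shortened for brevity):

```python
# Illustrative only: mirrors how PrunerQAT turns the two config lists
# into the per-parameter ratio mapping handed to the PaddleSlim pruner.
pruned_params = ['conv2d_27.w_0', 'conv2d_28.w_0']  # shortened
pruned_ratios = [0.1, 0.2]

assert len(pruned_params) == len(pruned_ratios)
ratios = {p: float(r) for p, r in zip(pruned_params, pruned_ratios)}
print(ratios)  # {'conv2d_27.w_0': 0.1, 'conv2d_28.w_0': 0.2}
```
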

@@ -785,7 +785,7 @@ class Trainer(object):
                 save_dir)
         # dy2st and save model
-        if 'slim' not in self.cfg or self.cfg['slim_type'] != 'QAT':
+        if 'slim' not in self.cfg or 'QAT' not in self.cfg['slim_type']:
             paddle.jit.save(
                 static_model,
                 os.path.join(save_dir, 'model'),

@@ -82,7 +82,7 @@ def build_slim_model(cfg, slim_cfg, mode='train'):
         slim = create(cfg.slim)
         cfg['slim_type'] = cfg.slim
         # TODO: fix quant export model in framework.
-        if mode == 'test' and slim_load_cfg['slim'] == 'QAT':
+        if mode == 'test' and 'QAT' in slim_load_cfg['slim']:
             slim.quant_config['activation_preprocess_type'] = None
         cfg['model'] = slim(model)
         cfg['slim'] = slim
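
Both call sites above relax an equality test to a substring test so the new joint strategy takes the same code paths as plain QAT: `'QAT' in 'PrunerQAT'` holds, whereas `'PrunerQAT' == 'QAT'` does not. A quick illustration:

```python
# Under the old equality checks a PrunerQAT model would have been exported
# via the plain paddle.jit.save path and skipped the QAT-specific handling.
for slim_type in ['QAT', 'PrunerQAT', 'Distill']:
    print(slim_type, 'QAT' in slim_type)
# QAT True
# PrunerQAT True
# Distill False
```
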

@@ -83,3 +83,69 @@ class Pruner(object):
            pruned_flops, (ori_flops - pruned_flops) / ori_flops))
        return model

@register
@serializable
class PrunerQAT(object):
    def __init__(self, criterion, pruned_params, pruned_ratios,
                 print_prune_params, quant_config, print_qat_model):
        super(PrunerQAT, self).__init__()
        assert criterion in ['l1_norm', 'fpgm'], \
            "unsupported prune criterion: {}".format(criterion)
        # Pruner hyperparameters
        self.criterion = criterion
        self.pruned_params = pruned_params
        self.pruned_ratios = pruned_ratios
        self.print_prune_params = print_prune_params
        # QAT hyperparameters
        self.quant_config = quant_config
        self.print_qat_model = print_qat_model

    def __call__(self, model):
        # FIXME: adapt to the network graph when training and inference are
        # inconsistent; for now only the inference network graph is pruned.
        model.eval()
        paddleslim = try_import('paddleslim')
        from paddleslim.analysis import dygraph_flops as flops
        input_spec = [{
            "image": paddle.ones(
                shape=[1, 3, 640, 640], dtype='float32'),
            "im_shape": paddle.full(
                [1, 2], 640, dtype='float32'),
            "scale_factor": paddle.ones(
                shape=[1, 2], dtype='float32')
        }]
        if self.print_prune_params:
            print_prune_params(model)

        ori_flops = flops(model, input_spec) / 1000
        logger.info("FLOPs before pruning: {}GFLOPs".format(ori_flops))
        if self.criterion == 'fpgm':
            pruner = paddleslim.dygraph.FPGMFilterPruner(model, input_spec)
        elif self.criterion == 'l1_norm':
            pruner = paddleslim.dygraph.L1NormFilterPruner(model, input_spec)

        logger.info("pruned params: {}".format(self.pruned_params))
        pruned_ratios = [float(n) for n in self.pruned_ratios]
        ratios = {}
        for i, param in enumerate(self.pruned_params):
            ratios[param] = pruned_ratios[i]
        # Prune along axis 0, i.e. the output channels of each conv weight.
        pruner.prune_vars(ratios, [0])
        pruned_flops = flops(model, input_spec) / 1000
        logger.info("FLOPs after pruning: {}GFLOPs; pruned ratio: {}".format(
            pruned_flops, (ori_flops - pruned_flops) / ori_flops))

        # Wrap the pruned model with fake-quant ops for quantization-aware
        # training; the quanter is kept so the model can be exported later.
        self.quanter = paddleslim.dygraph.quant.QAT(config=self.quant_config)
        self.quanter.quantize(model)
        if self.print_qat_model:
            logger.info("Quantized model:")
            logger.info(model)
        return model

    def save_quantized_model(self, layer, path, input_spec=None, **config):
        self.quanter.save_quantized_model(
            model=layer, path=path, input_spec=input_spec, **config)
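
An end-to-end sketch of how the class is driven. This is illustrative only: in PaddleDetection the wiring is done by `build_slim_model` from the YAML config, `model` stands for any dygraph detector, the parameter lists are shortened, and the export path is hypothetical:

```python
# Illustrative usage of PrunerQAT, mirroring the YAML config above.
slim = PrunerQAT(
    criterion='fpgm',
    pruned_params=['conv2d_27.w_0', 'conv2d_28.w_0'],  # shortened
    pruned_ratios=[0.1, 0.2],
    print_prune_params=False,
    quant_config={
        'weight_quantize_type': 'channel_wise_abs_max',
        'activation_quantize_type': 'moving_average_abs_max',
        'weight_bits': 8,
        'activation_bits': 8,
        'dtype': 'int8',
        'window_size': 10000,
        'moving_rate': 0.9,
        'quantizable_layer_type': ['Conv2D', 'Linear'],
    },
    print_qat_model=False)

model = slim(model)  # prune first, then insert fake-quant ops for QAT
# ... quantization-aware fine-tuning of `model` happens here ...
slim.save_quantized_model(model, 'output_inference/model')  # hypothetical path
```
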