Unverified commit 549df290, authored by Guanghua Yu and committed by GitHub

add yolov3 PrunerQAT joint compression strategy (#6008)

* add yolov3 PrunerQAT joint compression strategy

* update readme
Parent commit: 73ec9173
@@ -179,3 +179,4 @@ python3.7 tools/post_quant.py -c configs/ppyolo/ppyolo_mbv3_large_coco.yml --sli
| Model | Compression strategy | Input size | GFLOPs | Model size (MB) | Latency | Box AP | Download | Config | Slim config |
| ------------------ | ------------ | -------- | :---------: |:---------: |:---------: | :---------: |:----------------------------------------------------------: | :----------------------------------------------------------: | :----------------------------------------------------------: |
| YOLOv3-MobileNetV1 | baseline | 608 | 24.65 | 94.2 | 332.0ms | 29.4 | [Download](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_coco.pdparams) | [Config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml) | - |
| YOLOv3-MobileNetV1 | Distillation + pruning | 608 | 7.54 (-69.4%) | 30.9 (-67.2%) | 166.1ms | 28.4 (-1.0) | [Download](https://paddledet.bj.bcebos.com/models/slim/yolov3_mobilenet_v1_coco_distill_prune.pdparams) | [Config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml) | [Slim config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/slim/extensions/yolov3_mobilenet_v1_coco_distill_prune.yml) |
| YOLOv3-MobileNetV1 | Pruning + quantization | 608 | - | - | - | - | - | [Config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_mobilenet_v1_270e_voc.yml) | [Slim config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/slim/extensions/yolov3_mobilenetv1_prune_qat.yml) |
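
The pruning + quantization row is trained with the new slim config; its metrics are still pending (marked "-"). A typical launch, assuming the standard PaddleDetection `tools/train.py` / `tools/export_model.py` entry points (the output weights path below is illustrative):

```bash
# Hedged sketch: train with the joint prune+QAT slim config, then export
# the quantized inference model. The weights path is illustrative.
python tools/train.py \
    -c configs/yolov3/yolov3_mobilenet_v1_270e_voc.yml \
    --slim_config configs/slim/extensions/yolov3_mobilenetv1_prune_qat.yml

python tools/export_model.py \
    -c configs/yolov3/yolov3_mobilenet_v1_270e_voc.yml \
    --slim_config configs/slim/extensions/yolov3_mobilenetv1_prune_qat.yml \
    -o weights=output/yolov3_mobilenetv1_prune_qat/model_final
```
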
configs/slim/extensions/yolov3_mobilenetv1_prune_qat.yml (new file):

# Weights of yolov3_mobilenet_v1_voc
pretrain_weights: https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_voc.pdparams
slim: PrunerQAT

PrunerQAT:
  criterion: fpgm
  pruned_params: ['conv2d_27.w_0', 'conv2d_28.w_0', 'conv2d_29.w_0',
                  'conv2d_30.w_0', 'conv2d_31.w_0', 'conv2d_32.w_0',
                  'conv2d_34.w_0', 'conv2d_35.w_0', 'conv2d_36.w_0',
                  'conv2d_37.w_0', 'conv2d_38.w_0', 'conv2d_39.w_0',
                  'conv2d_41.w_0', 'conv2d_42.w_0', 'conv2d_43.w_0',
                  'conv2d_44.w_0', 'conv2d_45.w_0', 'conv2d_46.w_0']
  pruned_ratios: [0.1, 0.2, 0.2, 0.2, 0.2, 0.1, 0.2, 0.3, 0.3, 0.3, 0.2, 0.1, 0.3, 0.4, 0.4, 0.4, 0.4, 0.3]
  print_prune_params: False
  quant_config: {
    'weight_quantize_type': 'channel_wise_abs_max',
    'activation_quantize_type': 'moving_average_abs_max',
    'weight_bits': 8, 'activation_bits': 8, 'dtype': 'int8',
    'window_size': 10000, 'moving_rate': 0.9,
    'quantizable_layer_type': ['Conv2D', 'Linear']}
  print_qat_model: True
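
`pruned_params` and `pruned_ratios` are paired positionally: the i-th ratio is applied to the i-th parameter, so the two lists must have equal length (18 entries each here). A minimal sketch of the pairing that `PrunerQAT` builds internally (lists shortened for brevity):

```python
# Illustrative only: mirrors how PrunerQAT turns the two config lists
# into the per-parameter ratio mapping handed to the PaddleSlim pruner.
pruned_params = ['conv2d_27.w_0', 'conv2d_28.w_0']  # shortened
pruned_ratios = [0.1, 0.2]

assert len(pruned_params) == len(pruned_ratios)
ratios = {p: float(r) for p, r in zip(pruned_params, pruned_ratios)}
print(ratios)  # {'conv2d_27.w_0': 0.1, 'conv2d_28.w_0': 0.2}
```
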

@@ -785,7 +785,7 @@ class Trainer(object):
                 save_dir)
         # dy2st and save model
-        if 'slim' not in self.cfg or self.cfg['slim_type'] != 'QAT':
+        if 'slim' not in self.cfg or 'QAT' not in self.cfg['slim_type']:
             paddle.jit.save(
                 static_model,
                 os.path.join(save_dir, 'model'),

@@ -82,7 +82,7 @@ def build_slim_model(cfg, slim_cfg, mode='train'):
         slim = create(cfg.slim)
         cfg['slim_type'] = cfg.slim
         # TODO: fix quant export model in framework.
-        if mode == 'test' and slim_load_cfg['slim'] == 'QAT':
+        if mode == 'test' and 'QAT' in slim_load_cfg['slim']:
             slim.quant_config['activation_preprocess_type'] = None
         cfg['model'] = slim(model)
         cfg['slim'] = slim
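
Both call sites above relax an equality test to a substring test so the new joint strategy takes the same code paths as plain QAT: `'QAT' in 'PrunerQAT'` holds, whereas `'PrunerQAT' == 'QAT'` does not. A quick illustration:

```python
# Under the old equality checks a PrunerQAT model would have been exported
# via the plain paddle.jit.save path and skipped the QAT-specific handling.
for slim_type in ['QAT', 'PrunerQAT', 'Distill']:
    print(slim_type, 'QAT' in slim_type)
# QAT True
# PrunerQAT True
# Distill False
```
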

@@ -83,3 +83,69 @@ class Pruner(object):
            pruned_flops, (ori_flops - pruned_flops) / ori_flops))
        return model

@register
@serializable
class PrunerQAT(object):
    def __init__(self, criterion, pruned_params, pruned_ratios,
                 print_prune_params, quant_config, print_qat_model):
        super(PrunerQAT, self).__init__()
        assert criterion in ['l1_norm', 'fpgm'], \
            "unsupported prune criterion: {}".format(criterion)
        # Pruner hyperparameters
        self.criterion = criterion
        self.pruned_params = pruned_params
        self.pruned_ratios = pruned_ratios
        self.print_prune_params = print_prune_params
        # QAT hyperparameters
        self.quant_config = quant_config
        self.print_qat_model = print_qat_model

    def __call__(self, model):
        # FIXME: adapt to the network graph when training and inference are
        # inconsistent; for now only the inference network graph is pruned.
        model.eval()
        paddleslim = try_import('paddleslim')
        from paddleslim.analysis import dygraph_flops as flops
        input_spec = [{
            "image": paddle.ones(
                shape=[1, 3, 640, 640], dtype='float32'),
            "im_shape": paddle.full(
                [1, 2], 640, dtype='float32'),
            "scale_factor": paddle.ones(
                shape=[1, 2], dtype='float32')
        }]
        if self.print_prune_params:
            print_prune_params(model)

        ori_flops = flops(model, input_spec) / 1000
        logger.info("FLOPs before pruning: {}GFLOPs".format(ori_flops))
        if self.criterion == 'fpgm':
            pruner = paddleslim.dygraph.FPGMFilterPruner(model, input_spec)
        elif self.criterion == 'l1_norm':
            pruner = paddleslim.dygraph.L1NormFilterPruner(model, input_spec)

        logger.info("pruned params: {}".format(self.pruned_params))
        pruned_ratios = [float(n) for n in self.pruned_ratios]
        ratios = {}
        for i, param in enumerate(self.pruned_params):
            ratios[param] = pruned_ratios[i]
        # Prune along axis 0, i.e. the output channels of each conv weight.
        pruner.prune_vars(ratios, [0])
        pruned_flops = flops(model, input_spec) / 1000
        logger.info("FLOPs after pruning: {}GFLOPs; pruned ratio: {}".format(
            pruned_flops, (ori_flops - pruned_flops) / ori_flops))

        # Wrap the pruned model with fake-quant ops for quantization-aware
        # training; the quanter is kept so the model can be exported later.
        self.quanter = paddleslim.dygraph.quant.QAT(config=self.quant_config)
        self.quanter.quantize(model)
        if self.print_qat_model:
            logger.info("Quantized model:")
            logger.info(model)
        return model

    def save_quantized_model(self, layer, path, input_spec=None, **config):
        self.quanter.save_quantized_model(
            model=layer, path=path, input_spec=input_spec, **config)
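
An end-to-end sketch of how the class is driven. This is illustrative only: in PaddleDetection the wiring is done by `build_slim_model` from the YAML config, `model` stands for any dygraph detector, the parameter lists are shortened, and the export path is hypothetical:

```python
# Illustrative usage of PrunerQAT, mirroring the YAML config above.
slim = PrunerQAT(
    criterion='fpgm',
    pruned_params=['conv2d_27.w_0', 'conv2d_28.w_0'],  # shortened
    pruned_ratios=[0.1, 0.2],
    print_prune_params=False,
    quant_config={
        'weight_quantize_type': 'channel_wise_abs_max',
        'activation_quantize_type': 'moving_average_abs_max',
        'weight_bits': 8,
        'activation_bits': 8,
        'dtype': 'int8',
        'window_size': 10000,
        'moving_rate': 0.9,
        'quantizable_layer_type': ['Conv2D', 'Linear'],
    },
    print_qat_model=False)

model = slim(model)  # prune first, then insert fake-quant ops for QAT
# ... quantization-aware fine-tuning of `model` happens here ...
slim.save_quantized_model(model, 'output_inference/model')  # hypothetical path
```
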