From 8cc39f55b51a07c895983a2cedb2fe56f8e87c74 Mon Sep 17 00:00:00 2001 From: whs Date: Fri, 24 Jun 2022 18:12:00 +0800 Subject: [PATCH] Add demo of ACT for ernie3.0 (#1184) --- demo/auto_compression/nlp/README.md | 35 ++++++++++++++----- .../nlp/configs/ernie3.0/afqmc.yaml | 9 +++++ .../nlp/configs/ernie3.0/cluewsc.yaml | 9 +++++ .../nlp/configs/ernie3.0/cmnli.yaml | 9 +++++ .../nlp/configs/ernie3.0/csl.yaml | 9 +++++ .../nlp/configs/ernie3.0/iflytek.yaml | 9 +++++ .../nlp/configs/ernie3.0/ocnli.yaml | 9 +++++ .../nlp/configs/ernie3.0/tnews.yaml | 9 +++++ .../configs/{ => pp-minilm/auto}/afqmc.yaml | 0 .../configs/{ => pp-minilm/auto}/cluewsc.yaml | 0 .../configs/{ => pp-minilm/auto}/cmnli.yaml | 0 .../nlp/configs/{ => pp-minilm/auto}/csl.yaml | 0 .../configs/{ => pp-minilm/auto}/iflytek.yaml | 0 .../configs/{ => pp-minilm/auto}/ocnli.yaml | 0 .../configs/{ => pp-minilm/auto}/tnews.yaml | 0 paddleslim/auto_compression/compressor.py | 3 ++ 16 files changed, 92 insertions(+), 9 deletions(-) create mode 100644 demo/auto_compression/nlp/configs/ernie3.0/afqmc.yaml create mode 100644 demo/auto_compression/nlp/configs/ernie3.0/cluewsc.yaml create mode 100644 demo/auto_compression/nlp/configs/ernie3.0/cmnli.yaml create mode 100644 demo/auto_compression/nlp/configs/ernie3.0/csl.yaml create mode 100644 demo/auto_compression/nlp/configs/ernie3.0/iflytek.yaml create mode 100644 demo/auto_compression/nlp/configs/ernie3.0/ocnli.yaml create mode 100644 demo/auto_compression/nlp/configs/ernie3.0/tnews.yaml rename demo/auto_compression/nlp/configs/{ => pp-minilm/auto}/afqmc.yaml (100%) rename demo/auto_compression/nlp/configs/{ => pp-minilm/auto}/cluewsc.yaml (100%) rename demo/auto_compression/nlp/configs/{ => pp-minilm/auto}/cmnli.yaml (100%) rename demo/auto_compression/nlp/configs/{ => pp-minilm/auto}/csl.yaml (100%) rename demo/auto_compression/nlp/configs/{ => pp-minilm/auto}/iflytek.yaml (100%) rename demo/auto_compression/nlp/configs/{ => pp-minilm/auto}/ocnli.yaml (100%) 
rename demo/auto_compression/nlp/configs/{ => pp-minilm/auto}/tnews.yaml (100%) diff --git a/demo/auto_compression/nlp/README.md b/demo/auto_compression/nlp/README.md index 590a29b2..58fe86f4 100644 --- a/demo/auto_compression/nlp/README.md +++ b/demo/auto_compression/nlp/README.md @@ -14,24 +14,29 @@ ## 1. 简介 -本示例将以自然语言处理模型PP-MiniLM为例,介绍如何使用PaddleNLP中Inference部署模型进行自动压缩。本示例使用的自动压缩策略为剪枝蒸馏和离线量化(Post-training quantization)。 +本示例将以自然语言处理模型PP-MiniLM和ERNIE 3.0-Medium为例,介绍如何使用PaddleNLP中Inference部署模型进行自动压缩。 ## 2. Benchmark -- PP-MiniLM模型 -PP-MiniLM是一个6层的预训练中文小模型,使用PaddleNLP中```from_pretrained```导入PP-MiniLM之后,就可以在自己的数据集上进行fine-tuning,具体介绍可参考[PP-MiniLM文档](https://github.com/PaddlePaddle/PaddleNLP/tree/develop/examples/model_compression/pp-minilm#PP-MiniLM%E4%B8%AD%E6%96%87%E5%B0%8F%E6%A8%A1%E5%9E%8B)。 +- PP-MiniLM: 6层的预训练中文小模型,使用PaddleNLP中```from_pretrained```导入PP-MiniLM之后,就可以在自己的数据集上进行fine-tuning,具体介绍可参考[PP-MiniLM文档](https://github.com/PaddlePaddle/PaddleNLP/tree/develop/examples/model_compression/pp-minilm#PP-MiniLM%E4%B8%AD%E6%96%87%E5%B0%8F%E6%A8%A1%E5%9E%8B)。 +- ERNIE 3.0-Medium: 中文预训练模型, 关键参数为(6-layer, 768-hidden, 12-heads), 详情请参考[PaddleNLP ERNIE 3.0](https://github.com/PaddlePaddle/PaddleNLP/tree/v2.3.3/model_zoo/ernie-3.0)。 + 模型精度对比如下: | 模型 | 策略 | AFQMC | TNEWS | IFLYTEK | CMNLI | OCNLI | CLUEWSC2020 | CSL | AVG | |:------:|:------:|:------:|:------:|:------:|:------:|:-----------:|:------:|:------:|:------:| | PP-MiniLM | Base模型| 74.03 | 56.66 | 60.21 | 80.98 | 76.20 | 84.21 | 77.36 | 72.81 | | PP-MiniLM |剪枝蒸馏+离线量化| 73.56 | 56.38 | 59.87 | 80.80 | 76.44 | 82.23 | 77.77 | 72.44 | +| ERNIE 3.0-Medium | Base模型| 75.35 | 57.45 | 60.17 | 81.16 | 77.19 | 80.59 | 79.70 | 73.09 | +| ERNIE 3.0-Medium | 剪枝+量化训练| 74.17 | 56.84 | 59.75 | 80.54 | 76.03 | 76.97 | 80.80 | 72.16 | 模型在不同任务上平均精度以及加速对比如下: -| PP-MiniLM | Accuracy(avg) | 时延(ms) | 加速比 | -|:-------:|:----------:|:------------:| :------:| -| 压缩前 | 72.81 | 128.01 | - | -| 压缩后 | 72.44 | 17.97 | 612% | +| 模型 |策略| Accuracy(avg) 
| 时延(ms) | 加速比 | +|:-------:|:--------:|:----------:|:------------:| :------:| +|PP-MiniLM| Base模型| 72.81 | 128.01 | - | +|PP-MiniLM| 剪枝蒸馏+离线量化 | 72.44 | 17.97 | 7.12 | +|ERNIE 3.0-Medium| Base模型| 73.09 | 29.25(fp16) | - | +|ERNIE 3.0-Medium| 剪枝+量化训练 | 72.16 | 19.61 | 1.49 | 性能测试的环境为 - 硬件:NVIDIA Tesla T4 单卡 @@ -76,13 +81,25 @@ pip install paddlenlp 注:其他像`__model__`和`__params__`分别对应`model.pdmodel` 和 `model.pdiparams`文件。 -本示例可参考[PaddleNLP PP-MiniLM 中文小模型](https://github.com/PaddlePaddle/PaddleNLP/tree/develop/examples/model_compression/pp-minilm)微调后保存下每个数据集下有最高准确率的模型,也可直接下载以下已微调完成的Inference模型:[afqmc](https://bj.bcebos.com/v1/paddle-slim-models/act/afqmc.tar), [tnews](https://bj.bcebos.com/v1/paddle-slim-models/act/tnews.tar), [iflytek](https://bj.bcebos.com/v1/paddle-slim-models/act/iflytek.tar),[ ocnli](https://bj.bcebos.com/v1/paddle-slim-models/act/ocnli.tar), [cmnli](https://bj.bcebos.com/v1/paddle-slim-models/act/cmnli.tar), [cluewsc2020](https://bj.bcebos.com/v1/paddle-slim-models/act/cluewsc.tar), [csl](https://bj.bcebos.com/v1/paddle-slim-models/act/csl.tar)。其他模型可根据[PaddleNLP文档](https://github.com/PaddlePaddle/PaddleNLP/tree/develop/examples)导出Inference模型。 +##### 直接下载已微调模型 + +| 模型 | AFQMC | TNEWS | IFLYTEK | CMNLI | OCNLI | CLUEWSC2020 | CSL | +|:------:|:------:|:------:|:------:|:------:|:-----------:|:------:|:------:| +| PP-MiniLM | [afqmc](https://bj.bcebos.com/v1/paddle-slim-models/act/afqmc.tar) | [tnews](https://bj.bcebos.com/v1/paddle-slim-models/act/tnews.tar) | [iflytek](https://bj.bcebos.com/v1/paddle-slim-models/act/iflytek.tar) | [cmnli](https://bj.bcebos.com/v1/paddle-slim-models/act/cmnli.tar) | [ocnli](https://bj.bcebos.com/v1/paddle-slim-models/act/ocnli.tar) | [cluewsc2020](https://bj.bcebos.com/v1/paddle-slim-models/act/cluewsc.tar) | [csl](https://bj.bcebos.com/v1/paddle-slim-models/act/csl.tar) | +| ERNIE 3.0-Medium | [afqmc]() | [tnews]() | [iflytek]() | [cmnli]() | [ocnli]() | [cluewsc2020]() | [csl]() | + +从上表获得模型超链接, 并用以下命令下载推理模型文件: 
```shell wget https://bj.bcebos.com/v1/paddle-slim-models/act/afqmc.tar tar -zxvf afqmc.tar ``` +##### 重新微调模型 + +可参考[PaddleNLP PP-MiniLM 中文小模型](https://github.com/PaddlePaddle/PaddleNLP/tree/develop/examples/model_compression/pp-minilm)微调后保存下每个数据集下有最高准确率的模型。 +其他模型可根据[PaddleNLP文档](https://github.com/PaddlePaddle/PaddleNLP/tree/develop/examples)导出Inference模型。 + #### 3.4 自动压缩并产出模型 自动压缩示例通过run.py脚本启动,会使用接口```paddleslim.auto_compression.AutoCompression```对模型进行自动压缩。配置config文件中训练部分的参数,将任务名称、模型类型、数据集名称、压缩参数传入,配置完成后便可对模型进行剪枝、蒸馏训练和离线量化。 @@ -100,7 +117,7 @@ python run.py \ --batch_size=16 \ --max_seq_length=128 \ --task_name='afqmc' \ - --config_path='./configs/afqmc.yaml' + --config_path='./configs/pp-minilm/auto/afqmc.yaml' ``` ## 4. 压缩配置介绍 diff --git a/demo/auto_compression/nlp/configs/ernie3.0/afqmc.yaml b/demo/auto_compression/nlp/configs/ernie3.0/afqmc.yaml new file mode 100644 index 00000000..2f245672 --- /dev/null +++ b/demo/auto_compression/nlp/configs/ernie3.0/afqmc.yaml @@ -0,0 +1,9 @@ +TrainConfig: + epochs: 6 + eval_iter: 1070 + learning_rate: 2.0e-5 + optimizer_builder: + optimizer: + type: AdamW + weight_decay: 0.01 + origin_metric: 0.7334 diff --git a/demo/auto_compression/nlp/configs/ernie3.0/cluewsc.yaml b/demo/auto_compression/nlp/configs/ernie3.0/cluewsc.yaml new file mode 100644 index 00000000..1c4c83fb --- /dev/null +++ b/demo/auto_compression/nlp/configs/ernie3.0/cluewsc.yaml @@ -0,0 +1,9 @@ +TrainConfig: + epochs: 100 + eval_iter: 70 + learning_rate: 1.0e-5 + optimizer_builder: + optimizer: + type: AdamW + weight_decay: 0.01 + origin_metric: 0.7928 diff --git a/demo/auto_compression/nlp/configs/ernie3.0/cmnli.yaml b/demo/auto_compression/nlp/configs/ernie3.0/cmnli.yaml new file mode 100644 index 00000000..531ca703 --- /dev/null +++ b/demo/auto_compression/nlp/configs/ernie3.0/cmnli.yaml @@ -0,0 +1,9 @@ +TrainConfig: + epochs: 6 + eval_iter: 2000 + learning_rate: 3.0e-5 + optimizer_builder: + optimizer: + type: AdamW + weight_decay: 0.01 + 
origin_metric: 0.8064 diff --git a/demo/auto_compression/nlp/configs/ernie3.0/csl.yaml b/demo/auto_compression/nlp/configs/ernie3.0/csl.yaml new file mode 100644 index 00000000..de726bff --- /dev/null +++ b/demo/auto_compression/nlp/configs/ernie3.0/csl.yaml @@ -0,0 +1,9 @@ +TrainConfig: + epochs: 16 + eval_iter: 1000 + learning_rate: 1.0e-5 + optimizer_builder: + optimizer: + type: AdamW + weight_decay: 0.01 + origin_metric: 0.8160 diff --git a/demo/auto_compression/nlp/configs/ernie3.0/iflytek.yaml b/demo/auto_compression/nlp/configs/ernie3.0/iflytek.yaml new file mode 100644 index 00000000..1dc3066b --- /dev/null +++ b/demo/auto_compression/nlp/configs/ernie3.0/iflytek.yaml @@ -0,0 +1,9 @@ +TrainConfig: + epochs: 12 + eval_iter: 750 + learning_rate: 2.0e-5 + optimizer_builder: + optimizer: + type: AdamW + weight_decay: 0.01 + origin_metric: 0.6067 diff --git a/demo/auto_compression/nlp/configs/ernie3.0/ocnli.yaml b/demo/auto_compression/nlp/configs/ernie3.0/ocnli.yaml new file mode 100644 index 00000000..51170807 --- /dev/null +++ b/demo/auto_compression/nlp/configs/ernie3.0/ocnli.yaml @@ -0,0 +1,9 @@ +TrainConfig: + epochs: 20 + eval_iter: 1050 + learning_rate: 3.0e-5 + optimizer_builder: + optimizer: + type: AdamW + weight_decay: 0.01 + origin_metric: 0.7688 diff --git a/demo/auto_compression/nlp/configs/ernie3.0/tnews.yaml b/demo/auto_compression/nlp/configs/ernie3.0/tnews.yaml new file mode 100644 index 00000000..0ce9bd11 --- /dev/null +++ b/demo/auto_compression/nlp/configs/ernie3.0/tnews.yaml @@ -0,0 +1,9 @@ +TrainConfig: + epochs: 6 + eval_iter: 1110 + learning_rate: 2.0e-5 + optimizer_builder: + optimizer: + type: AdamW + weight_decay: 0.01 + origin_metric: 0.5700 diff --git a/demo/auto_compression/nlp/configs/afqmc.yaml b/demo/auto_compression/nlp/configs/pp-minilm/auto/afqmc.yaml similarity index 100% rename from demo/auto_compression/nlp/configs/afqmc.yaml rename to demo/auto_compression/nlp/configs/pp-minilm/auto/afqmc.yaml diff --git 
a/demo/auto_compression/nlp/configs/cluewsc.yaml b/demo/auto_compression/nlp/configs/pp-minilm/auto/cluewsc.yaml similarity index 100% rename from demo/auto_compression/nlp/configs/cluewsc.yaml rename to demo/auto_compression/nlp/configs/pp-minilm/auto/cluewsc.yaml diff --git a/demo/auto_compression/nlp/configs/cmnli.yaml b/demo/auto_compression/nlp/configs/pp-minilm/auto/cmnli.yaml similarity index 100% rename from demo/auto_compression/nlp/configs/cmnli.yaml rename to demo/auto_compression/nlp/configs/pp-minilm/auto/cmnli.yaml diff --git a/demo/auto_compression/nlp/configs/csl.yaml b/demo/auto_compression/nlp/configs/pp-minilm/auto/csl.yaml similarity index 100% rename from demo/auto_compression/nlp/configs/csl.yaml rename to demo/auto_compression/nlp/configs/pp-minilm/auto/csl.yaml diff --git a/demo/auto_compression/nlp/configs/iflytek.yaml b/demo/auto_compression/nlp/configs/pp-minilm/auto/iflytek.yaml similarity index 100% rename from demo/auto_compression/nlp/configs/iflytek.yaml rename to demo/auto_compression/nlp/configs/pp-minilm/auto/iflytek.yaml diff --git a/demo/auto_compression/nlp/configs/ocnli.yaml b/demo/auto_compression/nlp/configs/pp-minilm/auto/ocnli.yaml similarity index 100% rename from demo/auto_compression/nlp/configs/ocnli.yaml rename to demo/auto_compression/nlp/configs/pp-minilm/auto/ocnli.yaml diff --git a/demo/auto_compression/nlp/configs/tnews.yaml b/demo/auto_compression/nlp/configs/pp-minilm/auto/tnews.yaml similarity index 100% rename from demo/auto_compression/nlp/configs/tnews.yaml rename to demo/auto_compression/nlp/configs/pp-minilm/auto/tnews.yaml diff --git a/paddleslim/auto_compression/compressor.py b/paddleslim/auto_compression/compressor.py index 7b57e6d9..96e791ca 100644 --- a/paddleslim/auto_compression/compressor.py +++ b/paddleslim/auto_compression/compressor.py @@ -15,6 +15,7 @@ import logging import os import sys +import copy import numpy as np import copy import inspect @@ -300,6 +301,7 @@ class AutoCompression: def 
_prepare_envs(self): devices = paddle.device.get_device().split(':')[0] places = paddle.device._convert_to_place(devices) + _logger.info(f"devices: {devices}") exe = paddle.static.Executor(places) return exe, places @@ -309,6 +311,7 @@ class AutoCompression: model_filename=model_filename, params_filename=params_filename, executor=exe) _, _, model_type = get_patterns(inference_program) + _logger.info(f"Detect model type: {model_type}") return model_type def _prepare_strategy(self, strategy_config): -- GitLab