diff --git a/example/auto_compression/nlp/README.md b/example/auto_compression/nlp/README.md index 5fabc771bc35e56625dfe1b500cd03b5476a5521..fd83f8de5242c7173f29cd8ef4bd3bfb78bf32d8 100644 --- a/example/auto_compression/nlp/README.md +++ b/example/auto_compression/nlp/README.md @@ -110,10 +110,10 @@ export CUDA_VISIBLE_DEVICES=0 python run.py --config_path='./configs/pp-minilm/auto/afqmc.yaml' --save_dir='./save_afqmc_pruned/' ``` -如仅需验证模型精度,或验证压缩之后模型精度,在启动```run.py```脚本时,将配置文件中模型文件夹 ```model_dir``` 改为压缩之后保存的文件夹路径 ```./output/cola/``` ,命令加上```--eval True```即可: +如仅需验证模型精度,或验证压缩之后模型精度,在启动```run.py```脚本时,将配置文件中模型文件夹 ```model_dir``` 改为压缩之后保存的文件夹路径 ```./save_afqmc_pruned``` ,命令加上```--eval True```即可: ```shell export CUDA_VISIBLE_DEVICES=0 -python run.py --config_path=./configs/cola.yaml --eval True +python run.py --config_path='./configs/pp-minilm/auto/afqmc.yaml' --eval True ``` ## 4. 压缩配置介绍 diff --git a/example/auto_compression/nlp/configs/ernie3.0/afqmc.yaml b/example/auto_compression/nlp/configs/ernie3.0/afqmc.yaml index 261e8635e0be569a8b08821801389c4e07f500a8..b5ba0f14975713c692433575caeb5d70c173c1b1 100644 --- a/example/auto_compression/nlp/configs/ernie3.0/afqmc.yaml +++ b/example/auto_compression/nlp/configs/ernie3.0/afqmc.yaml @@ -1,7 +1,7 @@ Global: model_dir: ./AFQMC - model_filename: inference.pdmodel - params_filename: inference.pdiparams + model_filename: infer.pdmodel + params_filename: infer.pdiparams task_name: afqmc dataset: clue batch_size: 16 diff --git a/example/auto_compression/nlp/configs/ernie3.0/cluewsc.yaml b/example/auto_compression/nlp/configs/ernie3.0/cluewsc.yaml index a7f48f92eb86fc0febce46a4ec627f7902be4403..2d4c0e4de57b9859ba3f202f29974f5656557c9d 100644 --- a/example/auto_compression/nlp/configs/ernie3.0/cluewsc.yaml +++ b/example/auto_compression/nlp/configs/ernie3.0/cluewsc.yaml @@ -1,7 +1,7 @@ Global: model_dir: ./CLUEWSC - model_filename: inference.pdmodel - params_filename: inference.pdiparams + model_filename: infer.pdmodel + 
params_filename: infer.pdiparams task_name: cluewsc dataset: clue batch_size: 16 diff --git a/example/auto_compression/nlp/configs/ernie3.0/cmnli.yaml b/example/auto_compression/nlp/configs/ernie3.0/cmnli.yaml index 4ccfd53cc67f49655ce29bbc6de5200e8acefa4a..67b9ff0b1612a6733ae8014b96acf2fcb12cf14e 100644 --- a/example/auto_compression/nlp/configs/ernie3.0/cmnli.yaml +++ b/example/auto_compression/nlp/configs/ernie3.0/cmnli.yaml @@ -1,7 +1,7 @@ Global: model_dir: ./CMNLI - model_filename: inference.pdmodel - params_filename: inference.pdiparams + model_filename: infer.pdmodel + params_filename: infer.pdiparams task_name: cmnli dataset: clue batch_size: 16 diff --git a/example/auto_compression/nlp/configs/ernie3.0/csl.yaml b/example/auto_compression/nlp/configs/ernie3.0/csl.yaml index 8b5172f083cc6f1be5a0a805390f9d0acd95ac3d..70ccbea85fdd14ef1993b4990e9518acbd8f2d42 100644 --- a/example/auto_compression/nlp/configs/ernie3.0/csl.yaml +++ b/example/auto_compression/nlp/configs/ernie3.0/csl.yaml @@ -1,7 +1,7 @@ Global: model_dir: ./CSL - model_filename: inference.pdmodel - params_filename: inference.pdiparams + model_filename: infer.pdmodel + params_filename: infer.pdiparams task_name: csl dataset: clue batch_size: 16 diff --git a/example/auto_compression/nlp/configs/ernie3.0/iflytek.yaml b/example/auto_compression/nlp/configs/ernie3.0/iflytek.yaml index 0e766ada10f25c7ad9f174a9092c5b66ab4ddecd..7a74e16d16e85f3307d9aa48e16b17d08d5b2f32 100644 --- a/example/auto_compression/nlp/configs/ernie3.0/iflytek.yaml +++ b/example/auto_compression/nlp/configs/ernie3.0/iflytek.yaml @@ -1,7 +1,7 @@ Global: model_dir: ./IFLYTEK - model_filename: inference.pdmodel - params_filename: inference.pdiparams + model_filename: infer.pdmodel + params_filename: infer.pdiparams task_name: iflytek dataset: clue batch_size: 16 diff --git a/example/auto_compression/nlp/configs/ernie3.0/ocnli.yaml b/example/auto_compression/nlp/configs/ernie3.0/ocnli.yaml index 
f00a770c89158a13d6ce85cf7e2f986920eb828d..929a2ff181ebfd9c0e34bfc3b937d3b599b10f27 100644 --- a/example/auto_compression/nlp/configs/ernie3.0/ocnli.yaml +++ b/example/auto_compression/nlp/configs/ernie3.0/ocnli.yaml @@ -1,7 +1,7 @@ Global: model_dir: ./OCNLI - model_filename: inference.pdmodel - params_filename: inference.pdiparams + model_filename: infer.pdmodel + params_filename: infer.pdiparams task_name: ocnli dataset: clue batch_size: 16 diff --git a/example/auto_compression/nlp/configs/ernie3.0/tnews.yaml b/example/auto_compression/nlp/configs/ernie3.0/tnews.yaml index 9682f2bb00cfc995e8dfd30eee21f5a015b7aa0b..49093ab872cd5ed1ebd78d5b6d85b400a3e6a568 100644 --- a/example/auto_compression/nlp/configs/ernie3.0/tnews.yaml +++ b/example/auto_compression/nlp/configs/ernie3.0/tnews.yaml @@ -1,7 +1,7 @@ Global: model_dir: ./TNEWS - model_filename: inference.pdmodel - params_filename: inference.pdiparams + model_filename: infer.pdmodel + params_filename: infer.pdiparams task_name: tnews dataset: clue batch_size: 16 diff --git a/example/auto_compression/nlp/run.py b/example/auto_compression/nlp/run.py index 04ad4f293c5fb8c8a8c01df0c4d982bdd31b49dd..e09a92244dc0a3433a440abdff32e875d54d0f66 100644 --- a/example/auto_compression/nlp/run.py +++ b/example/auto_compression/nlp/run.py @@ -306,6 +306,11 @@ def main(): eval_dataloader=eval_dataloader) ac.compress() + for file_name in os.listdir(global_config['model_dir']): + if 'json' in file_name or 'txt' in file_name: + shutil.copy( + os.path.join(global_config['model_dir'], file_name), + args.save_dir) if __name__ == '__main__': diff --git a/example/auto_compression/pytorch_huggingface/README.md b/example/auto_compression/pytorch_huggingface/README.md index 465414c95da70fd78f252a7a40ece1a647ba888e..d2cdd87bda25d1a0043c5c6c43ca253342d0cf0d 100644 --- a/example/auto_compression/pytorch_huggingface/README.md +++ b/example/auto_compression/pytorch_huggingface/README.md @@ -32,13 +32,13 @@ 模型在多个任务上平均精度以及加速对比如下: | bert-base-cased 
| Accuracy(avg) | 时延(ms) | 加速比 | |:-------:|:----------:|:------------:| :------:| -| 压缩前 | 81.35 | 8.18 | - | -| 压缩后 | 81.53 | 6.35 | 1.29 | +| 压缩前 | 81.35 | 11.60 | - | +| 压缩后 | 81.53 | 4.83 | 2.40 | - Nvidia GPU 测试环境: - 硬件:NVIDIA Tesla T4 单卡 - 软件:CUDA 11.2, cuDNN 8.0, TensorRT 8.4 - - 测试配置:batch_size: 1, seqence length: 128 + - 测试配置:batch_size: 40, sequence length: 128 ## 3. 自动压缩流程 #### 3.1 准备环境 @@ -74,12 +74,6 @@ git checkout develop python setup.py install ``` -安装transformers: -```shell -pip install transformers -``` -注:安装transformers的目的是为了使用transformers中的Tokenizer。 - 安装paddlenlp: ```shell pip install paddlenlp @@ -101,10 +95,10 @@ import torch import numpy as np # 将PyTorch模型设置为eval模式 torch_model.eval() -# 构建输入 -input_ids = torch.unsqueeze(torch.tensor([0] * max_length), 0) -token_type_ids = torch.unsqueeze(torch.tensor([0] * max_length), 0) -attention_msk = torch.unsqueeze(torch.tensor([0] * max_length), 0) +# 构建输入 +input_ids = torch.zeros([batch_size, max_length]).long() +token_type_ids = torch.zeros([batch_size, max_length]).long() +attention_msk = torch.zeros([batch_size, max_length]).long() # 进行转换 from x2paddle.convert import pytorch2paddle pytorch2paddle(torch_model, @@ -120,7 +114,7 @@ PyTorch2Paddle支持trace和script两种方式的转换,均是PyTorch动态图 注意: - 由于自动压缩的是静态图模型,所以这里需要将```jit_type```设置为```trace```,并且注意PyTorch模型中需要设置```pad_to_max_length```,且设置的```max_length```需要和转换时构建的数据相同。 - HuggingFace默认输入```attention_mask```,PaddleNLP默认不输入,这里需要保持一致。可以PaddleNLP中设置```return_attention_mask=True```。 -- 使用PaddleNLP的tokenizer时需要在模型保存的文件夹中加入```model_config.json, special_tokens_map.json, tokenizer_config.json, vocab.txt```这些文件。 +- 使用PaddleNLP的tokenizer时需要在模型保存的文件夹中加入tokenizer的配置文件,可使用PaddleNLP中训练后自动保存的 ```model_config.json, special_tokens_map.json, tokenizer_config.json, vocab.txt```,也可使用HuggingFace训练后自动保存的 ```config.json, special_tokens_map.json, tokenizer_config.json, vocab.txt```。 
更多Pytorch2Paddle示例可参考[PyTorch模型转换文档](https://github.com/PaddlePaddle/X2Paddle/blob/develop/docs/inference_model_convertor/pytorch2paddle.md)。其他框架转换可参考[X2Paddle模型转换工具](https://github.com/PaddlePaddle/X2Paddle) @@ -191,7 +185,7 @@ export CUDA_VISIBLE_DEVICES=0 python run.py --config_path=./configs/cola.yaml --save_dir='./output/cola/' ``` -如仅需验证模型精度,或验证压缩之后模型精度,在启动```run.py```脚本时,将配置文件中模型文件夹 ```model_dir``` 改为压缩之后保存的文件夹路径 ```./output/cola/``` ,命令加上```--eval True```即可: +如仅需验证模型精度,或验证压缩之后模型精度,在启动```run.py```脚本时,将配置文件中模型文件夹 ```model_dir``` 改为压缩之后保存的文件夹路径 ```./output/cola``` ,命令加上```--eval True```即可: ```shell export CUDA_VISIBLE_DEVICES=0 python run.py --config_path=./configs/cola.yaml --eval True diff --git a/example/auto_compression/pytorch_huggingface/run.py b/example/auto_compression/pytorch_huggingface/run.py index 9b3467e83fb90aeb79b852459a64265b27074976..b723be4d3e3a01ef5a962167857f1197dda29b12 100644 --- a/example/auto_compression/pytorch_huggingface/run.py +++ b/example/auto_compression/pytorch_huggingface/run.py @@ -20,9 +20,10 @@ import paddle import paddle.nn as nn import functools from functools import partial +import shutil from paddle.io import Dataset, BatchSampler, DataLoader from paddle.metric import Metric, Accuracy -from transformers import AutoTokenizer +from paddlenlp.transformers import AutoModelForTokenClassification, AutoTokenizer from paddlenlp.datasets import load_dataset from paddlenlp.data import Stack, Tuple, Pad from paddlenlp.metrics import AccuracyAndF1, Mcc, PearsonAndSpearman @@ -164,7 +165,10 @@ def reader(): ): fn(samples) train_batch_sampler = paddle.io.BatchSampler( - train_ds, batch_size=global_config['batch_size'], shuffle=True) + train_ds, + batch_size=global_config['batch_size'], + shuffle=True, + drop_last=True) feed_list = create_data_holder(global_config['task_name'], global_config['input_names']) @@ -208,7 +212,8 @@ def reader(): dev_batch_sampler_matched = paddle.io.BatchSampler( dev_ds_matched, 
batch_size=global_config['batch_size'], - shuffle=False) + shuffle=False, + drop_last=True) dev_data_loader_matched = DataLoader( dataset=dev_ds_matched, batch_sampler=dev_batch_sampler_matched, @@ -219,21 +224,26 @@ def reader(): dev_batch_sampler_mismatched = paddle.io.BatchSampler( dev_ds_mismatched, batch_size=global_config['batch_size'], - shuffle=False) + shuffle=False, + drop_last=True) dev_data_loader_mismatched = DataLoader( dataset=dev_ds_mismatched, batch_sampler=dev_batch_sampler_mismatched, collate_fn=batchify_fn, num_workers=0, feed_list=feed_list, - return_list=False) + return_list=False, + drop_last=True) return train_data_loader, dev_data_loader_matched, dev_data_loader_mismatched else: dev_ds = load_dataset( global_config['dataset'], global_config['task_name'], splits='dev') dev_ds = dev_ds.map(dev_trans_func, lazy=True) dev_batch_sampler = paddle.io.BatchSampler( - dev_ds, batch_size=global_config['batch_size'], shuffle=False) + dev_ds, + batch_size=global_config['batch_size'], + shuffle=False, + drop_last=True) dev_data_loader = DataLoader( dataset=dev_ds, batch_sampler=dev_batch_sampler, @@ -355,6 +365,12 @@ def main(): ac.compress() + for file_name in os.listdir(global_config['model_dir']): + if 'json' in file_name or 'txt' in file_name: + shutil.copy( + os.path.join(global_config['model_dir'], file_name), + args.save_dir) + if __name__ == '__main__': paddle.enable_static() diff --git a/example/auto_compression/pytorch_huggingface/run.sh b/example/auto_compression/pytorch_huggingface/run.sh deleted file mode 100644 index eb444ba06f3c18bc3cc19092bb8b1a647d9301a4..0000000000000000000000000000000000000000 --- a/example/auto_compression/pytorch_huggingface/run.sh +++ /dev/null @@ -1,2 +0,0 @@ -export CUDA_VISIBLE_DEVICES=0 -python run.py --config_path=./configs/cola.yaml --save_dir='./output/cola/'