From 91fcce52520e6ecb6b323d70bccce83e72fc5c27 Mon Sep 17 00:00:00 2001
From: Chang Xu
Date: Fri, 1 Jul 2022 11:50:26 +0800
Subject: [PATCH] Update NLP & HuggingFace Demo (#1212)

* Change Tokenizer in HuggingFace Demo
* Change Tokenizer in HuggingFace Demo
* Change Tokenizer in HuggingFace Demo
* Change Tokenizer in HuggingFace Demo
* Change Tokenizer in HuggingFace Demo
* Change Tokenizer in HuggingFace Demo
---
 demo/auto_compression/nlp/README.md          |  18 +--
 .../nlp/configs/ernie3.0/afqmc.yaml          |   8 ++
 .../nlp/configs/ernie3.0/cluewsc.yaml        |   8 ++
 .../nlp/configs/ernie3.0/cmnli.yaml          |   8 ++
 .../nlp/configs/ernie3.0/csl.yaml            |   8 ++
 .../nlp/configs/ernie3.0/iflytek.yaml        |   8 ++
 .../nlp/configs/ernie3.0/ocnli.yaml          |   8 ++
 .../nlp/configs/ernie3.0/tnews.yaml          |   8 ++
 .../nlp/configs/pp-minilm/auto/afqmc.yaml    |   8 ++
 .../nlp/configs/pp-minilm/auto/cluewsc.yaml  |   8 ++
 .../nlp/configs/pp-minilm/auto/cmnli.yaml    |   8 ++
 .../nlp/configs/pp-minilm/auto/csl.yaml      |   8 ++
 .../nlp/configs/pp-minilm/auto/iflytek.yaml  |   8 ++
 .../nlp/configs/pp-minilm/auto/ocnli.yaml    |   8 ++
 .../nlp/configs/pp-minilm/auto/tnews.yaml    |   8 ++
 demo/auto_compression/nlp/run.py             | 115 ++++++++++--------
 demo/auto_compression/nlp/run.sh             |  13 +-
 .../pytorch_huggingface/README.md            |  21 +++-
 .../pytorch_huggingface/run.py               |   7 +-
 19 files changed, 203 insertions(+), 83 deletions(-)

diff --git a/demo/auto_compression/nlp/README.md b/demo/auto_compression/nlp/README.md
index be4d9bae..5fabc771 100644
--- a/demo/auto_compression/nlp/README.md
+++ b/demo/auto_compression/nlp/README.md
@@ -107,20 +107,14 @@ tar -zxvf afqmc.tar
 :
 ```shell
 export CUDA_VISIBLE_DEVICES=0
-python run.py \
-    --model_type='ppminilm' \
-    --model_dir='./afqmc' \
-    --model_filename='inference.pdmodel' \
-    --params_filename='inference.pdiparams' \
-    --dataset='clue' \
-    --save_dir='./save_afqmc_pruned/' \
-    --batch_size=16 \
-    --max_seq_length=128 \
-    --task_name='afqmc' \
-    --config_path='./configs/pp-minilm/auto/afqmc.yaml'
+python run.py --config_path='./configs/pp-minilm/auto/afqmc.yaml' --save_dir='./save_afqmc_pruned/'
 ```
 
-To evaluate model accuracy only, add ```--eval=True``` to the command when launching the ```run.py``` script.
+To evaluate the accuracy of the original model, or of the compressed model, point ```model_dir``` in the config file at the directory the compressed model was saved to (here ```./save_afqmc_pruned/```) and add ```--eval True``` when launching the ```run.py``` script:
+```shell
+export CUDA_VISIBLE_DEVICES=0
+python run.py --config_path='./configs/pp-minilm/auto/afqmc.yaml' --eval True
+```
 
 ## 4. Compression Configuration
 
 Auto-compression requires a config file, passed in through the ```config_path``` field; the configs folder contains configuration files for the different tasks, and the examples below use the afqmc dataset. Training parameters must be configured by hand. The settings for distillation, pruning, and post-training quantization can be derived automatically by the auto-compression strategy, or configured manually. Auto-compression experiments on PaddleNLP models use the pruning, distillation, and post-training quantization strategies by default.
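As a quick orientation (an editor's sketch, not part of the patch): after this change, `run.py` derives everything the old command-line flags carried from the `Global` section of the YAML passed via `--config_path`. A minimal sketch of that lookup, assuming the afqmc config introduced below is on disk and that PaddleSlim's `load_config` returns the parsed YAML as a dict, which is how `run.py` uses it:

```python
# Sketch of how run.py resolves the new Global section (illustration only).
from paddleslim.auto_compression.config_helpers import load_config

all_config = load_config('./configs/pp-minilm/auto/afqmc.yaml')
assert 'Global' in all_config, 'Key Global not found in config file.'

global_config = all_config['Global']
print(global_config['model_dir'])       # ./afqmc
print(global_config['task_name'])       # afqmc
print(global_config['max_seq_length'])  # 128
```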
diff --git a/demo/auto_compression/nlp/configs/ernie3.0/afqmc.yaml b/demo/auto_compression/nlp/configs/ernie3.0/afqmc.yaml
index 2f245672..261e8635 100644
--- a/demo/auto_compression/nlp/configs/ernie3.0/afqmc.yaml
+++ b/demo/auto_compression/nlp/configs/ernie3.0/afqmc.yaml
@@ -1,3 +1,11 @@
+Global:
+  model_dir: ./AFQMC
+  model_filename: inference.pdmodel
+  params_filename: inference.pdiparams
+  task_name: afqmc
+  dataset: clue
+  batch_size: 16
+  max_seq_length: 128
 TrainConfig:
   epochs: 6
   eval_iter: 1070
diff --git a/demo/auto_compression/nlp/configs/ernie3.0/cluewsc.yaml b/demo/auto_compression/nlp/configs/ernie3.0/cluewsc.yaml
index 1c4c83fb..a7f48f92 100644
--- a/demo/auto_compression/nlp/configs/ernie3.0/cluewsc.yaml
+++ b/demo/auto_compression/nlp/configs/ernie3.0/cluewsc.yaml
@@ -1,3 +1,11 @@
+Global:
+  model_dir: ./CLUEWSC
+  model_filename: inference.pdmodel
+  params_filename: inference.pdiparams
+  task_name: cluewsc
+  dataset: clue
+  batch_size: 16
+  max_seq_length: 128
 TrainConfig:
   epochs: 100
   eval_iter: 70
diff --git a/demo/auto_compression/nlp/configs/ernie3.0/cmnli.yaml b/demo/auto_compression/nlp/configs/ernie3.0/cmnli.yaml
index 531ca703..4ccfd53c 100644
--- a/demo/auto_compression/nlp/configs/ernie3.0/cmnli.yaml
+++ b/demo/auto_compression/nlp/configs/ernie3.0/cmnli.yaml
@@ -1,3 +1,11 @@
+Global:
+  model_dir: ./CMNLI
+  model_filename: inference.pdmodel
+  params_filename: inference.pdiparams
+  task_name: cmnli
+  dataset: clue
+  batch_size: 16
+  max_seq_length: 128
 TrainConfig:
   epochs: 6
   eval_iter: 2000
diff --git a/demo/auto_compression/nlp/configs/ernie3.0/csl.yaml b/demo/auto_compression/nlp/configs/ernie3.0/csl.yaml
index de726bff..8b5172f0 100644
--- a/demo/auto_compression/nlp/configs/ernie3.0/csl.yaml
+++ b/demo/auto_compression/nlp/configs/ernie3.0/csl.yaml
@@ -1,3 +1,11 @@
+Global:
+  model_dir: ./CSL
+  model_filename: inference.pdmodel
+  params_filename: inference.pdiparams
+  task_name: csl
+  dataset: clue
+  batch_size: 16
+  max_seq_length: 128
 TrainConfig:
   epochs: 16
   eval_iter: 1000
diff --git a/demo/auto_compression/nlp/configs/ernie3.0/iflytek.yaml b/demo/auto_compression/nlp/configs/ernie3.0/iflytek.yaml
index 1dc3066b..0e766ada 100644
--- a/demo/auto_compression/nlp/configs/ernie3.0/iflytek.yaml
+++ b/demo/auto_compression/nlp/configs/ernie3.0/iflytek.yaml
@@ -1,3 +1,11 @@
+Global:
+  model_dir: ./IFLYTEK
+  model_filename: inference.pdmodel
+  params_filename: inference.pdiparams
+  task_name: iflytek
+  dataset: clue
+  batch_size: 16
+  max_seq_length: 128
 TrainConfig:
   epochs: 12
   eval_iter: 750
diff --git a/demo/auto_compression/nlp/configs/ernie3.0/ocnli.yaml b/demo/auto_compression/nlp/configs/ernie3.0/ocnli.yaml
index 51170807..f00a770c 100644
--- a/demo/auto_compression/nlp/configs/ernie3.0/ocnli.yaml
+++ b/demo/auto_compression/nlp/configs/ernie3.0/ocnli.yaml
@@ -1,3 +1,11 @@
+Global:
+  model_dir: ./OCNLI
+  model_filename: inference.pdmodel
+  params_filename: inference.pdiparams
+  task_name: ocnli
+  dataset: clue
+  batch_size: 16
+  max_seq_length: 128
 TrainConfig:
   epochs: 20
   eval_iter: 1050
diff --git a/demo/auto_compression/nlp/configs/ernie3.0/tnews.yaml b/demo/auto_compression/nlp/configs/ernie3.0/tnews.yaml
index 0ce9bd11..9682f2bb 100644
--- a/demo/auto_compression/nlp/configs/ernie3.0/tnews.yaml
+++ b/demo/auto_compression/nlp/configs/ernie3.0/tnews.yaml
@@ -1,3 +1,11 @@
+Global:
+  model_dir: ./TNEWS
+  model_filename: inference.pdmodel
+  params_filename: inference.pdiparams
+  task_name: tnews
+  dataset: clue
+  batch_size: 16
+  max_seq_length: 128
 TrainConfig:
   epochs: 6
   eval_iter: 1110
diff --git a/demo/auto_compression/nlp/configs/pp-minilm/auto/afqmc.yaml b/demo/auto_compression/nlp/configs/pp-minilm/auto/afqmc.yaml
index 2f9b8534..8ee2e8f5 100644
--- a/demo/auto_compression/nlp/configs/pp-minilm/auto/afqmc.yaml
+++ b/demo/auto_compression/nlp/configs/pp-minilm/auto/afqmc.yaml
@@ -1,3 +1,11 @@
+Global:
+  model_dir: ./afqmc
+  model_filename: inference.pdmodel
+  params_filename: inference.pdiparams
+  task_name: afqmc
+  dataset: clue
+  batch_size: 16
+  max_seq_length: 128
 TrainConfig:
   epochs: 6
   eval_iter: 1070
diff --git a/demo/auto_compression/nlp/configs/pp-minilm/auto/cluewsc.yaml b/demo/auto_compression/nlp/configs/pp-minilm/auto/cluewsc.yaml
index 487667b7..55e41db8 100644
--- a/demo/auto_compression/nlp/configs/pp-minilm/auto/cluewsc.yaml
+++ b/demo/auto_compression/nlp/configs/pp-minilm/auto/cluewsc.yaml
@@ -1,3 +1,11 @@
+Global:
+  model_dir: ./cluewsc
+  model_filename: inference.pdmodel
+  params_filename: inference.pdiparams
+  task_name: cluewsc
+  dataset: clue
+  batch_size: 16
+  max_seq_length: 128
 TrainConfig:
   epochs: 100
   eval_iter: 70
diff --git a/demo/auto_compression/nlp/configs/pp-minilm/auto/cmnli.yaml b/demo/auto_compression/nlp/configs/pp-minilm/auto/cmnli.yaml
index dd2a2640..81dd5a07 100644
--- a/demo/auto_compression/nlp/configs/pp-minilm/auto/cmnli.yaml
+++ b/demo/auto_compression/nlp/configs/pp-minilm/auto/cmnli.yaml
@@ -1,3 +1,11 @@
+Global:
+  model_dir: ./cmnli
+  model_filename: inference.pdmodel
+  params_filename: inference.pdiparams
+  task_name: cmnli
+  dataset: clue
+  batch_size: 16
+  max_seq_length: 128
 TrainConfig:
   epochs: 6
   eval_iter: 2000
diff --git a/demo/auto_compression/nlp/configs/pp-minilm/auto/csl.yaml b/demo/auto_compression/nlp/configs/pp-minilm/auto/csl.yaml
index b7afcb68..f51246ae 100644
--- a/demo/auto_compression/nlp/configs/pp-minilm/auto/csl.yaml
+++ b/demo/auto_compression/nlp/configs/pp-minilm/auto/csl.yaml
@@ -1,3 +1,11 @@
+Global:
+  model_dir: ./csl
+  model_filename: inference.pdmodel
+  params_filename: inference.pdiparams
+  task_name: csl
+  dataset: clue
+  batch_size: 16
+  max_seq_length: 128
 TrainConfig:
   epochs: 16
   eval_iter: 1000
diff --git a/demo/auto_compression/nlp/configs/pp-minilm/auto/iflytek.yaml b/demo/auto_compression/nlp/configs/pp-minilm/auto/iflytek.yaml
index 4581584e..04f99cdc 100644
--- a/demo/auto_compression/nlp/configs/pp-minilm/auto/iflytek.yaml
+++ b/demo/auto_compression/nlp/configs/pp-minilm/auto/iflytek.yaml
@@ -1,3 +1,11 @@
+Global:
+  model_dir: ./iflytek
+  model_filename: inference.pdmodel
+  params_filename: inference.pdiparams
+  task_name: iflytek
+  dataset: clue
+  batch_size: 16
+  max_seq_length: 128
 TrainConfig:
   epochs: 12
   eval_iter: 750
diff --git a/demo/auto_compression/nlp/configs/pp-minilm/auto/ocnli.yaml b/demo/auto_compression/nlp/configs/pp-minilm/auto/ocnli.yaml
index 2d0f6b5a..c4aab12c 100644
--- a/demo/auto_compression/nlp/configs/pp-minilm/auto/ocnli.yaml
+++ b/demo/auto_compression/nlp/configs/pp-minilm/auto/ocnli.yaml
@@ -1,3 +1,11 @@
+Global:
+  model_dir: ./ocnli
+  model_filename: inference.pdmodel
+  params_filename: inference.pdiparams
+  task_name: ocnli
+  dataset: clue
+  batch_size: 16
+  max_seq_length: 128
 TrainConfig:
   epochs: 20
   eval_iter: 1050
diff --git a/demo/auto_compression/nlp/configs/pp-minilm/auto/tnews.yaml b/demo/auto_compression/nlp/configs/pp-minilm/auto/tnews.yaml
index 5d85b196..e76a7711 100644
--- a/demo/auto_compression/nlp/configs/pp-minilm/auto/tnews.yaml
+++ b/demo/auto_compression/nlp/configs/pp-minilm/auto/tnews.yaml
@@ -1,3 +1,11 @@
+Global:
+  model_dir: ./tnews
+  model_filename: inference.pdmodel
+  params_filename: inference.pdiparams
+  task_name: tnews
+  dataset: clue
+  batch_size: 16
+  max_seq_length: 128
 TrainConfig:
   epochs: 6
   eval_iter: 1110
diff --git a/demo/auto_compression/nlp/run.py b/demo/auto_compression/nlp/run.py
index 2d2494f1..04ad4f29 100644
--- a/demo/auto_compression/nlp/run.py
+++ b/demo/auto_compression/nlp/run.py
@@ -1,43 +1,43 @@
 import os
 import sys
-sys.path[0] = os.path.join(
-    os.path.dirname("__file__"), os.path.pardir, os.path.pardir)
 import argparse
 import functools
 from functools import partial
-
 import numpy as np
 import paddle
 import paddle.nn as nn
 from paddle.io import Dataset, BatchSampler, DataLoader
 from paddle.metric import Metric, Accuracy, Precision, Recall
-from paddlenlp.transformers import PPMiniLMForSequenceClassification, PPMiniLMTokenizer
-from paddlenlp.transformers import BertForSequenceClassification, BertTokenizer
+from paddlenlp.transformers import AutoModelForSequenceClassification, AutoTokenizer
+
 from paddlenlp.datasets import load_dataset
 from paddlenlp.data import Stack, Tuple, Pad
 from paddlenlp.data.sampler import SamplerHelper
 from paddlenlp.metrics import Mcc, PearsonAndSpearman
 from paddleslim.auto_compression.config_helpers import load_config
 from paddleslim.auto_compression.compressor import AutoCompression
-from utility import add_arguments
-
-parser = argparse.ArgumentParser(description=__doc__)
-add_arg = functools.partial(add_arguments, argparser=parser)
-
-# yapf: disable
-add_arg('model_type', str, None, "model type can be bert or ppminilm.")
-add_arg('model_dir', str, None, "inference model directory.")
-add_arg('model_filename', str, None, "inference model filename.")
-add_arg('params_filename', str, None, "inference params filename.")
-add_arg('dataset', str, None, "datset name.")
-add_arg('save_dir', str, None, "directory to save compressed model.")
-add_arg('max_seq_length', int, 128, "max sequence length after tokenization.")
-add_arg('batch_size', int, 1, "train batch size.")
-add_arg('task_name', str, 'sst-2', "task name in glue.")
-add_arg('config_path', str, None, "path of compression strategy config.")
-add_arg('eval', bool, False, "whether validate the model only.")
-
-# yapf: enable
+
+
+def argsparser():
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        '--config_path',
+        type=str,
+        default=None,
+        help="path of compression strategy config.",
+        required=True)
+    parser.add_argument(
+        '--save_dir',
+        type=str,
+        default='output',
+        help="directory to save compressed model.")
+    parser.add_argument(
+        '--eval',
+        type=bool,
+        default=False,
+        help="whether validate the model only.")
+    return parser
+
 
 METRIC_CLASSES = {
     "cola": Mcc,
@@ -61,11 +61,11 @@ def convert_example(example,
                     label_list,
                     max_seq_length=512,
                     is_test=False):
-    assert args.dataset in [
+    assert global_config['dataset'] in [
         'glue', 'clue'
     ], "This demo only supports for dataset glue or clue"
     """Convert a glue example into necessary features."""
-    if args.dataset == 'glue':
+    if global_config['dataset'] == 'glue':
         if not is_test:
             # `label_list == None` is for regression task
             label_dtype = "int64" if label_list else "float32"
@@ -80,7 +80,7 @@ def convert_example(example,
         else:
             return example['input_ids'], example['token_type_ids']
-    else:  #if args.dataset == 'clue':
+    else:  #if global_config['dataset'] == 'clue':
         if not is_test:
             # `label_list == None` is for regression task
             label_dtype = "int64" if label_list else "float32"
@@ -151,18 +151,19 @@ def create_data_holder(task_name):
 
 def reader():
     # Create the tokenizer and dataset
-    if args.model_type == 'bert':
-        tokenizer = BertTokenizer.from_pretrained(args.model_dir)
-    else:  # ppminilm
-        tokenizer = PPMiniLMTokenizer.from_pretrained(args.model_dir)
+
+    tokenizer = AutoTokenizer.from_pretrained(global_config['model_dir'])
+
     train_ds, dev_ds = load_dataset(
-        args.dataset, args.task_name, splits=('train', 'dev'))
+        global_config['dataset'],
+        global_config['task_name'],
+        splits=('train', 'dev'))
 
     trans_func = partial(
         convert_example,
         tokenizer=tokenizer,
         label_list=train_ds.label_list,
-        max_seq_length=args.max_seq_length,
+        max_seq_length=global_config['max_seq_length'],
         is_test=True)
     train_ds = train_ds.map(trans_func, lazy=True)
@@ -173,9 +174,10 @@ def reader():
     ): fn(samples)
 
     train_batch_sampler = paddle.io.BatchSampler(
-        train_ds, batch_size=args.batch_size, shuffle=True)
+        train_ds, batch_size=global_config['batch_size'], shuffle=True)
 
-    [input_ids, token_type_ids, labels] = create_data_holder(args.task_name)
+    [input_ids, token_type_ids, labels] = create_data_holder(global_config[
+        'task_name'])
     feed_list_name = []
     train_data_loader = DataLoader(
         dataset=train_ds,
@@ -189,7 +191,7 @@ def reader():
         convert_example,
         tokenizer=tokenizer,
         label_list=train_ds.label_list,
-        max_seq_length=args.max_seq_length)
+        max_seq_length=global_config['max_seq_length'])
     dev_batchify_fn = lambda samples, fn=Tuple(
         Pad(axis=0, pad_val=tokenizer.pad_token_id),  # input
         Pad(axis=0, pad_val=tokenizer.pad_token_type_id),  # token_type
     ): fn(samples)
     dev_ds = dev_ds.map(dev_trans_func, lazy=True)
     dev_batch_sampler = paddle.io.BatchSampler(
-        dev_ds, batch_size=args.batch_size, shuffle=False)
+        dev_ds, batch_size=global_config['batch_size'], shuffle=False)
     dev_data_loader = DataLoader(
         dataset=dev_ds,
         batch_sampler=dev_batch_sampler,
@@ -233,11 +235,11 @@ def eval():
     places = paddle.device._convert_to_place(devices)
     exe = paddle.static.Executor(places)
     val_program, feed_target_names, fetch_targets = paddle.static.load_inference_model(
-        args.model_dir,
+        global_config['model_dir'],
         exe,
-        model_filename=args.model_filename,
-        params_filename=args.params_filename)
-    print('Loaded model from: {}'.format(args.model_dir))
+        model_filename=global_config['model_filename'],
+        params_filename=global_config['params_filename'])
+    print('Loaded model from: {}'.format(global_config['model_dir']))
     metric.reset()
     print('Evaluating...')
     for data in eval_dataloader():
@@ -268,17 +270,23 @@ def apply_decay_param_fun(name):
     return False
 
 
-if __name__ == '__main__':
-    args = parser.parse_args()
-    paddle.enable_static()
+def main():
+    all_config = load_config(args.config_path)
 
-    if "TrainConfig" in all_config:
-        all_config["TrainConfig"]["optimizer_builder"][
-            'apply_decay_param_fun'] = apply_decay_param_fun
+    global global_config
+    assert "Global" in all_config, "Key Global not found in config file."
+    global_config = all_config["Global"]
+
+    if 'TrainConfig' in all_config:
+        all_config['TrainConfig']['optimizer_builder'][
+            'apply_decay_param_fun'] = apply_decay_param_fun
 
+    global train_dataloader, eval_dataloader
     train_dataloader, eval_dataloader = reader()
-    metric_class = METRIC_CLASSES[args.task_name]
+
+    global metric
+    metric_class = METRIC_CLASSES[global_config['task_name']]
     metric = metric_class()
 
     if args.eval:
@@ -287,9 +295,9 @@
         sys.exit(0)
 
     ac = AutoCompression(
-        model_dir=args.model_dir,
-        model_filename=args.model_filename,
-        params_filename=args.params_filename,
+        model_dir=global_config['model_dir'],
+        model_filename=global_config['model_filename'],
+        params_filename=global_config['params_filename'],
         save_dir=args.save_dir,
         config=all_config,
         train_dataloader=train_dataloader,
@@ -298,3 +306,10 @@
         eval_dataloader=eval_dataloader)
 
     ac.compress()
+
+
+if __name__ == '__main__':
+    paddle.enable_static()
+    parser = argsparser()
+    args = parser.parse_args()
+    main()
diff --git a/demo/auto_compression/nlp/run.sh b/demo/auto_compression/nlp/run.sh
index a93386aa..e18ee054 100644
--- a/demo/auto_compression/nlp/run.sh
+++ b/demo/auto_compression/nlp/run.sh
@@ -1,15 +1,4 @@
 export CUDA_VISIBLE_DEVICES=0
 export FLAGS_cudnn_deterministic=True
-python run.py \
-    --model_type='ppminilm' \
-    --model_dir='./afqmc' \
-    --model_filename='inference.pdmodel' \
-    --params_filename='inference.pdiparams' \
-    --dataset='clue' \
-    --save_dir='./save_afqmc_pruned/' \
-    --batch_size=16 \
-    --max_seq_length=128 \
-    --task_name='afqmc' \
-    --config_path='./configs/pp-minilm/auto/afqmc.yaml'
-
+python run.py --config_path='./configs/pp-minilm/auto/afqmc.yaml' --save_dir='./save_afqmc_pruned/'
diff --git a/demo/auto_compression/pytorch_huggingface/README.md b/demo/auto_compression/pytorch_huggingface/README.md
index a532103d..465414c9 100644
--- a/demo/auto_compression/pytorch_huggingface/README.md
+++ b/demo/auto_compression/pytorch_huggingface/README.md
@@ -46,6 +46,7 @@
 - PaddlePaddle >= 2.3 (available from the [PaddlePaddle site](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/linux-pip.html))
 - PaddleSlim develop branch, or PaddleSlim >= 2.3.0
 - X2Paddle develop branch
+- transformers >= 4.18.0
 - PaddleNLP >= 2.3
 - tensorflow == 1.14 (required to compress TensorFlow models)
 - onnx >= 1.6.0 (required to compress ONNX models)
@@ -73,12 +74,18 @@
 git checkout develop
 python setup.py install
 ```
 
+Install transformers:
+```shell
+pip install transformers
+```
+Note: transformers is installed in order to use the Tokenizer it provides.
+
 Install paddlenlp:
 ```shell
 pip install paddlenlp
 ```
 
-Note: PaddleNLP is installed in order to download the datasets and the Tokenizer it provides.
+Note: PaddleNLP is installed in order to download the datasets it provides.
 
 #### 3.2 Prepare the Dataset
@@ -165,11 +172,11 @@ def main(x0, x1, x2):
     sepc_list = list()
     sepc_list.append(
         paddle.static.InputSpec(
-            shape=[-1, 128], name="x0", dtype="int64"),
+            shape=[-1, 128], name="x2paddle_input_ids", dtype="int64"),
         paddle.static.InputSpec(
-            shape=[-1, 128], name="x1", dtype="int64"),
+            shape=[-1, 128], name="x2paddle_attention_mask", dtype="int64"),
         paddle.static.InputSpec(
-            shape=[-1, 128], name="x2", dtype="int64"))
+            shape=[-1, 128], name="x2paddle_token_type_ids", dtype="int64"))
     static_model = paddle.jit.to_static(model, input_spec=sepc_list)
     paddle.jit.save(static_model, "./x2paddle_cola")
 ```
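To make the renamed inputs concrete (an editor's illustration, not part of the patch): the exported model only accepts feeds keyed by these ```x2paddle_*``` names, and they line up one-to-one with the fields a HuggingFace tokenizer emits. A minimal sketch, using ```bert-base-uncased``` as a stand-in checkpoint; the demo itself loads its tokenizer from the model directory named in the config:

```python
# Sketch only: map slow-tokenizer output onto the x2paddle_* input names.
# 'bert-base-uncased' is a placeholder; run.py uses Global['model_dir'].
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased', use_fast=False)
enc = tokenizer(
    'this sentence is grammatical .',
    max_length=128,
    padding='max_length',
    truncation=True)

feed = {
    'x2paddle_input_ids': [enc['input_ids']],
    'x2paddle_attention_mask': [enc['attention_mask']],
    'x2paddle_token_type_ids': [enc['token_type_ids']],
}
print({name: len(batch[0]) for name, batch in feed.items()})  # all padded to 128
```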
@@ -184,10 +191,10 @@ export CUDA_VISIBLE_DEVICES=0
 python run.py --config_path=./configs/cola.yaml --save_dir='./output/cola/'
 ```
 
-To evaluate model accuracy only, add ```--eval True``` when launching the ```run.py``` script:
+To evaluate the accuracy of the original model, or of the compressed model, point ```model_dir``` in the config file at the directory the compressed model was saved to (```./output/cola/```) and add ```--eval True``` when launching the ```run.py``` script:
 ```shell
 export CUDA_VISIBLE_DEVICES=0
-python run.py --config_path=./configs/cola.yaml --save_dir='./output/cola/' --eval True
+python run.py --config_path=./configs/cola.yaml --eval True
 ```
 
 ## 4. Inference Deployment
@@ -211,4 +218,6 @@ python -u ./infer.py \
 
 - ```fp16```: whether to enable ```FP16```
 
+To use the TensorRT inference engine, install a Paddle build compiled with ```WITH_TRT=ON```; download it from the [Python inference libraries](https://paddleinference.paddlepaddle.org.cn/master/user_guides/download_lib.html#python) page.
+
 ## 5. FAQ
diff --git a/demo/auto_compression/pytorch_huggingface/run.py b/demo/auto_compression/pytorch_huggingface/run.py
index d86f26c5..9b3467e8 100644
--- a/demo/auto_compression/pytorch_huggingface/run.py
+++ b/demo/auto_compression/pytorch_huggingface/run.py
@@ -22,7 +22,7 @@ import functools
 from functools import partial
 from paddle.io import Dataset, BatchSampler, DataLoader
 from paddle.metric import Metric, Accuracy
-from paddlenlp.transformers import BertForSequenceClassification, BertTokenizer
+from transformers import AutoTokenizer
 from paddlenlp.datasets import load_dataset
 from paddlenlp.data import Stack, Tuple, Pad
 from paddlenlp.metrics import AccuracyAndF1, Mcc, PearsonAndSpearman
@@ -136,7 +136,8 @@ def create_data_holder(task_name, input_names):
 
 def reader():
     # Create the tokenizer and dataset
-    tokenizer = BertTokenizer.from_pretrained(global_config['model_dir'])
+    tokenizer = AutoTokenizer.from_pretrained(
+        global_config['model_dir'], use_fast=False)
 
     train_ds = load_dataset(
         global_config['dataset'], global_config['task_name'], splits="train")
@@ -344,7 +345,7 @@ def main():
         model_filename=global_config['model_filename'],
         params_filename=global_config['params_filename'],
         save_dir=args.save_dir,
-        config=args.config_path,
+        config=all_config,
         train_dataloader=train_dataloader,
         eval_callback=eval_function
         if (len(list(all_config.keys())) == 2 and
             'TrainConfig' in all_config) or
-- 
GitLab
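For reference, a rough sketch (not part of the patch) of the ```--eval True``` verification path both READMEs describe, mirroring ```eval()``` in the demos' ```run.py```; the output directory and the zero-filled batch are placeholders, since a real run tokenizes the dev set first:

```python
# Hedged sketch of the eval-only flow: load a (compressed) inference model
# and push one dummy batch through it. './output/cola/' is a placeholder.
import numpy as np
import paddle

paddle.enable_static()
exe = paddle.static.Executor(paddle.CPUPlace())
program, feed_names, fetch_targets = paddle.static.load_inference_model(
    './output/cola/',
    exe,
    model_filename='inference.pdmodel',
    params_filename='inference.pdiparams')

dummy = np.zeros([1, 128], dtype='int64')  # stand-in for tokenized inputs
logits = exe.run(program,
                 feed={name: dummy for name in feed_names},
                 fetch_list=fetch_targets)
print(logits[0].shape)  # (1, num_classes)
```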