From 52a13894c732dd4171fb8d255c43744a050c2d2e Mon Sep 17 00:00:00 2001
From: Chang Xu
Date: Mon, 4 Jul 2022 15:11:13 +0800
Subject: [PATCH] Update HuggingFace Demo / BS=40 Speed (#1234)

* Update HuggingFace Demo / BS=40 Speed

* Update HuggingFace Demo / BS=40 Speed

* Update HuggingFace Demo / BS=40 Speed

* Update HuggingFace Demo / BS=40 Speed

* Update HuggingFace Demo / BS=40 Speed
---
 example/auto_compression/nlp/README.md        |  4 +--
 .../nlp/configs/ernie3.0/afqmc.yaml           |  4 +--
 .../nlp/configs/ernie3.0/cluewsc.yaml         |  4 +--
 .../nlp/configs/ernie3.0/cmnli.yaml           |  4 +--
 .../nlp/configs/ernie3.0/csl.yaml             |  4 +--
 .../nlp/configs/ernie3.0/iflytek.yaml         |  4 +--
 .../nlp/configs/ernie3.0/ocnli.yaml           |  4 +--
 .../nlp/configs/ernie3.0/tnews.yaml           |  4 +--
 example/auto_compression/nlp/run.py           |  5 ++++
 .../pytorch_huggingface/README.md             | 24 ++++++----------
 .../pytorch_huggingface/run.py                | 28 +++++++++++++++----
 .../pytorch_huggingface/run.sh                |  2 --
 12 files changed, 52 insertions(+), 39 deletions(-)
 delete mode 100644 example/auto_compression/pytorch_huggingface/run.sh

diff --git a/example/auto_compression/nlp/README.md b/example/auto_compression/nlp/README.md
index 5fabc771..fd83f8de 100644
--- a/example/auto_compression/nlp/README.md
+++ b/example/auto_compression/nlp/README.md
@@ -110,10 +110,10 @@
 export CUDA_VISIBLE_DEVICES=0
 python run.py --config_path='./configs/pp-minilm/auto/afqmc.yaml' --save_dir='./save_afqmc_pruned/'
 ```
-To evaluate only the accuracy of a model (original or compressed), launch the ```run.py``` script with the model folder ```model_dir``` in the config file changed to the post-compression save path ```./output/cola/```, and add ```--eval True``` to the command:
+To evaluate only the accuracy of a model (original or compressed), launch the ```run.py``` script with the model folder ```model_dir``` in the config file changed to the post-compression save path ```./save_afqmc_pruned```, and add ```--eval True``` to the command:
 ```shell
 export CUDA_VISIBLE_DEVICES=0
-python run.py --config_path=./configs/cola.yaml --eval True
+python run.py --config_path='./configs/pp-minilm/auto/afqmc.yaml' --eval True
 ```
 
 ## 4. Compression Configuration Overview
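For reference, the evaluation flow described in the README above reuses the training config with ```model_dir``` pointed at the compressed output. A minimal sketch of the edited ```Global``` section, assuming the compressed export keeps the configured filenames (keys mirror the ernie3.0 configs changed below; a pp-minilm config may differ slightly):

```yaml
Global:
  model_dir: ./save_afqmc_pruned    # folder written by --save_dir above
  model_filename: infer.pdmodel     # assumed: export keeps the configured names
  params_filename: infer.pdiparams
  task_name: afqmc
  dataset: clue
  batch_size: 16
```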
diff --git a/example/auto_compression/nlp/configs/ernie3.0/afqmc.yaml b/example/auto_compression/nlp/configs/ernie3.0/afqmc.yaml
index 261e8635..b5ba0f14 100644
--- a/example/auto_compression/nlp/configs/ernie3.0/afqmc.yaml
+++ b/example/auto_compression/nlp/configs/ernie3.0/afqmc.yaml
@@ -1,7 +1,7 @@
 Global:
   model_dir: ./AFQMC
-  model_filename: inference.pdmodel
-  params_filename: inference.pdiparams
+  model_filename: infer.pdmodel
+  params_filename: infer.pdiparams
   task_name: afqmc
   dataset: clue
   batch_size: 16
diff --git a/example/auto_compression/nlp/configs/ernie3.0/cluewsc.yaml b/example/auto_compression/nlp/configs/ernie3.0/cluewsc.yaml
index a7f48f92..2d4c0e4d 100644
--- a/example/auto_compression/nlp/configs/ernie3.0/cluewsc.yaml
+++ b/example/auto_compression/nlp/configs/ernie3.0/cluewsc.yaml
@@ -1,7 +1,7 @@
 Global:
   model_dir: ./CLUEWSC
-  model_filename: inference.pdmodel
-  params_filename: inference.pdiparams
+  model_filename: infer.pdmodel
+  params_filename: infer.pdiparams
   task_name: cluewsc
   dataset: clue
   batch_size: 16
diff --git a/example/auto_compression/nlp/configs/ernie3.0/cmnli.yaml b/example/auto_compression/nlp/configs/ernie3.0/cmnli.yaml
index 4ccfd53c..67b9ff0b 100644
--- a/example/auto_compression/nlp/configs/ernie3.0/cmnli.yaml
+++ b/example/auto_compression/nlp/configs/ernie3.0/cmnli.yaml
@@ -1,7 +1,7 @@
 Global:
   model_dir: ./CMNLI
-  model_filename: inference.pdmodel
-  params_filename: inference.pdiparams
+  model_filename: infer.pdmodel
+  params_filename: infer.pdiparams
   task_name: cmnli
   dataset: clue
   batch_size: 16
diff --git a/example/auto_compression/nlp/configs/ernie3.0/csl.yaml b/example/auto_compression/nlp/configs/ernie3.0/csl.yaml
index 8b5172f0..70ccbea8 100644
--- a/example/auto_compression/nlp/configs/ernie3.0/csl.yaml
+++ b/example/auto_compression/nlp/configs/ernie3.0/csl.yaml
@@ -1,7 +1,7 @@
 Global:
   model_dir: ./CSL
-  model_filename: inference.pdmodel
-  params_filename: inference.pdiparams
+  model_filename: infer.pdmodel
+  params_filename: infer.pdiparams
   task_name: csl
   dataset: clue
   batch_size: 16
diff --git a/example/auto_compression/nlp/configs/ernie3.0/iflytek.yaml b/example/auto_compression/nlp/configs/ernie3.0/iflytek.yaml
index 0e766ada..7a74e16d 100644
--- a/example/auto_compression/nlp/configs/ernie3.0/iflytek.yaml
+++ b/example/auto_compression/nlp/configs/ernie3.0/iflytek.yaml
@@ -1,7 +1,7 @@
 Global:
   model_dir: ./IFLYTEK
-  model_filename: inference.pdmodel
-  params_filename: inference.pdiparams
+  model_filename: infer.pdmodel
+  params_filename: infer.pdiparams
   task_name: iflytek
   dataset: clue
   batch_size: 16
diff --git a/example/auto_compression/nlp/configs/ernie3.0/ocnli.yaml b/example/auto_compression/nlp/configs/ernie3.0/ocnli.yaml
index f00a770c..929a2ff1 100644
--- a/example/auto_compression/nlp/configs/ernie3.0/ocnli.yaml
+++ b/example/auto_compression/nlp/configs/ernie3.0/ocnli.yaml
@@ -1,7 +1,7 @@
 Global:
   model_dir: ./OCNLI
-  model_filename: inference.pdmodel
-  params_filename: inference.pdiparams
+  model_filename: infer.pdmodel
+  params_filename: infer.pdiparams
   task_name: ocnli
   dataset: clue
   batch_size: 16
diff --git a/example/auto_compression/nlp/configs/ernie3.0/tnews.yaml b/example/auto_compression/nlp/configs/ernie3.0/tnews.yaml
index 9682f2bb..49093ab8 100644
--- a/example/auto_compression/nlp/configs/ernie3.0/tnews.yaml
+++ b/example/auto_compression/nlp/configs/ernie3.0/tnews.yaml
@@ -1,7 +1,7 @@
 Global:
   model_dir: ./TNEWS
-  model_filename: inference.pdmodel
-  params_filename: inference.pdiparams
+  model_filename: infer.pdmodel
+  params_filename: infer.pdiparams
   task_name: tnews
   dataset: clue
   batch_size: 16
diff --git a/example/auto_compression/nlp/run.py b/example/auto_compression/nlp/run.py
index 04ad4f29..e09a9224 100644
--- a/example/auto_compression/nlp/run.py
+++ b/example/auto_compression/nlp/run.py
@@ -306,6 +306,11 @@ def main():
         eval_dataloader=eval_dataloader)
 
     ac.compress()
+    for file_name in os.listdir(global_config['model_dir']):
+        if 'json' in file_name or 'txt' in file_name:
+            shutil.copy(
+                os.path.join(global_config['model_dir'], file_name),
+                args.save_dir)
 
 
 if __name__ == '__main__':
diff --git a/example/auto_compression/pytorch_huggingface/README.md b/example/auto_compression/pytorch_huggingface/README.md
index 465414c9..d2cdd87b 100644
--- a/example/auto_compression/pytorch_huggingface/README.md
+++ b/example/auto_compression/pytorch_huggingface/README.md
@@ -32,13 +32,13 @@
 The model's average accuracy across tasks and the speedup are compared below:
 | bert-base-cased | Accuracy (avg) | Latency (ms) | Speedup |
 |:-------:|:----------:|:------------:| :------:|
-| Before compression | 81.35 | 8.18 | - |
-| After compression | 81.53 | 6.35 | 1.29 |
+| Before compression | 81.35 | 11.60 | - |
+| After compression | 81.53 | 4.83 | 2.40 |
 
 - NVIDIA GPU test environment:
   - Hardware: single NVIDIA Tesla T4
   - Software: CUDA 11.2, cuDNN 8.0, TensorRT 8.4
-  - Test configuration: batch_size: 1, sequence length: 128
+  - Test configuration: batch_size: 40, sequence length: 128
 
 ## 3. Auto-Compression Workflow
 #### 3.1 Prepare the Environment
@@ -74,12 +74,6 @@
 git checkout develop
 python setup.py install
 ```
 
-Install transformers:
-```shell
-pip install transformers
-```
-Note: transformers was needed only for its Tokenizer.
-
 Install paddlenlp:
 ```shell
 pip install paddlenlp
@@ -101,10 +95,10 @@
 import torch
 import numpy as np
 # Put the PyTorch model in eval mode
 torch_model.eval()
-# Build the inputs
-input_ids = torch.unsqueeze(torch.tensor([0] * max_length), 0)
-token_type_ids = torch.unsqueeze(torch.tensor([0] * max_length), 0)
-attention_msk = torch.unsqueeze(torch.tensor([0] * max_length), 0)
+# Build the inputs
+input_ids = torch.zeros([batch_size, max_length]).long()
+token_type_ids = torch.zeros([batch_size, max_length]).long()
+attention_msk = torch.zeros([batch_size, max_length]).long()
 # Run the conversion
 from x2paddle.convert import pytorch2paddle
 pytorch2paddle(torch_model,
@@ -120,7 +114,7 @@ PyTorch2Paddle supports both trace and script conversion; both start from the PyTorch dynamic graph
 Note:
 - Since auto-compression operates on static-graph models, ```jit_type``` must be set to ```trace```; the PyTorch model must also set ```pad_to_max_length```, and the configured ```max_length``` must match the data built for the conversion.
 - HuggingFace feeds ```attention_mask``` by default while PaddleNLP does not; the two must stay consistent, which can be done by setting ```return_attention_mask=True``` in PaddleNLP.
-- When using PaddleNLP's tokenizer, the files ```model_config.json, special_tokens_map.json, tokenizer_config.json, vocab.txt``` must be added to the model save folder.
+- When using PaddleNLP's tokenizer, the tokenizer config files must be added to the model save folder: either ```model_config.json, special_tokens_map.json, tokenizer_config.json, vocab.txt``` as saved automatically after training with PaddleNLP, or ```config.json, special_tokens_map.json, tokenizer_config.json, vocab.txt``` as saved automatically after training with HuggingFace.
 
 For more PyTorch2Paddle examples, see the [PyTorch model conversion docs](https://github.com/PaddlePaddle/X2Paddle/blob/develop/docs/inference_model_convertor/pytorch2paddle.md). For conversion from other frameworks, see the [X2Paddle model conversion tool](https://github.com/PaddlePaddle/X2Paddle).
@@ -191,7 +185,7 @@
 export CUDA_VISIBLE_DEVICES=0
 python run.py --config_path=./configs/cola.yaml --save_dir='./output/cola/'
 ```
 
-To evaluate only the accuracy of a model (original or compressed), launch the ```run.py``` script with the model folder ```model_dir``` in the config file changed to the post-compression save path ```./output/cola/```, and add ```--eval True``` to the command:
+To evaluate only the accuracy of a model (original or compressed), launch the ```run.py``` script with the model folder ```model_dir``` in the config file changed to the post-compression save path ```./output/cola```, and add ```--eval True``` to the command:
 ```shell
 export CUDA_VISIBLE_DEVICES=0
 python run.py --config_path=./configs/cola.yaml --eval True
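To make the tokenizer note above concrete, here is a minimal sketch of loading a PaddleNLP tokenizer from a converted model folder that already contains the config files listed in the README. The folder name ```./x2paddle_cola``` and the sample sentence are placeholders; ```return_attention_mask=True``` keeps the inputs consistent with the HuggingFace convention described above:

```python
from paddlenlp.transformers import AutoTokenizer

# Assumes ./x2paddle_cola holds vocab.txt, tokenizer_config.json, etc.
tokenizer = AutoTokenizer.from_pretrained('./x2paddle_cola')

# Pad to the same max_length used when tracing the PyTorch model, and
# return attention_mask to match the HuggingFace-style inputs.
encoded = tokenizer(
    'a placeholder sentence',
    max_seq_len=128,
    pad_to_max_seq_len=True,
    return_attention_mask=True)
print(list(encoded.keys()))  # input_ids, token_type_ids, attention_mask
```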
diff --git a/example/auto_compression/pytorch_huggingface/run.py b/example/auto_compression/pytorch_huggingface/run.py
index 9b3467e8..b723be4d 100644
--- a/example/auto_compression/pytorch_huggingface/run.py
+++ b/example/auto_compression/pytorch_huggingface/run.py
@@ -20,9 +20,10 @@ import paddle
 import paddle.nn as nn
 import functools
 from functools import partial
+import shutil
 from paddle.io import Dataset, BatchSampler, DataLoader
 from paddle.metric import Metric, Accuracy
-from transformers import AutoTokenizer
+from paddlenlp.transformers import AutoModelForTokenClassification, AutoTokenizer
 from paddlenlp.datasets import load_dataset
 from paddlenlp.data import Stack, Tuple, Pad
 from paddlenlp.metrics import AccuracyAndF1, Mcc, PearsonAndSpearman
@@ -164,7 +165,10 @@ def reader():
     ): fn(samples)
 
     train_batch_sampler = paddle.io.BatchSampler(
-        train_ds, batch_size=global_config['batch_size'], shuffle=True)
+        train_ds,
+        batch_size=global_config['batch_size'],
+        shuffle=True,
+        drop_last=True)
 
     feed_list = create_data_holder(global_config['task_name'],
                                    global_config['input_names'])
@@ -208,7 +212,8 @@ def reader():
         dev_batch_sampler_matched = paddle.io.BatchSampler(
             dev_ds_matched,
             batch_size=global_config['batch_size'],
-            shuffle=False)
+            shuffle=False,
+            drop_last=True)
         dev_data_loader_matched = DataLoader(
             dataset=dev_ds_matched,
             batch_sampler=dev_batch_sampler_matched,
@@ -219,21 +224,25 @@ def reader():
         dev_batch_sampler_mismatched = paddle.io.BatchSampler(
             dev_ds_mismatched,
             batch_size=global_config['batch_size'],
-            shuffle=False)
+            shuffle=False,
+            drop_last=True)
         dev_data_loader_mismatched = DataLoader(
             dataset=dev_ds_mismatched,
             batch_sampler=dev_batch_sampler_mismatched,
             collate_fn=batchify_fn,
             num_workers=0,
             feed_list=feed_list,
             return_list=False)
         return train_data_loader, dev_data_loader_matched, dev_data_loader_mismatched
     else:
         dev_ds = load_dataset(
             global_config['dataset'], global_config['task_name'], splits='dev')
         dev_ds = dev_ds.map(dev_trans_func, lazy=True)
         dev_batch_sampler = paddle.io.BatchSampler(
-            dev_ds, batch_size=global_config['batch_size'], shuffle=False)
+            dev_ds,
+            batch_size=global_config['batch_size'],
+            shuffle=False,
+            drop_last=True)
         dev_data_loader = DataLoader(
             dataset=dev_ds,
             batch_sampler=dev_batch_sampler,
@@ -355,6 +365,12 @@ def main():
 
     ac.compress()
 
+    for file_name in os.listdir(global_config['model_dir']):
+        if 'json' in file_name or 'txt' in file_name:
+            shutil.copy(
+                os.path.join(global_config['model_dir'], file_name),
+                args.save_dir)
+
 
 if __name__ == '__main__':
     paddle.enable_static()
diff --git a/example/auto_compression/pytorch_huggingface/run.sh b/example/auto_compression/pytorch_huggingface/run.sh
deleted file mode 100644
index eb444ba0..00000000
--- a/example/auto_compression/pytorch_huggingface/run.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-export CUDA_VISIBLE_DEVICES=0
-python run.py --config_path=./configs/cola.yaml --save_dir='./output/cola/'
--
GitLab
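The latency figures in the README diff above were measured with TensorRT at batch_size 40 and sequence length 128. A minimal sketch of such a measurement setup using Paddle Inference follows; the model paths, GPU memory pool size, subgraph threshold, and the Int8 precision choice are assumptions for illustration, not part of this patch:

```python
import numpy as np
import paddle.inference as paddle_infer

# Assumed paths to a compressed model exported by this demo.
config = paddle_infer.Config('./output/cola/infer.pdmodel',
                             './output/cola/infer.pdiparams')
config.enable_use_gpu(256, 0)  # 256 MB initial memory pool on GPU 0
# Mirrors the README test setup: TensorRT, batch_size=40, seq_len=128.
config.enable_tensorrt_engine(
    workspace_size=1 << 30,
    max_batch_size=40,
    min_subgraph_size=5,
    precision_mode=paddle_infer.PrecisionType.Int8,  # quantized model
    use_static=False,
    use_calib_mode=False)
predictor = paddle_infer.create_predictor(config)

# Feed zero-filled int64 tensors just to exercise the engine.
fake = np.zeros([40, 128], dtype='int64')
for name in predictor.get_input_names():
    predictor.get_input_handle(name).copy_from_cpu(fake)
predictor.run()
```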