From 541954da526d575b448babf0859c0c7e0d831c12 Mon Sep 17 00:00:00 2001
From: Wenmuzhou <572459439@qq.com>
Date: Wed, 14 Sep 2022 15:30:30 +0800
Subject: [PATCH] add layoutxlm kl and pact

---
 deploy/slim/quantization/quant.py             |  6 +--
 deploy/slim/quantization/quant_kl.py          | 24 +++++++--
 .../layoutxlm_ser/train_pact_infer_python.txt | 53 +++++++++++++++++++
 .../layoutxlm_ser/train_ptq_infer_python.txt  | 21 ++++++++
 test_tipc/prepare.sh                          | 31 +++++++++--
 5 files changed, 125 insertions(+), 10 deletions(-)
 create mode 100644 test_tipc/configs/layoutxlm_ser/train_pact_infer_python.txt
 create mode 100644 test_tipc/configs/layoutxlm_ser/train_ptq_infer_python.txt

diff --git a/deploy/slim/quantization/quant.py b/deploy/slim/quantization/quant.py
index 64521b5e..ef2c3e28 100755
--- a/deploy/slim/quantization/quant.py
+++ b/deploy/slim/quantization/quant.py
@@ -158,8 +158,7 @@ def main(config, device, logger, vdl_writer):
 
     pre_best_model_dict = dict()
     # load fp32 model to begin quantization
-    if config["Global"]["pretrained_model"] is not None:
-        pre_best_model_dict = load_model(config, model)
+    pre_best_model_dict = load_model(config, model, None, config['Architecture']["model_type"])
 
     freeze_params = False
     if config['Architecture']["algorithm"] in ["Distillation"]:
@@ -184,8 +183,7 @@ def main(config, device, logger, vdl_writer):
         model=model)
 
     # resume PACT training process
-    if config["Global"]["checkpoints"] is not None:
-        pre_best_model_dict = load_model(config, model, optimizer)
+    pre_best_model_dict = load_model(config, model, optimizer, config['Architecture']["model_type"])
 
     # build metric
     eval_class = build_metric(config['Metric'])
diff --git a/deploy/slim/quantization/quant_kl.py b/deploy/slim/quantization/quant_kl.py
index cc3a455b..73e1a957 100755
--- a/deploy/slim/quantization/quant_kl.py
+++ b/deploy/slim/quantization/quant_kl.py
@@ -97,6 +97,17 @@ def sample_generator(loader):
 
     return __reader__
 
 
+def sample_generator_layoutxlm_ser(loader):
+    def __reader__():
+        for indx, data in enumerate(loader):
+            input_ids = np.array(data[0])
+            bbox = np.array(data[1])
+            attention_mask = np.array(data[2])
+            token_type_ids = np.array(data[3])
+            images = np.array(data[4])
+            yield [input_ids, bbox, attention_mask, token_type_ids, images]
+
+    return __reader__
 def main(config, device, logger, vdl_writer):
     # init dist environment
@@ -107,16 +118,18 @@ def main(config, device, logger, vdl_writer):
 
     # build dataloader
     config['Train']['loader']['num_workers'] = 0
+    is_layoutxlm_ser = config['Architecture']['model_type'] == 'kie' and config['Architecture']['Backbone']['name'] == 'LayoutXLMForSer'
     train_dataloader = build_dataloader(config, 'Train', device, logger)
     if config['Eval']:
         config['Eval']['loader']['num_workers'] = 0
         valid_dataloader = build_dataloader(config, 'Eval', device, logger)
+        if is_layoutxlm_ser:
+            train_dataloader = valid_dataloader
     else:
         valid_dataloader = None
 
     paddle.enable_static()
-    place = paddle.CPUPlace()
-    exe = paddle.static.Executor(place)
+    exe = paddle.static.Executor(device)
 
     if 'inference_model' in global_config.keys():  # , 'inference_model'):
         inference_model_dir = global_config['inference_model']
@@ -127,6 +140,11 @@ def main(config, device, logger, vdl_writer):
         raise ValueError(
             "Please set inference model dir in Global.inference_model or Global.pretrained_model for post-quantazition"
         )
+
+    if is_layoutxlm_ser:
+        generator = sample_generator_layoutxlm_ser(train_dataloader)
+    else:
+        generator = sample_generator(train_dataloader)
 
     paddleslim.quant.quant_post_static(
         executor=exe,
@@ -134,7 +152,7 @@ def main(config, device, logger, vdl_writer):
         model_filename='inference.pdmodel',
         params_filename='inference.pdiparams',
         quantize_model_path=global_config['save_inference_dir'],
-        sample_generator=sample_generator(train_dataloader),
+        sample_generator=generator,
         save_model_filename='inference.pdmodel',
         save_params_filename='inference.pdiparams',
         batch_size=1,
diff --git a/test_tipc/configs/layoutxlm_ser/train_pact_infer_python.txt b/test_tipc/configs/layoutxlm_ser/train_pact_infer_python.txt
new file mode 100644
index 00000000..fbf2a880
--- /dev/null
+++ b/test_tipc/configs/layoutxlm_ser/train_pact_infer_python.txt
@@ -0,0 +1,53 @@
+===========================train_params===========================
+model_name:layoutxlm_ser_PACT
+python:python3.7
+gpu_list:0|0,1
+Global.use_gpu:True|True
+Global.auto_cast:fp32
+Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=17
+Global.save_model_dir:./output/
+Train.loader.batch_size_per_card:lite_train_lite_infer=4|whole_train_whole_infer=8
+Architecture.Backbone.checkpoints:pretrain_models/ser_LayoutXLM_xfun_zh
+train_model_name:latest
+train_infer_img_dir:ppstructure/docs/kie/input/zh_val_42.jpg
+null:null
+##
+trainer:pact_train
+norm_train:null
+pact_train:deploy/slim/quantization/quant.py -c test_tipc/configs/layoutxlm_ser/ser_layoutxlm_xfund_zh.yml -o
+fpgm_train:null
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params===========================
+eval:null
+null:null
+##
+===========================infer_params===========================
+Global.save_inference_dir:./output/
+Architecture.Backbone.checkpoints:
+norm_export:null
+quant_export:deploy/slim/quantization/export_model.py -c test_tipc/configs/layoutxlm_ser/ser_layoutxlm_xfund_zh.yml -o
+fpgm_export: null
+distill_export:null
+export1:null
+export2:null
+##
+infer_model:null
+infer_export:null
+infer_quant:False
+inference:ppstructure/kie/predict_kie_token_ser.py --kie_algorithm=LayoutXLM --ser_dict_path=train_data/XFUND/class_list_xfun.txt --output=output
+--use_gpu:True|False
+--enable_mkldnn:False
+--cpu_threads:6
+--rec_batch_num:1
+--use_tensorrt:False
+--precision:fp32
+--ser_model_dir:
+--image_dir:./ppstructure/docs/kie/input/zh_val_42.jpg
+null:null
+--benchmark:False
+null:null
+===========================infer_benchmark_params==========================
+random_infer_input:[{float32,[3,224,224]}]
diff --git a/test_tipc/configs/layoutxlm_ser/train_ptq_infer_python.txt b/test_tipc/configs/layoutxlm_ser/train_ptq_infer_python.txt
new file mode 100644
index 00000000..47e1e702
--- /dev/null
+++ b/test_tipc/configs/layoutxlm_ser/train_ptq_infer_python.txt
@@ -0,0 +1,21 @@
+===========================train_params===========================
+model_name:layoutxlm_ser_KL
+python:python3.7
+Global.pretrained_model:
+Global.save_inference_dir:null
+infer_model:./inference/ser_LayoutXLM_xfun_zh_infer/
+infer_export:deploy/slim/quantization/quant_kl.py -c test_tipc/configs/layoutxlm_ser/ser_layoutxlm_xfund_zh.yml -o Train.loader.batch_size_per_card=1 Eval.loader.batch_size_per_card=1
+infer_quant:True
+inference:ppstructure/kie/predict_kie_token_ser.py --kie_algorithm=LayoutXLM --ser_dict_path=./train_data/XFUND/class_list_xfun.txt
+--use_gpu:True|False
+--enable_mkldnn:False
+--cpu_threads:6
+--rec_batch_num:1
+--use_tensorrt:False
+--precision:int8
+--ser_model_dir:
+--image_dir:./ppstructure/docs/kie/input/zh_val_42.jpg
+null:null
+--benchmark:False
+null:null
+null:null
diff --git a/test_tipc/prepare.sh b/test_tipc/prepare.sh
index 5d50a5ad..fcdd2f05 100644
--- a/test_tipc/prepare.sh
+++ b/test_tipc/prepare.sh
@@ -145,7 +145,7 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
     array=(${python_name_list})
     python_name=${array[0]}
     ${python_name} -m pip install -r requirements.txt
-    ${python_name} -m pip install git+https://github.com/LDOUBLEV/AutoLog
+    ${python_name} -m pip install https://paddleocr.bj.bcebos.com/libs/auto_log-1.2.0-py3-none-any.whl
     # pretrain lite train data
     wget -nc -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_5_pretrained.pdparams --no-check-certificate
     wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar --no-check-certificate
@@ -257,7 +257,17 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
         wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/rec_r32_gaspin_bilstm_att_train.tar --no-check-certificate
         cd ./pretrain_models/ && tar xf rec_r32_gaspin_bilstm_att_train.tar && cd ../
     fi
-    if [ ${model_name} == "layoutxlm_ser" ] || [ ${model_name} == "vi_layoutxlm_ser" ]; then
+    if [ ${model_name} == "layoutxlm_ser" ]; then
+        ${python_name} -m pip install -r ppstructure/kie/requirements.txt
+        ${python_name} -m pip install opencv-python -U
+        wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/ppstructure/dataset/XFUND.tar --no-check-certificate
+        cd ./train_data/ && tar xf XFUND.tar
+        cd ../
+
+        wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/pplayout/ser_LayoutXLM_xfun_zh.tar --no-check-certificate
+        cd ./pretrain_models/ && tar xf ser_LayoutXLM_xfun_zh.tar && cd ../
+    fi
+    if [ ${model_name} == "vi_layoutxlm_ser" ]; then
         ${python_name} -m pip install -r ppstructure/kie/requirements.txt
         ${python_name} -m pip install opencv-python -U
         wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/ppstructure/dataset/XFUND.tar --no-check-certificate
@@ -332,9 +342,18 @@ elif [ ${MODE} = "lite_train_whole_infer" ];then
         cd ./inference/ && tar xf en_ppocr_mobile_v2.0_table_det_infer.tar && tar xf en_ppocr_mobile_v2.0_table_rec_infer.tar && cd ../
     fi
 elif [ ${MODE} = "whole_infer" ];then
+    python_name_list=$(func_parser_value "${lines[2]}")
+    array=(${python_name_list})
+    python_name=${array[0]}
+    ${python_name} -m pip install paddleslim
+    ${python_name} -m pip install -r requirements.txt
     wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar --no-check-certificate
     wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/rec_inference.tar --no-check-certificate
     cd ./inference && tar xf rec_inference.tar && tar xf ch_det_data_50.tar && cd ../
+    wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/ppstructure/dataset/XFUND.tar --no-check-certificate
+    cd ./train_data/ && tar xf XFUND.tar && cd ../
+    head -n 2 train_data/XFUND/zh_val/val.json > train_data/XFUND/zh_val/val_lite.json
+    mv train_data/XFUND/zh_val/val_lite.json train_data/XFUND/zh_val/val.json
     if [ ${model_name} = "ch_ppocr_mobile_v2_0_det" ]; then
         eval_model_name="ch_ppocr_mobile_v2.0_det_train"
         rm -rf ./train_data/icdar2015
@@ -500,6 +519,12 @@ elif [ ${MODE} = "whole_infer" ];then
         wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tar --no-check-certificate
         cd ./inference/ && tar xf en_ppocr_mobile_v2.0_table_structure_infer.tar && tar xf en_ppocr_mobile_v2.0_table_det_infer.tar && tar xf en_ppocr_mobile_v2.0_table_rec_infer.tar && cd ../
     fi
+    if [[ ${model_name} =~ "layoutxlm_ser" ]]; then
+        ${python_name} -m pip install -r ppstructure/kie/requirements.txt
+        ${python_name} -m pip install opencv-python -U
+        wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/pplayout/ser_LayoutXLM_xfun_zh_infer.tar --no-check-certificate
+        cd ./inference/ && tar xf ser_LayoutXLM_xfun_zh_infer.tar && cd ../
+    fi
 fi
 
 if [[ ${model_name} =~ "KL" ]]; then
@@ -667,7 +692,7 @@ if [ ${MODE} = "serving_infer" ];then
     ${python_name} -m pip install paddle-serving-server-gpu
     ${python_name} -m pip install paddle_serving_client
     ${python_name} -m pip install paddle-serving-app
-    ${python_name} -m pip install git+https://github.com/LDOUBLEV/AutoLog
+    ${python_name} -m pip install https://paddleocr.bj.bcebos.com/libs/auto_log-1.2.0-py3-none-any.whl
     # wget model
     if [ ${model_name} == "ch_ppocr_mobile_v2_0_det_KL" ] || [ ${model_name} == "ch_ppocr_mobile_v2.0_rec_KL" ] ; then
         wget -nc -P ./inference https://paddleocr.bj.bcebos.com/tipc_fake_model/ch_ppocr_mobile_v2.0_det_klquant_infer.tar --no-check-certificate
--
GitLab