From 541954da526d575b448babf0859c0c7e0d831c12 Mon Sep 17 00:00:00 2001
From: Wenmuzhou <572459439@qq.com>
Date: Wed, 14 Sep 2022 15:30:30 +0800
Subject: [PATCH] add layoutxlm kl and pact

---
 deploy/slim/quantization/quant.py             |  6 +--
 deploy/slim/quantization/quant_kl.py          | 24 +++++++--
 .../layoutxlm_ser/train_pact_infer_python.txt | 53 +++++++++++++++++++
 .../layoutxlm_ser/train_ptq_infer_python.txt  | 21 ++++++++
 test_tipc/prepare.sh                          | 31 +++++++++--
 5 files changed, 125 insertions(+), 10 deletions(-)
 create mode 100644 test_tipc/configs/layoutxlm_ser/train_pact_infer_python.txt
 create mode 100644 test_tipc/configs/layoutxlm_ser/train_ptq_infer_python.txt

diff --git a/deploy/slim/quantization/quant.py b/deploy/slim/quantization/quant.py
index 64521b5e..ef2c3e28 100755
--- a/deploy/slim/quantization/quant.py
+++ b/deploy/slim/quantization/quant.py
@@ -158,8 +158,7 @@ def main(config, device, logger, vdl_writer):
 
     pre_best_model_dict = dict()
     # load fp32 model to begin quantization
-    if config["Global"]["pretrained_model"] is not None:
-        pre_best_model_dict = load_model(config, model)
+    pre_best_model_dict = load_model(config, model, None, config['Architecture']["model_type"])
 
     freeze_params = False
     if config['Architecture']["algorithm"] in ["Distillation"]:
@@ -184,8 +183,7 @@ def main(config, device, logger, vdl_writer):
         model=model)
 
     # resume PACT training process
-    if config["Global"]["checkpoints"] is not None:
-        pre_best_model_dict = load_model(config, model, optimizer)
+    pre_best_model_dict = load_model(config, model, optimizer, config['Architecture']["model_type"])
 
     # build metric
     eval_class = build_metric(config['Metric'])
diff --git a/deploy/slim/quantization/quant_kl.py b/deploy/slim/quantization/quant_kl.py
index cc3a455b..73e1a957 100755
--- a/deploy/slim/quantization/quant_kl.py
+++ b/deploy/slim/quantization/quant_kl.py
@@ -97,6 +97,17 @@ def sample_generator(loader):
 
     return __reader__
 
 
+def sample_generator_layoutxlm_ser(loader):
+    def __reader__():
+        for indx, data in enumerate(loader):
+            input_ids = np.array(data[0])
+            bbox = np.array(data[1])
+            attention_mask = np.array(data[2])
+            token_type_ids = np.array(data[3])
+            images = np.array(data[4])
+            yield [input_ids, bbox, attention_mask, token_type_ids, images]
+
+    return __reader__
 def main(config, device, logger, vdl_writer):
     # init dist environment
@@ -107,16 +118,18 @@ def main(config, device, logger, vdl_writer):
 
     # build dataloader
     config['Train']['loader']['num_workers'] = 0
+    is_layoutxlm_ser = config['Architecture']['model_type'] == 'kie' and config['Architecture']['Backbone']['name'] == 'LayoutXLMForSer'
     train_dataloader = build_dataloader(config, 'Train', device, logger)
     if config['Eval']:
         config['Eval']['loader']['num_workers'] = 0
         valid_dataloader = build_dataloader(config, 'Eval', device, logger)
+        if is_layoutxlm_ser:
+            train_dataloader = valid_dataloader
     else:
         valid_dataloader = None
 
     paddle.enable_static()
-    place = paddle.CPUPlace()
-    exe = paddle.static.Executor(place)
+    exe = paddle.static.Executor(device)
 
     if 'inference_model' in global_config.keys():  # , 'inference_model'):
         inference_model_dir = global_config['inference_model']
@@ -127,6 +140,11 @@ def main(config, device, logger, vdl_writer):
         raise ValueError(
             "Please set inference model dir in Global.inference_model or Global.pretrained_model for post-quantazition"
         )
+
+    if is_layoutxlm_ser:
+        generator = sample_generator_layoutxlm_ser(train_dataloader)
+    else:
+        generator = sample_generator(train_dataloader)
 
     paddleslim.quant.quant_post_static(
         executor=exe,
@@ -134,7 +152,7 @@ def main(config, device, logger, vdl_writer):
         model_filename='inference.pdmodel',
         params_filename='inference.pdiparams',
         quantize_model_path=global_config['save_inference_dir'],
-        sample_generator=sample_generator(train_dataloader),
+        sample_generator=generator,
         save_model_filename='inference.pdmodel',
         save_params_filename='inference.pdiparams',
         batch_size=1,
diff --git a/test_tipc/configs/layoutxlm_ser/train_pact_infer_python.txt b/test_tipc/configs/layoutxlm_ser/train_pact_infer_python.txt
new file mode 100644
index 00000000..fbf2a880
--- /dev/null
+++ b/test_tipc/configs/layoutxlm_ser/train_pact_infer_python.txt
@@ -0,0 +1,53 @@
+===========================train_params===========================
+model_name:layoutxlm_ser_PACT
+python:python3.7
+gpu_list:0|0,1
+Global.use_gpu:True|True
+Global.auto_cast:fp32
+Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=17
+Global.save_model_dir:./output/
+Train.loader.batch_size_per_card:lite_train_lite_infer=4|whole_train_whole_infer=8
+Architecture.Backbone.checkpoints:pretrain_models/ser_LayoutXLM_xfun_zh
+train_model_name:latest
+train_infer_img_dir:ppstructure/docs/kie/input/zh_val_42.jpg
+null:null
+##
+trainer:pact_train
+norm_train:null
+pact_train:deploy/slim/quantization/quant.py -c test_tipc/configs/layoutxlm_ser/ser_layoutxlm_xfund_zh.yml -o
+fpgm_train:null
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params===========================
+eval:null
+null:null
+##
+===========================infer_params===========================
+Global.save_inference_dir:./output/
+Architecture.Backbone.checkpoints:
+norm_export:null
+quant_export:deploy/slim/quantization/export_model.py -c test_tipc/configs/layoutxlm_ser/ser_layoutxlm_xfund_zh.yml -o
+fpgm_export: null
+distill_export:null
+export1:null
+export2:null
+##
+infer_model:null
+infer_export:null
+infer_quant:False
+inference:ppstructure/kie/predict_kie_token_ser.py --kie_algorithm=LayoutXLM --ser_dict_path=train_data/XFUND/class_list_xfun.txt --output=output
+--use_gpu:True|False
+--enable_mkldnn:False
+--cpu_threads:6
+--rec_batch_num:1
+--use_tensorrt:False
+--precision:fp32
+--ser_model_dir:
+--image_dir:./ppstructure/docs/kie/input/zh_val_42.jpg
+null:null
+--benchmark:False
+null:null
+===========================infer_benchmark_params==========================
+random_infer_input:[{float32,[3,224,224]}]
diff --git a/test_tipc/configs/layoutxlm_ser/train_ptq_infer_python.txt b/test_tipc/configs/layoutxlm_ser/train_ptq_infer_python.txt
new file mode 100644
index 00000000..47e1e702
--- /dev/null
+++ b/test_tipc/configs/layoutxlm_ser/train_ptq_infer_python.txt
@@ -0,0 +1,21 @@
+===========================train_params===========================
+model_name:layoutxlm_ser_KL
+python:python3.7
+Global.pretrained_model:
+Global.save_inference_dir:null
+infer_model:./inference/ser_LayoutXLM_xfun_zh_infer/
+infer_export:deploy/slim/quantization/quant_kl.py -c test_tipc/configs/layoutxlm_ser/ser_layoutxlm_xfund_zh.yml -o Train.loader.batch_size_per_card=1 Eval.loader.batch_size_per_card=1
+infer_quant:True
+inference:ppstructure/kie/predict_kie_token_ser.py --kie_algorithm=LayoutXLM --ser_dict_path=./train_data/XFUND/class_list_xfun.txt
+--use_gpu:True|False
+--enable_mkldnn:False
+--cpu_threads:6
+--rec_batch_num:1
+--use_tensorrt:False
+--precision:int8
+--ser_model_dir:
+--image_dir:./ppstructure/docs/kie/input/zh_val_42.jpg
+null:null
+--benchmark:False
+null:null
+null:null
diff --git a/test_tipc/prepare.sh b/test_tipc/prepare.sh
index 5d50a5ad..fcdd2f05 100644
--- a/test_tipc/prepare.sh
+++ b/test_tipc/prepare.sh
@@ -145,7 +145,7 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
     array=(${python_name_list})
     python_name=${array[0]}
     ${python_name} -m pip install -r requirements.txt
-    ${python_name} -m pip install git+https://github.com/LDOUBLEV/AutoLog
+    ${python_name} -m pip install https://paddleocr.bj.bcebos.com/libs/auto_log-1.2.0-py3-none-any.whl
     # pretrain lite train data
     wget -nc -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_5_pretrained.pdparams --no-check-certificate
     wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar --no-check-certificate
@@ -257,7 +257,17 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
         wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/rec_r32_gaspin_bilstm_att_train.tar --no-check-certificate
         cd ./pretrain_models/ && tar xf rec_r32_gaspin_bilstm_att_train.tar && cd ../
     fi
-    if [ ${model_name} == "layoutxlm_ser" ] || [ ${model_name} == "vi_layoutxlm_ser" ]; then
+    if [ ${model_name} == "layoutxlm_ser" ]; then
+        ${python_name} -m pip install -r ppstructure/kie/requirements.txt
+        ${python_name} -m pip install opencv-python -U
+        wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/ppstructure/dataset/XFUND.tar --no-check-certificate
+        cd ./train_data/ && tar xf XFUND.tar
+        cd ../
+
+        wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/pplayout/ser_LayoutXLM_xfun_zh.tar --no-check-certificate
+        cd ./pretrain_models/ && tar xf ser_LayoutXLM_xfun_zh.tar && cd ../
+    fi
+    if [ ${model_name} == "vi_layoutxlm_ser" ]; then
         ${python_name} -m pip install -r ppstructure/kie/requirements.txt
         ${python_name} -m pip install opencv-python -U
         wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/ppstructure/dataset/XFUND.tar --no-check-certificate
@@ -332,9 +342,18 @@ elif [ ${MODE} = "lite_train_whole_infer" ];then
         cd ./inference/ && tar xf en_ppocr_mobile_v2.0_table_det_infer.tar && tar xf en_ppocr_mobile_v2.0_table_rec_infer.tar && cd ../
     fi
 elif [ ${MODE} = "whole_infer" ];then
+    python_name_list=$(func_parser_value "${lines[2]}")
+    array=(${python_name_list})
+    python_name=${array[0]}
+    ${python_name} -m pip install paddleslim
+    ${python_name} -m pip install -r requirements.txt
     wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar --no-check-certificate
     wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/rec_inference.tar --no-check-certificate
     cd ./inference && tar xf rec_inference.tar && tar xf ch_det_data_50.tar && cd ../
+    wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/ppstructure/dataset/XFUND.tar --no-check-certificate
+    cd ./train_data/ && tar xf XFUND.tar && cd ../
+    head -n 2 train_data/XFUND/zh_val/val.json > train_data/XFUND/zh_val/val_lite.json
+    mv train_data/XFUND/zh_val/val_lite.json train_data/XFUND/zh_val/val.json
     if [ ${model_name} = "ch_ppocr_mobile_v2_0_det" ]; then
         eval_model_name="ch_ppocr_mobile_v2.0_det_train"
         rm -rf ./train_data/icdar2015
@@ -500,6 +519,12 @@ elif [ ${MODE} = "whole_infer" ];then
         wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tar --no-check-certificate
         cd ./inference/ && tar xf en_ppocr_mobile_v2.0_table_structure_infer.tar && tar xf en_ppocr_mobile_v2.0_table_det_infer.tar && tar xf en_ppocr_mobile_v2.0_table_rec_infer.tar && cd ../
     fi
+    if [[ ${model_name} =~ "layoutxlm_ser" ]]; then
+        ${python_name} -m pip install -r ppstructure/kie/requirements.txt
+        ${python_name} -m pip install opencv-python -U
+        wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/pplayout/ser_LayoutXLM_xfun_zh_infer.tar --no-check-certificate
+        cd ./inference/ && tar xf ser_LayoutXLM_xfun_zh_infer.tar && cd ../
+    fi
 fi
 
 if [[ ${model_name} =~ "KL" ]]; then
@@ -667,7 +692,7 @@ if [ ${MODE} = "serving_infer" ];then
     ${python_name} -m pip install paddle-serving-server-gpu
     ${python_name} -m pip install paddle_serving_client
     ${python_name} -m pip install paddle-serving-app
-    ${python_name} -m pip install git+https://github.com/LDOUBLEV/AutoLog
+    ${python_name} -m pip install https://paddleocr.bj.bcebos.com/libs/auto_log-1.2.0-py3-none-any.whl
     # wget model
     if [ ${model_name} == "ch_ppocr_mobile_v2_0_det_KL" ] || [ ${model_name} == "ch_ppocr_mobile_v2.0_rec_KL" ] ; then
         wget -nc -P ./inference https://paddleocr.bj.bcebos.com/tipc_fake_model/ch_ppocr_mobile_v2.0_det_klquant_infer.tar --no-check-certificate
--
GitLab