From 6460985d8d0068da6402eb59aa2cee1d597b3612 Mon Sep 17 00:00:00 2001 From: littletomatodonkey Date: Tue, 16 Aug 2022 10:47:31 +0800 Subject: [PATCH] add vi-layoutxlm (#7209) --- ppstructure/vqa/predict_vqa_token_ser.py | 6 +- .../vi_layoutxlm_ser/train_infer_python.txt | 59 +++++++++++++++++++ test_tipc/prepare.sh | 4 +- 3 files changed, 66 insertions(+), 3 deletions(-) create mode 100644 test_tipc/configs/vi_layoutxlm_ser/train_infer_python.txt diff --git a/ppstructure/vqa/predict_vqa_token_ser.py b/ppstructure/vqa/predict_vqa_token_ser.py index 855be42d..7647af9d 100644 --- a/ppstructure/vqa/predict_vqa_token_ser.py +++ b/ppstructure/vqa/predict_vqa_token_ser.py @@ -41,7 +41,11 @@ logger = get_logger() class SerPredictor(object): def __init__(self, args): self.ocr_engine = PaddleOCR( - use_angle_cls=False, show_log=False, use_gpu=args.use_gpu) + use_angle_cls=args.use_angle_cls, + det_model_dir=args.det_model_dir, + rec_model_dir=args.rec_model_dir, + show_log=False, + use_gpu=args.use_gpu) pre_process_list = [{ 'VQATokenLabelEncode': { diff --git a/test_tipc/configs/vi_layoutxlm_ser/train_infer_python.txt b/test_tipc/configs/vi_layoutxlm_ser/train_infer_python.txt new file mode 100644 index 00000000..59d34746 --- /dev/null +++ b/test_tipc/configs/vi_layoutxlm_ser/train_infer_python.txt @@ -0,0 +1,59 @@ +===========================train_params=========================== +model_name:vi_layoutxlm_ser +python:python3.7 +gpu_list:0|0,1 +Global.use_gpu:True|True +Global.auto_cast:fp32 +Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=17 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=4|whole_train_whole_infer=8 +Architecture.Backbone.checkpoints:null +train_model_name:latest +train_infer_img_dir:ppstructure/docs/vqa/input/zh_val_42.jpg +null:null +## +trainer:norm_train +norm_train:tools/train.py -c ./configs/kie/vi_layoutxlm/ser_vi_layoutxlm_xfund_zh.yml -o Global.print_batch_step=1 Global.eval_batch_step=[1000,1000] Train.loader.shuffle=false +pact_train:null +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:null +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Architecture.Backbone.checkpoints: +norm_export:tools/export_model.py -c ./configs/kie/vi_layoutxlm/ser_vi_layoutxlm_xfund_zh.yml -o +quant_export: +fpgm_export: +distill_export:null +export1:null +export2:null +## +infer_model:null +infer_export:null +infer_quant:False +inference:ppstructure/vqa/predict_vqa_token_ser.py --vqa_algorithm=LayoutXLM --ser_dict_path=train_data/XFUND/class_list_xfun.txt --output=output --ocr_order_method=tb-yx +--use_gpu:True|False +--enable_mkldnn:False +--cpu_threads:6 +--rec_batch_num:1 +--use_tensorrt:False +--precision:fp32 +--ser_model_dir: +--image_dir:./ppstructure/docs/vqa/input/zh_val_42.jpg +null:null +--benchmark:False +null:null +===========================infer_benchmark_params========================== +random_infer_input:[{float32,[3,224,224]}] +===========================train_benchmark_params========================== +batch_size:4 +fp_items:fp32|fp16 +epoch:3 +--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile +flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98 diff --git a/test_tipc/prepare.sh b/test_tipc/prepare.sh index 76543f39..259a1159 100644 --- a/test_tipc/prepare.sh +++ b/test_tipc/prepare.sh @@ -106,7 +106,7 @@ if [ ${MODE} = "benchmark_train" ];then ln -s ./icdar2015_benckmark ./icdar2015 cd ../ fi - if [ ${model_name} == "layoutxlm_ser" ]; then + if [ ${model_name} == "layoutxlm_ser" ] || [ ${model_name} == "vi_layoutxlm_ser" ]; then pip install -r ppstructure/vqa/requirements.txt pip install paddlenlp\>=2.3.5 --force-reinstall -i https://mirrors.aliyun.com/pypi/simple/ wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/ppstructure/dataset/XFUND.tar --no-check-certificate @@ -220,7 +220,7 @@ if [ ${MODE} = "lite_train_lite_infer" ];then wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/rec_r32_gaspin_bilstm_att_train.tar --no-check-certificate cd ./pretrain_models/ && tar xf rec_r32_gaspin_bilstm_att_train.tar && cd ../ fi - if [ ${model_name} == "layoutxlm_ser" ]; then + if [ ${model_name} == "layoutxlm_ser" ] || [ ${model_name} == "vi_layoutxlm_ser" ]; then pip install -r ppstructure/vqa/requirements.txt pip install paddlenlp\>=2.3.5 --force-reinstall -i https://mirrors.aliyun.com/pypi/simple/ wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/ppstructure/dataset/XFUND.tar --no-check-certificate -- GitLab