From 56ada55d0848032952e47bd8e4ef34afccc2500b Mon Sep 17 00:00:00 2001 From: WenmuZhou <572459439@qq.com> Date: Wed, 3 Aug 2022 11:53:01 +0800 Subject: [PATCH] add layoutxlm --- .../train_infer_python.txt | 4 +- .../configs/layoutxlm/train_infer_python.txt | 59 +++++++++++++++++++ test_tipc/docs/benchmark_train.md | 1 + test_tipc/prepare.sh | 20 +++++++ 4 files changed, 82 insertions(+), 2 deletions(-) create mode 100644 test_tipc/configs/layoutxlm/train_infer_python.txt diff --git a/test_tipc/configs/det_r50_vd_sast_totaltext_v2_0/train_infer_python.txt b/test_tipc/configs/det_r50_vd_sast_totaltext_v2_0/train_infer_python.txt index cf176b3c..cd00c749 100644 --- a/test_tipc/configs/det_r50_vd_sast_totaltext_v2_0/train_infer_python.txt +++ b/test_tipc/configs/det_r50_vd_sast_totaltext_v2_0/train_infer_python.txt @@ -27,7 +27,7 @@ null:null ===========================infer_params=========================== Global.save_inference_dir:./output/ Global.checkpoints: -norm_export:tools/export_model.py -c test_tipc/configs/det_r50_vd_sast_totaltext_v2.0/det_r50_vd_sast_totaltext.yml -o +norm_export:tools/export_model.py -c test_tipc/configs/det_r50_vd_sast_totaltext_v2_0/det_r50_vd_sast_totaltext.yml -o quant_export:null fpgm_export:null distill_export:null @@ -35,7 +35,7 @@ export1:null export2:null inference_dir:null train_model:./inference/det_r50_vd_sast_totaltext_v2.0_train/best_accuracy -infer_export:tools/export_model.py -c test_tipc/configs/det_r50_vd_sast_totaltext_v2.0/det_r50_vd_sast_totaltext.yml -o +infer_export:tools/export_model.py -c test_tipc/configs/det_r50_vd_sast_totaltext_v2_0/det_r50_vd_sast_totaltext.yml -o infer_quant:False inference:tools/infer/predict_det.py --use_gpu:True|False diff --git a/test_tipc/configs/layoutxlm/train_infer_python.txt b/test_tipc/configs/layoutxlm/train_infer_python.txt new file mode 100644 index 00000000..32b623a8 --- /dev/null +++ b/test_tipc/configs/layoutxlm/train_infer_python.txt @@ -0,0 +1,59 @@ 
+===========================train_params=========================== +model_name:layoutxlm +python:python3.7 +gpu_list:0|0,1 +Global.use_gpu:True|True +Global.auto_cast:fp32 +Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=17 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=8|whole_train_whole_infer=8 +Architecture.Backbone.checkpoints:null +train_model_name:latest +train_infer_img_dir:ppstructure/docs/vqa/input/zh_val_42.jpg +null:null +## +trainer:norm_train +norm_train:tools/train.py -c configs/vqa/ser/layoutxlm_xfund_zh.yml -o Global.print_batch_step=1 Global.eval_batch_step=[1000,1000] Train.loader.shuffle=false +pact_train:null +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:null +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Architecture.Backbone.checkpoints: +norm_export:tools/export_model.py -c configs/vqa/ser/layoutxlm_xfund_zh.yml -o +quant_export: +fpgm_export: +distill_export:null +export1:null +export2:null +## +infer_model:null +infer_export:null +infer_quant:False +inference:ppstructure/vqa/predict_vqa_token_ser.py --vqa_algorithm=LayoutXLM --ser_dict_path=train_data/XFUND/class_list_xfun.txt --output=output +--use_gpu:True|False +--enable_mkldnn:False +--cpu_threads:6 +--rec_batch_num:1 +--use_tensorrt:False +--precision:fp32 +--ser_model_dir: +--image_dir:./ppstructure/docs/vqa/input/zh_val_42.jpg +null:null +--benchmark:False +null:null +===========================infer_benchmark_params========================== +random_infer_input:[{float32,[3,224,224]}] +===========================train_benchmark_params========================== +batch_size:4 +fp_items:fp32|fp16 +epoch:3 +--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile 
+flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98 diff --git a/test_tipc/docs/benchmark_train.md b/test_tipc/docs/benchmark_train.md index 0196edc2..9076ccc4 100644 --- a/test_tipc/docs/benchmark_train.md +++ b/test_tipc/docs/benchmark_train.md @@ -69,6 +69,7 @@ train_log/ | det_r50_vd_east_v2.0 |[config](../configs/det_r50_vd_east_v2.0/train_infer_python.txt) | 42.485 | 42.624 / 42.663 / 42.561 |0.00239083 | 67.61 |67.825/ 68.299/ 68.51| 0.00999854 | 10,000| 2,000| | det_r50_vd_pse_v2.0 |[config](../configs/det_r50_vd_pse_v2.0/train_infer_python.txt) | 16.455 | 16.517 / 16.555 / 16.353 |0.012201752 | 27.02 |27.288 / 27.152 / 27.408| 0.009340339 | 10,000| 2,000| | rec_mv3_none_bilstm_ctc_v2.0 |[config](../configs/rec_mv3_none_bilstm_ctc_v2.0/train_infer_python.txt) | 2288.358 | 2291.906 / 2293.725 / 2290.05 |0.001602197 | 2336.17 |2327.042 / 2328.093 / 2344.915| 0.007622025 | 600,000| 160,000| +| layoutxlm |[config](../configs/layoutxlm/train_infer_python.txt) | 18.001 | 18.114 / 18.107 / 18.307 |0.010924783 | 21.982 | 21.507 / 21.116 / 21.406| 0.018180127 | 1490 | 1490| | PP-Structure-table |[config](../configs/en_table_structure/train_infer_python.txt) | 14.151 | 14.077 / 14.23 / 14.25 |0.012140351 | 16.285 | 16.595 / 16.878 / 16.531 | 0.020559308 | 20,000| 5,000| | det_r50_dcn_fce_ctw_v2.0 |[config](../configs/det_r50_dcn_fce_ctw_v2.0/train_infer_python.txt) | 14.057 | 14.029 / 14.02 / 14.014 |0.001069214 | 18.298 |18.411 / 18.376 / 18.331| 0.004345228 | 10,000| 2,000| | ch_PP-OCRv3_det |[config](../configs/ch_PP-OCRv3_det/train_infer_python.txt) | 8.622 | 8.431 / 8.423 / 8.479|0.006604552 | 14.203 |14.346 14.468 14.23| 0.016450097 | 10,000| 2,000| diff --git a/test_tipc/prepare.sh b/test_tipc/prepare.sh index 88154d61..35b9c73a 100644 --- a/test_tipc/prepare.sh +++ b/test_tipc/prepare.sh @@ -106,6 +106,19 @@ if [ ${MODE} = "benchmark_train" ];then ln -s ./icdar2015_benckmark ./icdar2015 cd ../ fi + if [ ${model_name} == 
"layoutxlm" ]; then + pip install -r ppstructure/vqa/requirements.txt + pip install "paddlenlp>=2.3.5" + wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/ppstructure/dataset/XFUND.tar --no-check-certificate + cd ./train_data/ && tar xf XFUND.tar + # expand train.json 10 times (concatenate 10 copies of the annotation file) + cd XFUND/zh_train + for i in `seq 10`;do cp train.json dup$i.txt;done + cat dup* > train.json && rm -rf dup* + cd ../../ + + cd ../ + fi fi if [ ${MODE} = "lite_train_lite_infer" ];then @@ -207,6 +220,13 @@ if [ ${MODE} = "lite_train_lite_infer" ];then wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/rec_r32_gaspin_bilstm_att_train.tar --no-check-certificate cd ./pretrain_models/ && tar xf rec_r32_gaspin_bilstm_att_train.tar && cd ../ fi + if [ ${model_name} == "layoutxlm" ]; then + pip install -r ppstructure/vqa/requirements.txt + pip install "paddlenlp>=2.3.5" + wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/ppstructure/dataset/XFUND.tar --no-check-certificate + cd ./train_data/ && tar xf XFUND.tar + cd ../ + fi elif [ ${MODE} = "whole_train_whole_infer" ];then wget -nc -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_5_pretrained.pdparams --no-check-certificate -- GitLab