diff --git a/test_tipc/configs/layoutxlm_ser/ser_layoutxlm_xfund_zh.yml b/test_tipc/configs/layoutxlm_ser/ser_layoutxlm_xfund_zh.yml new file mode 100644 index 0000000000000000000000000000000000000000..d2be152f0bae7d87129904d87c56c6d777a1f338 --- /dev/null +++ b/test_tipc/configs/layoutxlm_ser/ser_layoutxlm_xfund_zh.yml @@ -0,0 +1,122 @@ +Global: + use_gpu: True + epoch_num: &epoch_num 200 + log_smooth_window: 10 + print_batch_step: 10 + save_model_dir: ./output/ser_layoutxlm_xfund_zh + save_epoch_step: 2000 + # evaluation is run every 187 iterations after the 0th iteration + eval_batch_step: [ 0, 187 ] + cal_metric_during_train: False + save_inference_dir: + use_visualdl: False + seed: 2022 + infer_img: ppstructure/docs/kie/input/zh_val_42.jpg + save_res_path: ./output/ser_layoutxlm_xfund_zh/res + +Architecture: + model_type: kie + algorithm: &algorithm "LayoutXLM" + Transform: + Backbone: + name: LayoutXLMForSer + pretrained: True + checkpoints: + num_classes: &num_classes 7 + +Loss: + name: VQASerTokenLayoutLMLoss + num_classes: *num_classes + key: "backbone_out" + +Optimizer: + name: AdamW + beta1: 0.9 + beta2: 0.999 + lr: + name: Linear + learning_rate: 0.00005 + epochs: *epoch_num + warmup_epoch: 2 + regularizer: + name: L2 + factor: 0.00000 + +PostProcess: + name: VQASerTokenLayoutLMPostProcess + class_path: &class_path train_data/XFUND/class_list_xfun.txt + +Metric: + name: VQASerTokenMetric + main_indicator: hmean + +Train: + dataset: + name: SimpleDataSet + data_dir: train_data/XFUND/zh_train/image + label_file_list: + - train_data/XFUND/zh_train/train.json + ratio_list: [ 1.0 ] + transforms: + - DecodeImage: # load image + img_mode: RGB + channel_first: False + - VQATokenLabelEncode: # Class handling label + contains_re: False + algorithm: *algorithm + class_path: *class_path + - VQATokenPad: + max_seq_len: &max_seq_len 512 + return_attention_mask: True + - VQASerTokenChunk: + max_seq_len: *max_seq_len + - Resize: + size: [224,224] + - NormalizeImage: 
scale: 1 + mean: [ 123.675, 116.28, 103.53 ] + std: [ 58.395, 57.12, 57.375 ] + order: 'hwc' + - ToCHWImage: + - KeepKeys: + keep_keys: [ 'input_ids', 'bbox', 'attention_mask', 'token_type_ids', 'image', 'labels'] # dataloader will return list in this order + loader: + shuffle: True + drop_last: False + batch_size_per_card: 8 + num_workers: 4 + +Eval: + dataset: + name: SimpleDataSet + data_dir: train_data/XFUND/zh_val/image + label_file_list: + - train_data/XFUND/zh_val/val.json + transforms: + - DecodeImage: # load image + img_mode: RGB + channel_first: False + - VQATokenLabelEncode: # Class handling label + contains_re: False + algorithm: *algorithm + class_path: *class_path + - VQATokenPad: + max_seq_len: *max_seq_len + return_attention_mask: True + - VQASerTokenChunk: + max_seq_len: *max_seq_len + - Resize: + size: [224,224] + - NormalizeImage: + scale: 1 + mean: [ 123.675, 116.28, 103.53 ] + std: [ 58.395, 57.12, 57.375 ] + order: 'hwc' + - ToCHWImage: + - KeepKeys: + keep_keys: [ 'input_ids', 'bbox', 'attention_mask', 'token_type_ids', 'image', 'labels'] # dataloader will return list in this order + loader: + shuffle: False + drop_last: False + batch_size_per_card: 8 + num_workers: 4 diff --git a/test_tipc/configs/layoutxlm_ser/train_infer_python.txt b/test_tipc/configs/layoutxlm_ser/train_infer_python.txt index 549a31e69e367237ec0396778162a5f91c8b7412..d07daa9a1429ec5cd1955ec64ded122a9d1a723d 100644 --- a/test_tipc/configs/layoutxlm_ser/train_infer_python.txt +++ b/test_tipc/configs/layoutxlm_ser/train_infer_python.txt @@ -13,7 +13,7 @@ train_infer_img_dir:ppstructure/docs/kie/input/zh_val_42.jpg null:null ## trainer:norm_train -norm_train:tools/train.py -c configs/kie/layoutlm_series/ser_layoutlm_xfund_zh.yml -o Global.print_batch_step=1 Global.eval_batch_step=[1000,1000] Train.loader.shuffle=false +norm_train:tools/train.py -c test_tipc/configs/layoutxlm_ser/ser_layoutxlm_xfund_zh.yml -o Global.print_batch_step=1 Global.eval_batch_step=[1000,1000] 
Train.loader.shuffle=false pact_train:null fpgm_train:null distill_train:null @@ -27,7 +27,7 @@ null:null ===========================infer_params=========================== Global.save_inference_dir:./output/ Architecture.Backbone.checkpoints: -norm_export:tools/export_model.py -c configs/kie/layoutlm_series/ser_layoutlm_xfund_zh.yml -o +norm_export:tools/export_model.py -c test_tipc/configs/layoutxlm_ser/ser_layoutxlm_xfund_zh.yml -o quant_export: fpgm_export: distill_export:null diff --git a/test_tipc/prepare.sh b/test_tipc/prepare.sh index bb4b58b4cac900166eeda4d9479fa6bd3fe69e02..728cc96ab8878fee89328b09a2cb7b3dce6940b9 100644 --- a/test_tipc/prepare.sh +++ b/test_tipc/prepare.sh @@ -21,7 +21,10 @@ model_name=$(func_parser_value "${lines[1]}") trainer_list=$(func_parser_value "${lines[14]}") if [ ${MODE} = "benchmark_train" ];then - pip install -r requirements.txt + python_name_list=$(func_parser_value "${lines[2]}") + array=(${python_name_list}) + python_name=${array[0]} + ${python_name} -m pip install -r requirements.txt if [[ ${model_name} =~ "ch_ppocr_mobile_v2_0_det" || ${model_name} =~ "det_mv3_db_v2_0" ]];then wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/pretrained/MobileNetV3_large_x0_5_pretrained.pdparams --no-check-certificate rm -rf ./train_data/icdar2015 @@ -107,8 +110,8 @@ if [ ${MODE} = "benchmark_train" ];then cd ../ fi if [ ${model_name} == "layoutxlm_ser" ] || [ ${model_name} == "vi_layoutxlm_ser" ]; then - pip install -r ppstructure/kie/requirements.txt - pip install opencv-python -U + ${python_name} -m pip install -r ppstructure/kie/requirements.txt + ${python_name} -m pip install opencv-python -U wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/ppstructure/dataset/XFUND.tar --no-check-certificate cd ./train_data/ && tar xf XFUND.tar # expand gt.txt 10 times @@ -122,6 +125,10 @@ if [ ${MODE} = "benchmark_train" ];then fi if [ ${MODE} = "lite_train_lite_infer" ];then + python_name_list=$(func_parser_value 
"${lines[2]}") + array=(${python_name_list}) + python_name=${array[0]} + ${python_name} -m pip install -r requirements.txt # pretrain lite train data wget -nc -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_5_pretrained.pdparams --no-check-certificate wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar --no-check-certificate @@ -230,8 +237,8 @@ if [ ${MODE} = "lite_train_lite_infer" ];then cd ./pretrain_models/ && tar xf rec_r32_gaspin_bilstm_att_train.tar && cd ../ fi if [ ${model_name} == "layoutxlm_ser" ] || [ ${model_name} == "vi_layoutxlm_ser" ]; then - pip install -r ppstructure/kie/requirements.txt - pip install opencv-python -U + ${python_name} -m pip install -r ppstructure/kie/requirements.txt + ${python_name} -m pip install opencv-python -U wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/ppstructure/dataset/XFUND.tar --no-check-certificate cd ./train_data/ && tar xf XFUND.tar cd ../