diff --git a/test_tipc/configs/slanet/SLANet.yml b/test_tipc/configs/slanet/SLANet.yml new file mode 100644 index 0000000000000000000000000000000000000000..4ebfdd20f7356e004ed9cec24fe27fc7607aeb70 --- /dev/null +++ b/test_tipc/configs/slanet/SLANet.yml @@ -0,0 +1,143 @@ +Global: + use_gpu: true + epoch_num: 100 + log_smooth_window: 20 + print_batch_step: 20 + save_model_dir: ./output/SLANet + save_epoch_step: 400 + # evaluation is run every 1000 iterations after the 0th iteration + eval_batch_step: [0, 1000] + cal_metric_during_train: True + pretrained_model: + checkpoints: + save_inference_dir: ./output/SLANet/infer + use_visualdl: False + infer_img: doc/table/table.jpg + # for data or label process + character_dict_path: ppocr/utils/dict/table_structure_dict.txt + character_type: en + max_text_length: &max_text_length 500 + box_format: &box_format 'xyxy' # 'xywh', 'xyxy', 'xyxyxyxy' + infer_mode: False + use_sync_bn: True + save_res_path: 'output/infer' + +Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.999 + clip_norm: 5.0 + lr: + name: Piecewise + learning_rate: 0.001 + decay_epochs : [40, 50] + values : [0.001, 0.0001, 0.00005] + regularizer: + name: 'L2' + factor: 0.00000 + +Architecture: + model_type: table + algorithm: SLANet + Backbone: + name: PPLCNet + scale: 1.0 + pretrained: true + use_ssld: true + Neck: + name: CSPPAN + out_channels: 96 + Head: + name: SLAHead + hidden_size: 256 + max_text_length: *max_text_length + loc_reg_num: &loc_reg_num 4 + +Loss: + name: SLALoss + structure_weight: 1.0 + loc_weight: 2.0 + loc_loss: smooth_l1 + +PostProcess: + name: TableLabelDecode + merge_no_span_structure: &merge_no_span_structure True + +Metric: + name: TableMetric + main_indicator: acc + compute_bbox_metric: False + loc_reg_num: *loc_reg_num + box_format: *box_format + +Train: + dataset: + name: PubTabDataSet + data_dir: ./train_data/pubtabnet/train/ + label_file_list: [./train_data/pubtabnet/train.jsonl] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - TableLabelEncode: + learn_empty_box: False + merge_no_span_structure: *merge_no_span_structure + replace_empty_cell_token: False + loc_reg_num: *loc_reg_num + max_text_length: *max_text_length + - TableBoxEncode: + in_box_format: *box_format + out_box_format: *box_format + - ResizeTableImage: + max_len: 488 + - NormalizeImage: + scale: 1./255. + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: 'hwc' + - PaddingTableImage: + size: [488, 488] + - ToCHWImage: + - KeepKeys: + keep_keys: [ 'image', 'structure', 'bboxes', 'bbox_masks', 'shape' ] + loader: + shuffle: True + batch_size_per_card: 48 + drop_last: True + num_workers: 1 + +Eval: + dataset: + name: PubTabDataSet + data_dir: ./train_data/pubtabnet/test/ + label_file_list: [./train_data/pubtabnet/test.jsonl] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - TableLabelEncode: + learn_empty_box: False + merge_no_span_structure: *merge_no_span_structure + replace_empty_cell_token: False + loc_reg_num: *loc_reg_num + max_text_length: *max_text_length + - TableBoxEncode: + in_box_format: *box_format + out_box_format: *box_format + - ResizeTableImage: + max_len: 488 + - NormalizeImage: + scale: 1./255. + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: 'hwc' + - PaddingTableImage: + size: [488, 488] + - ToCHWImage: + - KeepKeys: + keep_keys: [ 'image', 'structure', 'bboxes', 'bbox_masks', 'shape' ] + loader: + shuffle: False + drop_last: False + batch_size_per_card: 48 + num_workers: 1 diff --git a/test_tipc/configs/slanet/train_infer_python.txt b/test_tipc/configs/slanet/train_infer_python.txt new file mode 100644 index 0000000000000000000000000000000000000000..05264360ac95d08ba11157372a9badef23afdc70 --- /dev/null +++ b/test_tipc/configs/slanet/train_infer_python.txt @@ -0,0 +1,59 @@ +===========================train_params=========================== +model_name:slanet +python:python3.7 +gpu_list:0|0,1 +Global.use_gpu:True|True +Global.auto_cast:fp32 +Global.epoch_num:lite_train_lite_infer=3|whole_train_whole_infer=50 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=16|whole_train_whole_infer=128 +Global.pretrained_model:./pretrain_models/en_ppstructure_mobile_v2.0_SLANet_train/best_accuracy +train_model_name:latest +train_infer_img_dir:./ppstructure/docs/table/table.jpg +null:null +## +trainer:norm_train +norm_train:tools/train.py -c test_tipc/configs/slanet/SLANet.yml -o Global.print_batch_step=1 Train.loader.shuffle=false +pact_train:null +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:null +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.checkpoints: +norm_export:tools/export_model.py -c test_tipc/configs/slanet/SLANet.yml -o +quant_export: +fpgm_export: +distill_export:null +export1:null +export2:null +## +infer_model:./inference/en_ppstructure_mobile_v2.0_SLANet_train +infer_export:null +infer_quant:False +inference:ppstructure/table/predict_table.py --det_model_dir=./inference/en_ppocr_mobile_v2.0_table_det_infer --rec_model_dir=./inference/en_ppocr_mobile_v2.0_table_rec_infer --rec_char_dict_path=./ppocr/utils/dict/table_dict.txt --table_char_dict_path=./ppocr/utils/dict/table_structure_dict.txt --image_dir=./ppstructure/docs/table/table.jpg --det_limit_side_len=736 --det_limit_type=min --output ./output/table +--use_gpu:True|False +--enable_mkldnn:False +--cpu_threads:6 +--rec_batch_num:1 +--use_tensorrt:False +--precision:fp32 +--table_model_dir: +--image_dir:./ppstructure/docs/table/table.jpg +null:null +--benchmark:False +null:null +===========================infer_benchmark_params========================== +random_infer_input:[{float32,[3,488,488]}] +===========================train_benchmark_params========================== +batch_size:32 +fp_items:fp32|fp16 +epoch:2 +--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile +flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096 diff --git a/test_tipc/prepare.sh b/test_tipc/prepare.sh index 5b5740113fc319accc8150f71c865a3f0465876d..25bfcce2a33ec0c7df1a7c898977f532c92513eb 100644 --- a/test_tipc/prepare.sh +++ b/test_tipc/prepare.sh @@ -139,6 +139,13 @@ if [ ${MODE} = "lite_train_lite_infer" ];then wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tar --no-check-certificate cd ./inference/ && tar xf en_ppocr_mobile_v2.0_table_det_infer.tar && tar xf en_ppocr_mobile_v2.0_table_rec_infer.tar && cd ../ fi + if [ ${model_name} == "slanet" ];then + wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_train.tar --no-check-certificate + cd ./pretrain_models/ && tar xf en_ppstructure_mobile_v2.0_SLANet_train.tar && cd ../ + wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_det_infer.tar --no-check-certificate + wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tar --no-check-certificate + cd ./inference/ && tar xf en_ppocr_mobile_v2.0_table_det_infer.tar && tar xf en_ppocr_mobile_v2.0_table_rec_infer.tar && cd ../ + fi if [[ ${model_name} =~ "det_r50_db_plusplus" ]];then wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.1/en_det/ResNet50_dcn_asf_synthtext_pretrained.pdparams --no-check-certificate fi