diff --git a/configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml b/configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml index 000d95e892cb8e6dcceeb7c22264c28934d1000c..d0c1de28ced5cca64f13fce75dbe7e1311e3d20d 100644 --- a/configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml +++ b/configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml @@ -17,6 +17,7 @@ Global: infer_img: doc/imgs_en/img_10.jpg save_res_path: ./checkpoints/det_db/predicts_db.txt distributed: true + d2s_train_image_shape: [3, -1, -1] Architecture: name: DistillationModel diff --git a/configs/kie/vi_layoutxlm/ser_vi_layoutxlm_xfund_zh.yml b/configs/kie/vi_layoutxlm/ser_vi_layoutxlm_xfund_zh.yml index b8aa44dde8fd3fdc4ff14bbca20513b95178cdb0..d77951785132cb21b29819317acd27a18c234175 100644 --- a/configs/kie/vi_layoutxlm/ser_vi_layoutxlm_xfund_zh.yml +++ b/configs/kie/vi_layoutxlm/ser_vi_layoutxlm_xfund_zh.yml @@ -12,6 +12,7 @@ Global: use_visualdl: False seed: 2022 infer_img: ppstructure/docs/kie/input/zh_val_42.jpg + d2s_train_image_shape: [3, 224, 224] # if you want to predict using the groundtruth ocr info, # you can use the following config # infer_img: train_data/XFUND/zh_val/val.json diff --git a/configs/rec/PP-OCRv3/ch_PP-OCRv3_rec_distillation.yml b/configs/rec/PP-OCRv3/ch_PP-OCRv3_rec_distillation.yml index 7843f02a23d253fbcbbe65b3e86d3a22c25958de..d8dd54723e128f195d5e0ec0edfb27e8fb0fd40a 100644 --- a/configs/rec/PP-OCRv3/ch_PP-OCRv3_rec_distillation.yml +++ b/configs/rec/PP-OCRv3/ch_PP-OCRv3_rec_distillation.yml @@ -19,6 +19,7 @@ Global: use_space_char: true distributed: true save_res_path: ./output/rec/predicts_ppocrv3_distillation.txt + d2s_train_image_shape: [3, 48, -1] Optimizer: diff --git a/configs/table/SLANet.yml b/configs/table/SLANet.yml index a896614556e36f77bd784218b6c2f29914219dbe..4a8c35d8b681b793ebcf116c3c568e73fe388aad 100644 --- a/configs/table/SLANet.yml +++ b/configs/table/SLANet.yml @@ -21,6 +21,7 @@ Global: infer_mode: False use_sync_bn: True save_res_path: 'output/infer' + d2s_train_image_shape: [3, -1, -1] Optimizer: name: Adam diff --git a/configs/table/table_master.yml b/configs/table/table_master.yml index df437f7c95523c5fe12f7166d011b4ad8473628b..125162f1889914b7bd27637044497addb580a1aa 100755 --- a/configs/table/table_master.yml +++ b/configs/table/table_master.yml @@ -17,6 +17,7 @@ Global: infer_mode: false max_text_length: &max_text_length 500 box_format: &box_format 'xywh' # 'xywh', 'xyxy', 'xyxyxyxy' + d2s_train_image_shape: [3, 480, 480] Optimizer: diff --git a/ppocr/modeling/architectures/__init__.py b/ppocr/modeling/architectures/__init__.py index 2f8506b72af33ad712480b1cf0706e58b7c60829..1059af237659cdf96a159b4144fee26cded451f6 100755 --- a/ppocr/modeling/architectures/__init__.py +++ b/ppocr/modeling/architectures/__init__.py @@ -38,9 +38,9 @@ def build_model(config): def apply_to_static(model, config, logger): if config["Global"].get("to_static", False) is not True: return model - assert "image_shape" in config[ - "Global"], "image_shape must be assigned for static training mode..." - supported_list = ["DB", "SVTR_LCNet", "TableMaster"] + assert "d2s_train_image_shape" in config[ + "Global"], "d2s_train_image_shape must be assigned for static training mode..." + supported_list = ["DB", "SVTR_LCNet", "TableMaster", "LayoutXLM", "SLANet"] if config["Architecture"]["algorithm"] in ["Distillation"]: algo = list(config["Architecture"]["Models"].values())[0]["algorithm"] else: @@ -49,7 +49,7 @@ def apply_to_static(model, config, logger): specs = [ InputSpec( - [None] + config["Global"]["image_shape"], dtype='float32') + [None] + config["Global"]["d2s_train_image_shape"], dtype='float32') ] if algo == "SVTR_LCNet": @@ -62,7 +62,7 @@ def apply_to_static(model, config, logger): [None], dtype='int64'), InputSpec( [None], dtype='float64') ]) - if algo == "TableMaster": + elif algo == "TableMaster": specs.append( [ InputSpec( @@ -76,6 +76,34 @@ def apply_to_static(model, config, logger): InputSpec( [None, 6], dtype='float32'), ]) + elif algo == "LayoutXLM": + specs = [[ + InputSpec( + shape=[None, 512], dtype="int64"), # input_ids + InputSpec( + shape=[None, 512, 4], dtype="int64"), # bbox + InputSpec( + shape=[None, 512], dtype="int64"), # attention_mask + InputSpec( + shape=[None, 512], dtype="int64"), # token_type_ids + InputSpec( + shape=[None, 3, 224, 224], dtype="float32"), # image + InputSpec( + shape=[None, 512], dtype="int64"), # label + ]] + elif algo == "SLANet": + specs.append([ + InputSpec( + [None, config["Global"]["max_text_length"] + 2], dtype='int64'), + InputSpec( + [None, config["Global"]["max_text_length"] + 2, 4], + dtype='float32'), + InputSpec( + [None, config["Global"]["max_text_length"] + 2, 1], + dtype='float32'), + InputSpec( + [None, 6], dtype='float64'), + ]) model = to_static(model, input_spec=specs) logger.info("Successfully to apply @to_static with specs: {}".format(specs)) return model diff --git a/ppocr/utils/network.py b/ppocr/utils/network.py index 327863f7d4256db445e90287c16889e6bb0ee51d..1d7451aa6764f366a4c9ba0011c58e32b60470ba 100644 --- a/ppocr/utils/network.py +++ b/ppocr/utils/network.py @@ -20,6 +20,8 @@ from tqdm import tqdm from ppocr.utils.logging import get_logger +MODELS_DIR = os.path.expanduser("~/.paddleocr/models/") + def download_with_progressbar(url, save_path): logger = get_logger() diff --git a/test_tipc/configs/ch_PP-OCRv3_det/train_infer_python.txt b/test_tipc/configs/ch_PP-OCRv3_det/train_infer_python.txt index bf10aebe3e9aa67e30ce7a20cb07f376825e39ae..82f00fd7a3c6a1ebad6feabb983d765c9857ffd0 100644 --- a/test_tipc/configs/ch_PP-OCRv3_det/train_infer_python.txt +++ b/test_tipc/configs/ch_PP-OCRv3_det/train_infer_python.txt @@ -17,7 +17,7 @@ norm_train:tools/train.py -c configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml -o pact_train:null fpgm_train:null distill_train:null -null:null +to_static_train:Global.to_static=true null:null ## ===========================eval_params=========================== diff --git a/test_tipc/configs/ch_PP-OCRv3_rec/ch_PP-OCRv3_rec_distillation.yml b/test_tipc/configs/ch_PP-OCRv3_rec/ch_PP-OCRv3_rec_distillation.yml index b61e5e468f49e1404dc92585f79c96a8b5d66929..63362135737f1665fecb16d5b7d6a19c8cd1b8da 100644 --- a/test_tipc/configs/ch_PP-OCRv3_rec/ch_PP-OCRv3_rec_distillation.yml +++ b/test_tipc/configs/ch_PP-OCRv3_rec/ch_PP-OCRv3_rec_distillation.yml @@ -19,6 +19,7 @@ Global: use_space_char: true distributed: true save_res_path: ./output/rec/predicts_ppocrv3_distillation.txt + d2s_train_image_shape: [3, 48, -1] Optimizer: diff --git a/test_tipc/configs/ch_PP-OCRv3_rec/train_infer_python.txt b/test_tipc/configs/ch_PP-OCRv3_rec/train_infer_python.txt index fee08b08ede0f61ae4f57fd42dba303301798a3e..47f8d8a53dc33918370fe44744600cb6a4f58124 100644 --- a/test_tipc/configs/ch_PP-OCRv3_rec/train_infer_python.txt +++ b/test_tipc/configs/ch_PP-OCRv3_rec/train_infer_python.txt @@ -17,7 +17,7 @@ norm_train:tools/train.py -c test_tipc/configs/ch_PP-OCRv3_rec/ch_PP-OCRv3_rec_d pact_train:null fpgm_train:null distill_train:null -null:null +to_static_train:Global.to_static=true null:null ## ===========================eval_params=========================== diff --git a/test_tipc/configs/slanet/SLANet.yml b/test_tipc/configs/slanet/SLANet.yml index 0d55d70d64e29716e942517e9c0d4909e6f70f9b..813363fb180e1eaf8214a19133916fcdeede6648 100644 --- a/test_tipc/configs/slanet/SLANet.yml +++ b/test_tipc/configs/slanet/SLANet.yml @@ -21,6 +21,7 @@ Global: infer_mode: False use_sync_bn: True save_res_path: 'output/infer' + d2s_train_image_shape: [3, -1, -1] Optimizer: name: Adam diff --git a/test_tipc/configs/slanet/train_infer_python.txt b/test_tipc/configs/slanet/train_infer_python.txt index 05264360ac95d08ba11157372a9badef23afdc70..0f51bd49bca3119c10d7b41ef5f84dc681f134ab 100644 --- a/test_tipc/configs/slanet/train_infer_python.txt +++ b/test_tipc/configs/slanet/train_infer_python.txt @@ -17,7 +17,7 @@ norm_train:tools/train.py -c test_tipc/configs/slanet/SLANet.yml -o Global.print pact_train:null fpgm_train:null distill_train:null -null:null +to_static_train:Global.to_static=true null:null ## ===========================eval_params=========================== diff --git a/test_tipc/configs/table_master/table_master.yml b/test_tipc/configs/table_master/table_master.yml index f818a4c51b2890d6d4218ad23184890521da1b24..b27bdae542bf85d8f2932372d9002c2de8d6c652 100644 --- a/test_tipc/configs/table_master/table_master.yml +++ b/test_tipc/configs/table_master/table_master.yml @@ -16,7 +16,7 @@ Global: character_dict_path: ppocr/utils/dict/table_master_structure_dict.txt infer_mode: false max_text_length: 500 - image_shape: [3, 480, 480] + d2s_train_image_shape: [3, 480, 480] Optimizer: diff --git a/test_tipc/configs/vi_layoutxlm_ser/train_infer_python.txt b/test_tipc/configs/vi_layoutxlm_ser/train_infer_python.txt index adad78bb76e34635a632ef7c1b55e212bc4b636a..e64f169b1298f8870bde1a576676fc25414c7502 100644 --- a/test_tipc/configs/vi_layoutxlm_ser/train_infer_python.txt +++ b/test_tipc/configs/vi_layoutxlm_ser/train_infer_python.txt @@ -17,7 +17,7 @@ norm_train:tools/train.py -c ./configs/kie/vi_layoutxlm/ser_vi_layoutxlm_xfund_z pact_train:null fpgm_train:null distill_train:null -null:null +to_static_train:Global.to_static=true null:null ## ===========================eval_params===========================