From bb53c8d1002197a447ea3d8f7c5c2e4044267d39 Mon Sep 17 00:00:00 2001 From: WenmuZhou <572459439@qq.com> Date: Tue, 16 Aug 2022 10:46:09 +0000 Subject: [PATCH] add table model link --- configs/table/SLANet.yml | 2 +- configs/table/table_mv3.yml | 4 +- paddleocr.py | 24 ++++++++-- ppocr/utils/dict/table_structure_dict_ch.txt | 48 ++++++++++++++++++++ ppstructure/docs/models_list.md | 4 +- ppstructure/docs/models_list_en.md | 4 +- ppstructure/table/README.md | 23 ++++++---- ppstructure/table/README_ch.md | 23 ++++++---- ppstructure/utility.py | 2 +- 9 files changed, 107 insertions(+), 27 deletions(-) create mode 100644 ppocr/utils/dict/table_structure_dict_ch.txt diff --git a/configs/table/SLANet.yml b/configs/table/SLANet.yml index 2264eb14..384c9585 100644 --- a/configs/table/SLANet.yml +++ b/configs/table/SLANet.yml @@ -61,7 +61,7 @@ Loss: PostProcess: name: TableLabelDecode - merge_no_span_structure: &merge_no_span_structure False + merge_no_span_structure: &merge_no_span_structure True Metric: name: TableMetric diff --git a/configs/table/table_mv3.yml b/configs/table/table_mv3.yml index 87cda7db..16c14574 100755 --- a/configs/table/table_mv3.yml +++ b/configs/table/table_mv3.yml @@ -96,8 +96,8 @@ Train: Eval: dataset: name: PubTabDataSet - data_dir: /home/zhoujun20/table/PubTabNe/pubtabnet/val/ - label_file_list: [/home/zhoujun20/table/PubTabNe/pubtabnet/val_500.jsonl] + data_dir: train_data/table/pubtabnet/val/ + label_file_list: [train_data/table/pubtabnet/PubTabNet_2.0.0_val.jsonl] transforms: - DecodeImage: # load image img_mode: BGR diff --git a/paddleocr.py b/paddleocr.py index 9a9958ab..fb1427b8 100644 --- a/paddleocr.py +++ b/paddleocr.py @@ -275,12 +275,14 @@ MODEL_URLS = { 'PP-Structurev2': { 'table': { 'en': { - 'url': '', + 'url': + 'https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_infer.tar', 'dict_path': 'ppocr/utils/dict/table_structure_dict.txt' }, 'ch': { - 'url': '', - 'dict_path': 
'ppocr/utils/dict/table_structure_dict.txt' + 'url': + 'https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar', + 'dict_path': 'ppocr/utils/dict/table_structure_dict_ch.txt' } }, 'layout': { @@ -565,7 +567,6 @@ class PPStructure(StructureSystem): if params.layout_dict_path is None: params.layout_dict_path = str( Path(__file__).parent / layout_model_config['dict_path']) - logger.debug(params) super().__init__(params) @@ -628,3 +629,18 @@ def main(): for item in result: item.pop('img') logger.info(item) + + +if __name__ == "__main__": + table_engine = PPStructure(layout=False, show_log=True) + + save_folder = './output' + img_path = 'ppstructure/docs/table/table.jpg' + img = cv2.imread(img_path) + result = table_engine(img) + save_structure_res(result, save_folder, + os.path.basename(img_path).split('.')[0]) + + for line in result: + line.pop('img') + print(line) diff --git a/ppocr/utils/dict/table_structure_dict_ch.txt b/ppocr/utils/dict/table_structure_dict_ch.txt new file mode 100644 index 00000000..0c59c0e9 --- /dev/null +++ b/ppocr/utils/dict/table_structure_dict_ch.txt @@ -0,0 +1,48 @@ + + + + + + + + + + colspan="2" + colspan="3" + colspan="4" + colspan="5" + colspan="6" + colspan="7" + colspan="8" + colspan="9" + colspan="10" + colspan="11" + colspan="12" + colspan="13" + colspan="14" + colspan="15" + colspan="16" + colspan="17" + colspan="18" + colspan="19" + colspan="20" + rowspan="2" + rowspan="3" + rowspan="4" + rowspan="5" + rowspan="6" + rowspan="7" + rowspan="8" + rowspan="9" + rowspan="10" + rowspan="11" + rowspan="12" + rowspan="13" + rowspan="14" + rowspan="15" + rowspan="16" + rowspan="17" + rowspan="18" + rowspan="19" + rowspan="20" diff --git a/ppstructure/docs/models_list.md b/ppstructure/docs/models_list.md index 89fa98d3..ef2994ca 100644 --- a/ppstructure/docs/models_list.md +++ b/ppstructure/docs/models_list.md @@ -34,7 +34,9 @@ |模型名称|模型简介|推理模型大小|下载地址| | --- | --- | --- | --- | 
-|en_ppocr_mobile_v2.0_table_structure|PubTabNet数据集训练的英文表格场景的表格结构预测|18.6M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.1/table/en_ppocr_mobile_v2.0_table_structure_train.tar) | +|en_ppocr_mobile_v2.0_table_structure|基于TableRec-RARE在PubTabNet数据集上训练的英文表格识别模型|18.6M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.1/table/en_ppocr_mobile_v2.0_table_structure_train.tar) | +|en_ppstructure_mobile_v2.0_SLANet|基于SLANet在PubTabNet数据集上训练的英文表格识别模型|9M|[推理模型](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_train.tar) | +|ch_ppstructure_mobile_v2.0_SLANet|基于SLANet在PubTabNet数据集上训练的中文表格识别模型|9.3M|[推理模型](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_train.tar) | diff --git a/ppstructure/docs/models_list_en.md b/ppstructure/docs/models_list_en.md index e133a0bb..64a7cdeb 100644 --- a/ppstructure/docs/models_list_en.md +++ b/ppstructure/docs/models_list_en.md @@ -35,7 +35,9 @@ If you need to use other OCR models, you can download the model in [PP-OCR model |model| description |inference model size|download| | --- |-----------------------------------------------------------------------------| --- | --- | -|en_ppocr_mobile_v2.0_table_structure| Table structure model for English table scenes trained on PubTabNet dataset |18.6M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.1/table/en_ppocr_mobile_v2.0_table_structure_train.tar) | 
+|en_ppocr_mobile_v2.0_table_structure| English table recognition model trained on PubTabNet dataset based on TableRec-RARE |18.6M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.1/table/en_ppocr_mobile_v2.0_table_structure_train.tar) | +|en_ppstructure_mobile_v2.0_SLANet|English table recognition model trained on PubTabNet dataset based on SLANet|9M|[inference model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_train.tar) | +|ch_ppstructure_mobile_v2.0_SLANet|Chinese table recognition model trained on PubTabNet dataset based on SLANet|9.3M|[inference model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_train.tar) | ## 3. 
VQA diff --git a/ppstructure/table/README.md b/ppstructure/table/README.md index 10308b49..5ac99ac8 100644 --- a/ppstructure/table/README.md +++ b/ppstructure/table/README.md @@ -44,17 +44,24 @@ cd PaddleOCR/ppstructure # download model mkdir inference && cd inference -# Download the detection model of the ultra-lightweight table English OCR model and unzip it -wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_det_infer.tar && tar xf en_ppocr_mobile_v2.0_table_det_infer.tar -# Download the recognition model of the ultra-lightweight table English OCR model and unzip it -wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tar && tar xf en_ppocr_mobile_v2.0_table_rec_infer.tar -# Download the ultra-lightweight English table inch model and unzip it -wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar && tar xf en_ppocr_mobile_v2.0_table_structure_infer.tar +# Download the PP-OCRv3 text detection model and unzip it +wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.tar && tar xf ch_PP-OCRv3_det_slim_infer.tar +# Download the PP-OCRv3 text recognition model and unzip it +wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.tar && tar xf ch_PP-OCRv3_rec_slim_infer.tar +# Download the PP-Structurev2 table recognition model and unzip it +wget https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar && tar xf ch_ppstructure_mobile_v2.0_SLANet_infer.tar cd .. 
# run -python3 table/predict_table.py --det_model_dir=inference/en_ppocr_mobile_v2.0_table_det_infer --rec_model_dir=inference/en_ppocr_mobile_v2.0_table_rec_infer --table_model_dir=inference/en_ppocr_mobile_v2.0_table_structure_infer --image_dir=./docs/table/table.jpg --rec_char_dict_path=../ppocr/utils/dict/table_dict.txt --table_char_dict_path=../ppocr/utils/dict/table_structure_dict.txt --det_limit_side_len=736 --det_limit_type=min --output ./output/table +python3.7 table/predict_table.py \ + --det_model_dir=inference/ch_PP-OCRv3_det_slim_infer \ + --rec_model_dir=inference/ch_PP-OCRv3_rec_slim_infer \ + --table_model_dir=inference/ch_ppstructure_mobile_v2.0_SLANet_infer \ + --rec_char_dict_path=../ppocr/utils/ppocr_keys_v1.txt \ + --table_char_dict_path=../ppocr/utils/dict/table_structure_dict_ch.txt \ + --image_dir=docs/table/table.jpg \ + --output=../output/table + ``` -Note: The above model is trained on the PubLayNet dataset and only supports English scanning scenarios. If you need to identify other scenarios, you need to train the model yourself and replace the three fields `det_model_dir`, `rec_model_dir`, `table_model_dir`. After the operation is completed, the excel table of each image will be saved to the directory specified by the output field, and an html file will be produced in the directory to visually view the cell coordinates and the recognized table. 
diff --git a/ppstructure/table/README_ch.md b/ppstructure/table/README_ch.md index 3f31c010..a16de938 100644 --- a/ppstructure/table/README_ch.md +++ b/ppstructure/table/README_ch.md @@ -54,20 +54,25 @@ cd PaddleOCR/ppstructure # 下载模型 mkdir inference && cd inference -# 下载超轻量级表格英文OCR模型的检测模型并解压 -wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_det_infer.tar && tar xf en_ppocr_mobile_v2.0_table_det_infer.tar -# 下载超轻量级表格英文OCR模型的识别模型并解压 -wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tar && tar xf en_ppocr_mobile_v2.0_table_rec_infer.tar -# 下载超轻量级英文表格英寸模型并解压 -wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar && tar xf en_ppocr_mobile_v2.0_table_structure_infer.tar +# 下载PP-OCRv3文本检测模型并解压 +wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.tar && tar xf ch_PP-OCRv3_det_slim_infer.tar +# 下载PP-OCRv3文本识别模型并解压 +wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.tar && tar xf ch_PP-OCRv3_rec_slim_infer.tar +# 下载PP-Structurev2表格识别模型并解压 +wget https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar && tar xf ch_ppstructure_mobile_v2.0_SLANet_infer.tar cd .. 
# 执行预测 -python3 table/predict_table.py --det_model_dir=inference/en_ppocr_mobile_v2.0_table_det_infer --rec_model_dir=inference/en_ppocr_mobile_v2.0_table_rec_infer --table_model_dir=inference/en_ppocr_mobile_v2.0_table_structure_infer --image_dir=./docs/table/table.jpg --rec_char_dict_path=../ppocr/utils/dict/table_dict.txt --table_char_dict_path=../ppocr/utils/dict/table_structure_dict.txt --det_limit_side_len=736 --det_limit_type=min --output ./output/table +python3.7 table/predict_table.py \ + --det_model_dir=inference/ch_PP-OCRv3_det_slim_infer \ + --rec_model_dir=inference/ch_PP-OCRv3_rec_slim_infer \ + --table_model_dir=inference/ch_ppstructure_mobile_v2.0_SLANet_infer \ + --rec_char_dict_path=../ppocr/utils/ppocr_keys_v1.txt \ + --table_char_dict_path=../ppocr/utils/dict/table_structure_dict_ch.txt \ + --image_dir=docs/table/table.jpg \ + --output=../output/table ``` 运行完成后,每张图片的excel表格会保存到output字段指定的目录下,同时在该目录下回生产一个html文件,用于可视化查看单元格坐标和识别的表格。 -note: 上述模型是在 PubLayNet 数据集上训练的表格识别模型,仅支持英文扫描场景,如需识别其他场景需要自己训练模型后替换 `det_model_dir`,`rec_model_dir`,`table_model_dir`三个字段即可。 - ### 3.2 训练 diff --git a/ppstructure/utility.py b/ppstructure/utility.py index 3e5054a7..cda4c063 100644 --- a/ppstructure/utility.py +++ b/ppstructure/utility.py @@ -28,7 +28,7 @@ def init_args(): parser.add_argument("--table_algorithm", type=str, default='TableAttn') parser.add_argument("--table_model_dir", type=str) parser.add_argument( - "--merge_no_span_structure", type=str2bool, default=False) + "--merge_no_span_structure", type=str2bool, default=True) parser.add_argument( "--table_char_dict_path", type=str, -- GitLab