From 27181149a9eaa5ffc2da085fd1ad7eaff2eb68f8 Mon Sep 17 00:00:00 2001
From: WenmuZhou <572459439@qq.com>
Date: Tue, 5 Jul 2022 09:31:53 +0000
Subject: [PATCH] add tablemaster to tipc
---
doc/doc_ch/algorithm.md | 3 +-
doc/doc_ch/algorithm_overview.md | 12 +-
doc/doc_en/algorithm_en.md | 3 +-
doc/doc_en/algorithm_overview_en.md | 10 +-
ppstructure/table/predict_structure.py | 6 +-
.../configs/en_table_structure/table_mv3.yml | 41 +++---
.../configs/table_master/table_master.yml | 136 ++++++++++++++++++
.../table_master/train_infer_python.txt | 53 +++++++
test_tipc/prepare.sh | 4 +
test_tipc/readme.md | 1 +
10 files changed, 237 insertions(+), 32 deletions(-)
create mode 100644 test_tipc/configs/table_master/table_master.yml
create mode 100644 test_tipc/configs/table_master/train_infer_python.txt
diff --git a/doc/doc_ch/algorithm.md b/doc/doc_ch/algorithm.md
index 3056f35d..d50a5aa4 100644
--- a/doc/doc_ch/algorithm.md
+++ b/doc/doc_ch/algorithm.md
@@ -5,9 +5,10 @@ PaddleOCR将**持续新增**支持OCR领域前沿算法与模型,已支持的
- [文本检测算法](./algorithm_overview.md#11-%E6%96%87%E6%9C%AC%E6%A3%80%E6%B5%8B%E7%AE%97%E6%B3%95)
- [文本识别算法](./algorithm_overview.md#12-%E6%96%87%E6%9C%AC%E8%AF%86%E5%88%AB%E7%AE%97%E6%B3%95)
- [端到端算法](./algorithm_overview.md#2-%E6%96%87%E6%9C%AC%E8%AF%86%E5%88%AB%E7%AE%97%E6%B3%95)
+- [表格识别](./algorithm_overview.md#3-%E8%A1%A8%E6%A0%BC%E8%AF%86%E5%88%AB%E7%AE%97%E6%B3%95)
**欢迎广大开发者合作共建,贡献更多算法,合入有奖🎁!具体可查看[社区常规赛](https://github.com/PaddlePaddle/PaddleOCR/issues/4982)。**
新增算法可参考如下教程:
-- [使用PaddleOCR架构添加新算法](./add_new_algorithm.md)
\ No newline at end of file
+- [使用PaddleOCR架构添加新算法](./add_new_algorithm.md)
diff --git a/doc/doc_ch/algorithm_overview.md b/doc/doc_ch/algorithm_overview.md
index 1efd564c..5c7adc71 100755
--- a/doc/doc_ch/algorithm_overview.md
+++ b/doc/doc_ch/algorithm_overview.md
@@ -1,10 +1,10 @@
# OCR算法
-- [1. 两阶段算法](#1-两阶段算法)
- - [1.1 文本检测算法](#11-文本检测算法)
- - [1.2 文本识别算法](#12-文本识别算法)
-- [2. 端到端算法](#2-端到端算法)
-- [3. 表格识别算法](#3-表格识别算法)
+- [1. 两阶段算法](#1)
+ - [1.1 文本检测算法](#11)
+ - [1.2 文本识别算法](#12)
+- [2. 端到端算法](#2)
+- [3. 表格识别算法](#3)
本文给出了PaddleOCR已支持的OCR算法列表,以及每个算法在**英文公开数据集**上的模型和指标,主要用于算法简介和算法性能对比,更多包括中文在内的其他数据集上的模型请参考[PP-OCR v2.0 系列模型下载](./models_list.md)。
@@ -98,6 +98,8 @@
已支持的端到端OCR算法列表(戳链接获取使用教程):
- [x] [PGNet](./algorithm_e2e_pgnet.md)
+
+
## 3. 表格识别算法
已支持的表格识别算法列表(戳链接获取使用教程):
diff --git a/doc/doc_en/algorithm_en.md b/doc/doc_en/algorithm_en.md
index fa7887eb..c880336b 100644
--- a/doc/doc_en/algorithm_en.md
+++ b/doc/doc_en/algorithm_en.md
@@ -6,5 +6,6 @@ PaddleOCR will add cutting-edge OCR algorithms and models continuously. Check ou
- [text detection algorithms](./algorithm_overview_en.md#11)
- [text recognition algorithms](./algorithm_overview_en.md#12)
- [end-to-end algorithms](./algorithm_overview_en.md#2)
+- [table recognition algorithms](./algorithm_overview_en.md#3)
-Developers are welcome to contribute more algorithms! Please refer to [add new algorithm](./add_new_algorithm_en.md) guideline.
\ No newline at end of file
+Developers are welcome to contribute more algorithms! Please refer to [add new algorithm](./add_new_algorithm_en.md) guideline.
diff --git a/doc/doc_en/algorithm_overview_en.md b/doc/doc_en/algorithm_overview_en.md
index 8b9b3f7b..f3c96b62 100755
--- a/doc/doc_en/algorithm_overview_en.md
+++ b/doc/doc_en/algorithm_overview_en.md
@@ -1,10 +1,10 @@
# OCR Algorithms
-- [1. Two-stage Algorithms](#1-two-stage-algorithms)
- - [1.1 Text Detection Algorithms](#11-text-detection-algorithms)
- - [1.2 Text Recognition Algorithms](#12-text-recognition-algorithms)
-- [2. End-to-end Algorithms](#2-end-to-end-algorithms)
-- [3. Table Recognition Algorithms](#3-table-recognition-algorithms)
+- [1. Two-stage Algorithms](#1)
+ - [1.1 Text Detection Algorithms](#11)
+ - [1.2 Text Recognition Algorithms](#12)
+- [2. End-to-end Algorithms](#2)
+- [3. Table Recognition Algorithms](#3)
This tutorial lists the OCR algorithms supported by PaddleOCR, as well as the models and metrics of each algorithm on **English public datasets**. It is mainly used for algorithm introduction and algorithm performance comparison. For more models on other datasets including Chinese, please refer to [PP-OCR v2.0 models list](./models_list_en.md).
diff --git a/ppstructure/table/predict_structure.py b/ppstructure/table/predict_structure.py
index 00385c76..7a7d3169 100755
--- a/ppstructure/table/predict_structure.py
+++ b/ppstructure/table/predict_structure.py
@@ -118,7 +118,7 @@ class TableStructurer(object):
'', '
', ''
] + structure_str_list + ['
', '', '']
elapse = time.time() - starttime
- return structure_str_list, bbox_list, elapse
+ return (structure_str_list, bbox_list), elapse
def main(args):
@@ -138,8 +138,8 @@ def main(args):
if img is None:
logger.info("error in loading image:{}".format(image_file))
continue
- structure_str_list, bbox_list, elapse = table_structurer(img)
-
+ structure_res, elapse = table_structurer(img)
+ structure_str_list, bbox_list = structure_res
bbox_list_str = json.dumps(bbox_list.tolist())
logger.info("result: {}, {}".format(structure_str_list,
bbox_list_str))
diff --git a/test_tipc/configs/en_table_structure/table_mv3.yml b/test_tipc/configs/en_table_structure/table_mv3.yml
index adf326bd..6df5a1a2 100755
--- a/test_tipc/configs/en_table_structure/table_mv3.yml
+++ b/test_tipc/configs/en_table_structure/table_mv3.yml
@@ -1,24 +1,23 @@
Global:
use_gpu: true
- epoch_num: 10
+ epoch_num: 400
log_smooth_window: 20
print_batch_step: 5
save_model_dir: ./output/table_mv3/
- save_epoch_step: 3
+ save_epoch_step: 400
# evaluation is run every 400 iterations after the 0th iteration
eval_batch_step: [0, 400]
cal_metric_during_train: True
pretrained_model:
- checkpoints:
+ checkpoints:
save_inference_dir:
use_visualdl: False
- infer_img: doc/table/table.jpg
+ infer_img: ppstructure/docs/table/table.jpg
+ save_res_path: output/table_mv3
# for data or label process
character_dict_path: ppocr/utils/dict/table_structure_dict.txt
character_type: en
- max_text_length: 100
- max_elem_length: 800
- max_cell_num: 500
+ max_text_length: 800
infer_mode: False
process_total_num: 0
process_cut_num: 0
@@ -44,11 +43,8 @@ Architecture:
Head:
name: TableAttentionHead
hidden_size: 256
- l2_decay: 0.00001
loc_type: 2
- max_text_length: 100
- max_elem_length: 800
- max_cell_num: 500
+ max_text_length: 800
Loss:
name: TableAttentionLoss
@@ -61,28 +57,34 @@ PostProcess:
Metric:
name: TableMetric
main_indicator: acc
+ compute_bbox_metric: false # computing the bbox metric is time-consuming; keep it disabled during training
Train:
dataset:
name: PubTabDataSet
data_dir: ./train_data/pubtabnet/train
- label_file_path: ./train_data/pubtabnet/train.jsonl
+ label_file_list: [./train_data/pubtabnet/train.jsonl]
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
+ - TableLabelEncode:
+ learn_empty_box: False
+ merge_no_span_structure: False
+ replace_empty_cell_token: False
+ - TableBoxEncode:
- ResizeTableImage:
max_len: 488
- - TableLabelEncode:
- NormalizeImage:
scale: 1./255.
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: 'hwc'
- PaddingTableImage:
+ size: [488, 488]
- ToCHWImage:
- KeepKeys:
- keep_keys: ['image', 'structure', 'bbox_list', 'sp_tokens', 'bbox_list_mask']
+ keep_keys: [ 'image', 'structure', 'bboxes', 'bbox_masks', 'shape' ]
loader:
shuffle: True
batch_size_per_card: 32
@@ -93,23 +95,28 @@ Eval:
dataset:
name: PubTabDataSet
data_dir: ./train_data/pubtabnet/test/
- label_file_path: ./train_data/pubtabnet/test.jsonl
+ label_file_list: [./train_data/pubtabnet/test.jsonl]
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
+ - TableLabelEncode:
+ learn_empty_box: False
+ merge_no_span_structure: False
+ replace_empty_cell_token: False
+ - TableBoxEncode:
- ResizeTableImage:
max_len: 488
- - TableLabelEncode:
- NormalizeImage:
scale: 1./255.
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: 'hwc'
- PaddingTableImage:
+ size: [488, 488]
- ToCHWImage:
- KeepKeys:
- keep_keys: ['image', 'structure', 'bbox_list', 'sp_tokens', 'bbox_list_mask']
+ keep_keys: [ 'image', 'structure', 'bboxes', 'bbox_masks', 'shape' ]
loader:
shuffle: False
drop_last: False
diff --git a/test_tipc/configs/table_master/table_master.yml b/test_tipc/configs/table_master/table_master.yml
new file mode 100644
index 00000000..c519b5b8
--- /dev/null
+++ b/test_tipc/configs/table_master/table_master.yml
@@ -0,0 +1,136 @@
+Global:
+ use_gpu: true
+ epoch_num: 17
+ log_smooth_window: 20
+ print_batch_step: 100
+ save_model_dir: ./output/table_master/
+ save_epoch_step: 17
+ eval_batch_step: [0, 6259]
+ cal_metric_during_train: true
+ pretrained_model: null
+ checkpoints:
+ save_inference_dir: output/table_master/infer
+ use_visualdl: false
+ infer_img: ppstructure/docs/table/table.jpg
+ save_res_path: ./output/table_master
+ character_dict_path: ppocr/utils/dict/table_master_structure_dict.txt
+ infer_mode: false
+ max_text_length: 500
+ process_total_num: 0
+ process_cut_num: 0
+
+
+Optimizer:
+ name: Adam
+ beta1: 0.9
+ beta2: 0.999
+ lr:
+ name: MultiStepDecay
+ learning_rate: 0.001
+ milestones: [12, 15]
+ gamma: 0.1
+ warmup_epoch: 0.02
+ regularizer:
+ name: L2
+ factor: 0.0
+
+Architecture:
+ model_type: table
+ algorithm: TableMaster
+ Backbone:
+ name: TableResNetExtra
+ gcb_config:
+ ratio: 0.0625
+ headers: 1
+ att_scale: False
+ fusion_type: channel_add
+ layers: [False, True, True, True]
+ layers: [1,2,5,3]
+ Head:
+ name: TableMasterHead
+ hidden_size: 512
+ headers: 8
+ dropout: 0
+ d_ff: 2024
+ max_text_length: 500
+
+Loss:
+ name: TableMasterLoss
+ ignore_index: 42 # set to len of dict + 3
+
+PostProcess:
+ name: TableMasterLabelDecode
+ box_shape: pad
+
+Metric:
+ name: TableMetric
+ main_indicator: acc
+ compute_bbox_metric: False
+
+Train:
+ dataset:
+ name: PubTabDataSet
+ data_dir: ./train_data/pubtabnet/train
+ label_file_list: [./train_data/pubtabnet/train.jsonl]
+ transforms:
+ - DecodeImage:
+ img_mode: BGR
+ channel_first: False
+ - TableMasterLabelEncode:
+ learn_empty_box: False
+ merge_no_span_structure: True
+ replace_empty_cell_token: True
+ - ResizeTableImage:
+ max_len: 480
+ resize_bboxes: True
+ - PaddingTableImage:
+ size: [480, 480]
+ - TableBoxEncode:
+ use_xywh: True
+ - NormalizeImage:
+ scale: 1./255.
+ mean: [0.5, 0.5, 0.5]
+ std: [0.5, 0.5, 0.5]
+ order: hwc
+ - ToCHWImage: null
+ - KeepKeys:
+ keep_keys: [image, structure, bboxes, bbox_masks, shape]
+ loader:
+ shuffle: True
+ batch_size_per_card: 10
+ drop_last: True
+ num_workers: 8
+
+Eval:
+ dataset:
+ name: PubTabDataSet
+ data_dir: ./train_data/pubtabnet/test/
+ label_file_list: [./train_data/pubtabnet/test.jsonl]
+ transforms:
+ - DecodeImage:
+ img_mode: BGR
+ channel_first: False
+ - TableMasterLabelEncode:
+ learn_empty_box: False
+ merge_no_span_structure: True
+ replace_empty_cell_token: True
+ - ResizeTableImage:
+ max_len: 480
+ resize_bboxes: True
+ - PaddingTableImage:
+ size: [480, 480]
+ - TableBoxEncode:
+ use_xywh: True
+ - NormalizeImage:
+ scale: 1./255.
+ mean: [0.5, 0.5, 0.5]
+ std: [0.5, 0.5, 0.5]
+ order: hwc
+ - ToCHWImage: null
+ - KeepKeys:
+ keep_keys: [image, structure, bboxes, bbox_masks, shape]
+ loader:
+ shuffle: False
+ drop_last: False
+ batch_size_per_card: 10
+ num_workers: 8
\ No newline at end of file
diff --git a/test_tipc/configs/table_master/train_infer_python.txt b/test_tipc/configs/table_master/train_infer_python.txt
new file mode 100644
index 00000000..56b8e636
--- /dev/null
+++ b/test_tipc/configs/table_master/train_infer_python.txt
@@ -0,0 +1,53 @@
+===========================train_params===========================
+model_name:table_master
+python:python3.7
+gpu_list:0|0,1
+Global.use_gpu:True|True
+Global.auto_cast:fp32
+Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=17
+Global.save_model_dir:./output/
+Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4
+Global.pretrained_model:./pretrain_models/table_structure_tablemaster_train/best_accuracy
+train_model_name:latest
+train_infer_img_dir:./ppstructure/docs/table/table.jpg
+null:null
+##
+trainer:norm_train
+norm_train:tools/train.py -c test_tipc/configs/table_master/table_master.yml -o Global.print_batch_step=10
+pact_train:null
+fpgm_train:null
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params===========================
+eval:null
+null:null
+##
+===========================infer_params===========================
+Global.save_inference_dir:./output/
+Global.checkpoints:
+norm_export:tools/export_model.py -c test_tipc/configs/table_master/table_master.yml -o
+quant_export:
+fpgm_export:
+distill_export:null
+export1:null
+export2:null
+##
+infer_model:null
+infer_export:null
+infer_quant:False
+inference:ppstructure/table/predict_structure.py --table_char_dict_path=./ppocr/utils/dict/table_master_structure_dict.txt --image_dir=./ppstructure/docs/table/table.jpg --output ./output/table --table_algorithm=TableMaster --table_max_len=480
+--use_gpu:True|False
+--enable_mkldnn:False
+--cpu_threads:6
+--rec_batch_num:1
+--use_tensorrt:False
+--precision:fp32
+--table_model_dir:
+--image_dir:./ppstructure/docs/table/table.jpg
+null:null
+--benchmark:False
+null:null
+===========================infer_benchmark_params==========================
+random_infer_input:[{float32,[3,480,480]}]
diff --git a/test_tipc/prepare.sh b/test_tipc/prepare.sh
index 32df8e78..593cd872 100644
--- a/test_tipc/prepare.sh
+++ b/test_tipc/prepare.sh
@@ -62,6 +62,10 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
if [[ ${model_name} =~ "det_r50_db++" ]];then
wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.1/en_det/ResNet50_dcn_asf_synthtext_pretrained.pdparams --no-check-certificate
fi
+ if [ ${model_name} == "table_master" ];then
+ wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/ppstructure/models/tablemaster/table_structure_tablemaster_train.tar --no-check-certificate
+ cd ./pretrain_models/ && tar xf table_structure_tablemaster_train.tar && cd ../
+ fi
cd ./pretrain_models/ && tar xf det_mv3_db_v2.0_train.tar && cd ../
rm -rf ./train_data/icdar2015
rm -rf ./train_data/ic15_data
diff --git a/test_tipc/readme.md b/test_tipc/readme.md
index effb2f16..1c637d76 100644
--- a/test_tipc/readme.md
+++ b/test_tipc/readme.md
@@ -54,6 +54,7 @@
| NRTR |rec_mtb_nrtr | 识别 | 支持 | 多机多卡 <br> 混合精度 | - | - |
| SAR |rec_r31_sar | 识别 | 支持 | 多机多卡 <br> 混合精度 | - | - |
| PGNet |rec_r34_vd_none_none_ctc_v2.0 | 端到端| 支持 | 多机多卡 <br> 混合精度 | - | - |
+| TableMaster |table_structure_tablemaster_train | 表格识别| 支持 | 多机多卡 <br> 混合精度 | - | - |
--
GitLab