From 27181149a9eaa5ffc2da085fd1ad7eaff2eb68f8 Mon Sep 17 00:00:00 2001
From: WenmuZhou <572459439@qq.com>
Date: Tue, 5 Jul 2022 09:31:53 +0000
Subject: [PATCH] add tablemaster to tipc

---
 doc/doc_ch/algorithm.md                       |   3 +-
 doc/doc_ch/algorithm_overview.md              |  12 +-
 doc/doc_en/algorithm_en.md                    |   3 +-
 doc/doc_en/algorithm_overview_en.md           |  10 +-
 ppstructure/table/predict_structure.py        |   6 +-
 .../configs/en_table_structure/table_mv3.yml  |  41 +++---
 .../configs/table_master/table_master.yml     | 136 ++++++++++++++++++
 .../table_master/train_infer_python.txt       |  53 +++++++
 test_tipc/prepare.sh                          |   4 +
 test_tipc/readme.md                           |   1 +
 10 files changed, 237 insertions(+), 32 deletions(-)
 create mode 100644 test_tipc/configs/table_master/table_master.yml
 create mode 100644 test_tipc/configs/table_master/train_infer_python.txt
diff --git a/doc/doc_ch/algorithm.md b/doc/doc_ch/algorithm.md
index 3056f35d..d50a5aa4 100644
--- a/doc/doc_ch/algorithm.md
+++ b/doc/doc_ch/algorithm.md
@@ -5,9 +5,10 @@ PaddleOCR将**持续新增**支持OCR领域前沿算法与模型，已支持的
 - [文本检测算法](./algorithm_overview.md#11-%E6%96%87%E6%9C%AC%E6%A3%80%E6%B5%8B%E7%AE%97%E6%B3%95)
 - [文本识别算法](./algorithm_overview.md#12-%E6%96%87%E6%9C%AC%E8%AF%86%E5%88%AB%E7%AE%97%E6%B3%95)
 - [端到端算法](./algorithm_overview.md#2-%E6%96%87%E6%9C%AC%E8%AF%86%E5%88%AB%E7%AE%97%E6%B3%95)
+- [表格识别算法](./algorithm_overview.md#3-%E8%A1%A8%E6%A0%BC%E8%AF%86%E5%88%AB%E7%AE%97%E6%B3%95)
 
 **欢迎广大开发者合作共建，贡献更多算法，合入有奖🎁！具体可查看[社区常规赛](https://github.com/PaddlePaddle/PaddleOCR/issues/4982)。**
 
 新增算法可参考如下教程：
 
-- [使用PaddleOCR架构添加新算法](./add_new_algorithm.md)
\ No newline at end of file
+- [使用PaddleOCR架构添加新算法](./add_new_algorithm.md)
diff --git a/doc/doc_ch/algorithm_overview.md b/doc/doc_ch/algorithm_overview.md
index 1efd564c..5c7adc71 100755
--- a/doc/doc_ch/algorithm_overview.md
+++ b/doc/doc_ch/algorithm_overview.md
@@ -1,10 +1,10 @@
 # OCR算法
 
-- [1. 两阶段算法](#1-两阶段算法)
-  - [1.1 文本检测算法](#11-文本检测算法)
-  - [1.2 文本识别算法](#12-文本识别算法)
-- [2. 端到端算法](#2-端到端算法)
-- [3. 表格识别算法](#3-表格识别算法)
+- [1. 两阶段算法](#1)
+  - [1.1 文本检测算法](#11)
+  - [1.2 文本识别算法](#12)
+- [2. 端到端算法](#2)
+- [3. 表格识别算法](#3)
 
 
 本文给出了PaddleOCR已支持的OCR算法列表，以及每个算法在**英文公开数据集**上的模型和指标，主要用于算法简介和算法性能对比，更多包括中文在内的其他数据集上的模型请参考[PP-OCR v2.0 系列模型下载](./models_list.md)。
@@ -98,6 +98,8 @@
 已支持的端到端OCR算法列表（戳链接获取使用教程）：
 - [x]  [PGNet](./algorithm_e2e_pgnet.md)
 
+<a name="3"></a>
+
 ## 3. 表格识别算法
 
 已支持的表格识别算法列表（戳链接获取使用教程）：
diff --git a/doc/doc_en/algorithm_en.md b/doc/doc_en/algorithm_en.md
index fa7887eb..c880336b 100644
--- a/doc/doc_en/algorithm_en.md
+++ b/doc/doc_en/algorithm_en.md
@@ -6,5 +6,6 @@ PaddleOCR will add cutting-edge OCR algorithms and models continuously. Check ou
 - [text detection algorithms](./algorithm_overview_en.md#11)
 - [text recognition algorithms](./algorithm_overview_en.md#12)
 - [end-to-end algorithms](./algorithm_overview_en.md#2)
+- [table recognition algorithms](./algorithm_overview_en.md#3)
 
-Developers are welcome to contribute more algorithms! Please refer to [add new algorithm](./add_new_algorithm_en.md) guideline.
\ No newline at end of file
+Developers are welcome to contribute more algorithms! Please refer to [add new algorithm](./add_new_algorithm_en.md) guideline.
diff --git a/doc/doc_en/algorithm_overview_en.md b/doc/doc_en/algorithm_overview_en.md
index 8b9b3f7b..f3c96b62 100755
--- a/doc/doc_en/algorithm_overview_en.md
+++ b/doc/doc_en/algorithm_overview_en.md
@@ -1,10 +1,10 @@
 # OCR Algorithms
 
-- [1. Two-stage Algorithms](#1-two-stage-algorithms)
-  - [1.1 Text Detection Algorithms](#11-text-detection-algorithms)
-  - [1.2 Text Recognition Algorithms](#12-text-recognition-algorithms)
-- [2. End-to-end Algorithms](#2-end-to-end-algorithms)
-- [3. Table Recognition Algorithms](#3-table-recognition-algorithms)
+- [1. Two-stage Algorithms](#1)
+  - [1.1 Text Detection Algorithms](#11)
+  - [1.2 Text Recognition Algorithms](#12)
+- [2. End-to-end Algorithms](#2)
+- [3. Table Recognition Algorithms](#3)
 
 
 This tutorial lists the OCR algorithms supported by PaddleOCR, as well as the models and metrics of each algorithm on **English public datasets**. It is mainly used for algorithm introduction and algorithm performance comparison. For more models on other datasets including Chinese, please refer to [PP-OCR v2.0 models list](./models_list_en.md).
diff --git a/ppstructure/table/predict_structure.py b/ppstructure/table/predict_structure.py
index 00385c76..7a7d3169 100755
--- a/ppstructure/table/predict_structure.py
+++ b/ppstructure/table/predict_structure.py
@@ -118,7 +118,7 @@ class TableStructurer(object):
             '<html>', '<body>', '<table>'
         ] + structure_str_list + ['</table>', '</body>', '</html>']
         elapse = time.time() - starttime
-        return structure_str_list, bbox_list, elapse
+        return (structure_str_list, bbox_list), elapse
 
 
 def main(args):
@@ -138,8 +138,8 @@ def main(args):
             if img is None:
                 logger.info("error in loading image:{}".format(image_file))
                 continue
-            structure_str_list, bbox_list, elapse = table_structurer(img)
-
+            structure_res, elapse = table_structurer(img)
+            structure_str_list, bbox_list = structure_res
             bbox_list_str = json.dumps(bbox_list.tolist())
             logger.info("result: {}, {}".format(structure_str_list,
                                                 bbox_list_str))
diff --git a/test_tipc/configs/en_table_structure/table_mv3.yml b/test_tipc/configs/en_table_structure/table_mv3.yml
index adf326bd..6df5a1a2 100755
--- a/test_tipc/configs/en_table_structure/table_mv3.yml
+++ b/test_tipc/configs/en_table_structure/table_mv3.yml
@@ -1,24 +1,23 @@
 Global:
   use_gpu: true
-  epoch_num: 10
+  epoch_num: 400
   log_smooth_window: 20
   print_batch_step: 5
   save_model_dir: ./output/table_mv3/
-  save_epoch_step: 3
+  save_epoch_step: 400
   # evaluation is run every 400 iterations after the 0th iteration
   eval_batch_step: [0, 400]
   cal_metric_during_train: True
   pretrained_model:
-  checkpoints: 
+  checkpoints:
   save_inference_dir:
   use_visualdl: False
-  infer_img: doc/table/table.jpg
+  infer_img: ppstructure/docs/table/table.jpg
+  save_res_path: output/table_mv3
   # for data or label process
   character_dict_path: ppocr/utils/dict/table_structure_dict.txt
   character_type: en
-  max_text_length: 100
-  max_elem_length: 800
-  max_cell_num: 500
+  max_text_length: 800
   infer_mode: False
   process_total_num: 0
   process_cut_num: 0
@@ -44,11 +43,8 @@ Architecture:
   Head:
     name: TableAttentionHead
     hidden_size: 256
-    l2_decay: 0.00001
     loc_type: 2
-    max_text_length: 100
-    max_elem_length: 800
-    max_cell_num: 500
+    max_text_length: 800
 
 Loss:
   name: TableAttentionLoss
@@ -61,28 +57,34 @@ PostProcess:
 Metric:
   name: TableMetric
   main_indicator: acc
+  compute_bbox_metric: false # computing the bbox metric is time-consuming; keep it disabled during training
 
 Train:
   dataset:
     name: PubTabDataSet
     data_dir: ./train_data/pubtabnet/train
-    label_file_path: ./train_data/pubtabnet/train.jsonl
+    label_file_list: [./train_data/pubtabnet/train.jsonl]
     transforms:
       - DecodeImage: # load image
           img_mode: BGR
           channel_first: False
+      - TableLabelEncode:
+          learn_empty_box: False
+          merge_no_span_structure: False
+          replace_empty_cell_token: False
+      - TableBoxEncode:
       - ResizeTableImage:
           max_len: 488
-      - TableLabelEncode:
       - NormalizeImage:
           scale: 1./255.
           mean: [0.485, 0.456, 0.406]
           std: [0.229, 0.224, 0.225]
           order: 'hwc'
       - PaddingTableImage:
+          size: [488, 488]
       - ToCHWImage:
       - KeepKeys:
-          keep_keys: ['image', 'structure', 'bbox_list', 'sp_tokens', 'bbox_list_mask']
+          keep_keys: [ 'image', 'structure', 'bboxes', 'bbox_masks', 'shape' ]
   loader:
     shuffle: True
     batch_size_per_card: 32
@@ -93,23 +95,28 @@ Eval:
   dataset:
     name: PubTabDataSet
     data_dir: ./train_data/pubtabnet/test/
-    label_file_path: ./train_data/pubtabnet/test.jsonl
+    label_file_list: [./train_data/pubtabnet/test.jsonl]
     transforms:
       - DecodeImage: # load image
           img_mode: BGR
           channel_first: False
+      - TableLabelEncode:
+          learn_empty_box: False
+          merge_no_span_structure: False
+          replace_empty_cell_token: False
+      - TableBoxEncode:
       - ResizeTableImage:
           max_len: 488
-      - TableLabelEncode:
       - NormalizeImage:
           scale: 1./255.
           mean: [0.485, 0.456, 0.406]
           std: [0.229, 0.224, 0.225]
           order: 'hwc'
       - PaddingTableImage:
+          size: [488, 488]
       - ToCHWImage:
       - KeepKeys:
-          keep_keys: ['image', 'structure', 'bbox_list', 'sp_tokens', 'bbox_list_mask']
+          keep_keys: [ 'image', 'structure', 'bboxes', 'bbox_masks', 'shape' ]
   loader:
     shuffle: False
     drop_last: False
diff --git a/test_tipc/configs/table_master/table_master.yml b/test_tipc/configs/table_master/table_master.yml
new file mode 100644
index 00000000..c519b5b8
--- /dev/null
+++ b/test_tipc/configs/table_master/table_master.yml
@@ -0,0 +1,136 @@
+Global:
+  use_gpu: true
+  epoch_num: 17
+  log_smooth_window: 20
+  print_batch_step: 100
+  save_model_dir: ./output/table_master/
+  save_epoch_step: 17
+  eval_batch_step: [0,  6259]
+  cal_metric_during_train: true
+  pretrained_model: null
+  checkpoints: 
+  save_inference_dir: output/table_master/infer
+  use_visualdl: false
+  infer_img: ppstructure/docs/table/table.jpg
+  save_res_path: ./output/table_master
+  character_dict_path: ppocr/utils/dict/table_master_structure_dict.txt
+  infer_mode: false
+  max_text_length: 500
+  process_total_num: 0
+  process_cut_num: 0
+
+
+Optimizer:
+  name: Adam
+  beta1: 0.9
+  beta2: 0.999
+  lr:
+    name: MultiStepDecay
+    learning_rate: 0.001
+    milestones: [12, 15]
+    gamma: 0.1
+    warmup_epoch: 0.02
+  regularizer:
+    name: L2
+    factor: 0.0
+
+Architecture:
+  model_type: table
+  algorithm: TableMaster
+  Backbone:
+    name: TableResNetExtra
+    gcb_config:
+      ratio: 0.0625
+      headers: 1
+      att_scale: False
+      fusion_type: channel_add
+      layers: [False, True, True, True]
+    layers: [1,2,5,3]
+  Head:
+    name: TableMasterHead
+    hidden_size: 512
+    headers: 8
+    dropout: 0
+    d_ff: 2024
+    max_text_length: 500
+
+Loss:
+  name: TableMasterLoss
+  ignore_index: 42 # set to the length of the character dict + 3
+
+PostProcess:
+  name: TableMasterLabelDecode
+  box_shape: pad
+
+Metric:
+  name: TableMetric
+  main_indicator: acc
+  compute_bbox_metric: False
+
+Train:
+  dataset:
+    name: PubTabDataSet
+    data_dir: ./train_data/pubtabnet/train
+    label_file_list: [./train_data/pubtabnet/train.jsonl]
+    transforms:
+      - DecodeImage:
+          img_mode: BGR
+          channel_first: False
+      - TableMasterLabelEncode:
+          learn_empty_box: False
+          merge_no_span_structure: True
+          replace_empty_cell_token: True
+      - ResizeTableImage:
+          max_len: 480
+          resize_bboxes: True
+      - PaddingTableImage:
+          size: [480, 480]
+      - TableBoxEncode:
+          use_xywh: True
+      - NormalizeImage:
+          scale: 1./255.
+          mean: [0.5, 0.5, 0.5]
+          std: [0.5, 0.5, 0.5]
+          order: hwc
+      - ToCHWImage: null
+      - KeepKeys:
+          keep_keys: [image, structure, bboxes, bbox_masks, shape]
+  loader:
+    shuffle: True
+    batch_size_per_card: 10
+    drop_last: True
+    num_workers: 8
+
+Eval:
+  dataset:
+    name: PubTabDataSet
+    data_dir: ./train_data/pubtabnet/test/
+    label_file_list: [./train_data/pubtabnet/test.jsonl]
+    transforms:
+      - DecodeImage:
+          img_mode: BGR
+          channel_first: False
+      - TableMasterLabelEncode:
+          learn_empty_box: False
+          merge_no_span_structure: True
+          replace_empty_cell_token: True
+      - ResizeTableImage:
+          max_len: 480
+          resize_bboxes: True
+      - PaddingTableImage:
+          size: [480, 480]
+      - TableBoxEncode:
+          use_xywh: True
+      - NormalizeImage:
+          scale: 1./255.
+          mean: [0.5, 0.5, 0.5]
+          std: [0.5, 0.5, 0.5]
+          order: hwc
+      - ToCHWImage: null
+      - KeepKeys:
+          keep_keys: [image, structure, bboxes, bbox_masks, shape]
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 10
+    num_workers: 8
\ No newline at end of file
diff --git a/test_tipc/configs/table_master/train_infer_python.txt b/test_tipc/configs/table_master/train_infer_python.txt
new file mode 100644
index 00000000..56b8e636
--- /dev/null
+++ b/test_tipc/configs/table_master/train_infer_python.txt
@@ -0,0 +1,53 @@
+===========================train_params===========================
+model_name:table_master
+python:python3.7
+gpu_list:0|0,1
+Global.use_gpu:True|True
+Global.auto_cast:fp32
+Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=17
+Global.save_model_dir:./output/
+Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4
+Global.pretrained_model:./pretrain_models/table_structure_tablemaster_train/best_accuracy
+train_model_name:latest
+train_infer_img_dir:./ppstructure/docs/table/table.jpg
+null:null
+##
+trainer:norm_train
+norm_train:tools/train.py -c test_tipc/configs/table_master/table_master.yml -o Global.print_batch_step=10
+pact_train:null
+fpgm_train:null
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params=========================== 
+eval:null
+null:null
+##
+===========================infer_params===========================
+Global.save_inference_dir:./output/
+Global.checkpoints:
+norm_export:tools/export_model.py -c test_tipc/configs/table_master/table_master.yml -o 
+quant_export:
+fpgm_export: 
+distill_export:null
+export1:null
+export2:null
+##
+infer_model:null
+infer_export:null
+infer_quant:False
+inference:ppstructure/table/predict_structure.py  --table_char_dict_path=./ppocr/utils/dict/table_master_structure_dict.txt --image_dir=./ppstructure/docs/table/table.jpg --output ./output/table --table_algorithm=TableMaster --table_max_len=480 
+--use_gpu:True|False
+--enable_mkldnn:False
+--cpu_threads:6
+--rec_batch_num:1
+--use_tensorrt:False
+--precision:fp32
+--table_model_dir:
+--image_dir:./ppstructure/docs/table/table.jpg
+null:null
+--benchmark:False
+null:null
+===========================infer_benchmark_params==========================
+random_infer_input:[{float32,[3,480,480]}]
diff --git a/test_tipc/prepare.sh b/test_tipc/prepare.sh
index 32df8e78..593cd872 100644
--- a/test_tipc/prepare.sh
+++ b/test_tipc/prepare.sh
@@ -62,6 +62,10 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
     if [[ ${model_name} =~ "det_r50_db++" ]];then
         wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.1/en_det/ResNet50_dcn_asf_synthtext_pretrained.pdparams --no-check-certificate
     fi
+    if [ ${model_name} == "table_master" ];then
+        wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/ppstructure/models/tablemaster/table_structure_tablemaster_train.tar --no-check-certificate
+        cd ./pretrain_models/ && tar xf table_structure_tablemaster_train.tar  && cd ../
+    fi
     cd ./pretrain_models/ && tar xf det_mv3_db_v2.0_train.tar && cd ../
     rm -rf ./train_data/icdar2015
     rm -rf ./train_data/ic15_data
diff --git a/test_tipc/readme.md b/test_tipc/readme.md
index effb2f16..1c637d76 100644
--- a/test_tipc/readme.md
+++ b/test_tipc/readme.md
@@ -54,6 +54,7 @@
 | NRTR   |rec_mtb_nrtr                   | 识别  | 支持 | 多机多卡 <br> 混合精度 | - | - |
 | SAR    |rec_r31_sar                    | 识别  | 支持 | 多机多卡 <br> 混合精度 | - | - |
 | PGNet  |rec_r34_vd_none_none_ctc_v2.0  | 端到端| 支持 | 多机多卡 <br> 混合精度 | - | - |
+| TableMaster  |table_structure_tablemaster_train  | 表格识别| 支持 | 多机多卡 <br> 混合精度 | - | - |
 
 
 
-- 
GitLab