diff --git a/doc/doc_ch/algorithm_overview.md b/doc/doc_ch/algorithm_overview.md index 84af5fdcce6ac332af8014b1a5a7d98206489607..1efd564ce10ac4d92dccda0b844e6d6d07a17906 100755 --- a/doc/doc_ch/algorithm_overview.md +++ b/doc/doc_ch/algorithm_overview.md @@ -107,4 +107,4 @@ |模型|骨干网络|配置文件|acc|下载链接| |---|---|---|---|---| -|TableMaster|TableResNetExtra|[configs/table/table_master.yml](../../configs/table/table_master.yml)|77.47%|[训练模型]|[训练模型](https://paddleocr.bj.bcebos.com/ppstructure/models/tablemaster/table_structure_tablemaster_train.tar)/[推理模型](https://paddleocr.bj.bcebos.com/ppstructure/models/tablemaster/table_structure_tablemaster_infer.tar)| +|TableMaster|TableResNetExtra|[configs/table/table_master.yml](../../configs/table/table_master.yml)|77.47%|[训练模型](https://paddleocr.bj.bcebos.com/ppstructure/models/tablemaster/table_structure_tablemaster_train.tar) / [推理模型](https://paddleocr.bj.bcebos.com/ppstructure/models/tablemaster/table_structure_tablemaster_infer.tar)| diff --git a/doc/doc_ch/algorithm_table_master.md b/doc/doc_ch/algorithm_table_master.md index 6bfd0f36fc05a4adf81d6bb2d306a74d1525650f..36455ed9f94581c31fb849ebe121a3d7ecdb7acb 100644 --- a/doc/doc_ch/algorithm_table_master.md +++ b/doc/doc_ch/algorithm_table_master.md @@ -55,7 +55,7 @@ python3 tools/export_model.py -c configs/table/table_master.yml -o Global.pretra 转换成功后,在目录下有三个文件: ``` -/inference/table_master/ +./inference/table_master/ ├── inference.pdiparams # 识别inference模型的参数文件 ├── inference.pdiparams.info # 识别inference模型的参数信息,可忽略 └── inference.pdmodel # 识别inference模型的program文件 diff --git a/doc/doc_en/algorithm_overview_en.md b/doc/doc_en/algorithm_overview_en.md index cd277c74a38adb11121f353f7bfd07d7c255f61e..8b9b3f7b0bf08b8dbfc7b50edca501c35319ee06 100755 --- a/doc/doc_en/algorithm_overview_en.md +++ b/doc/doc_en/algorithm_overview_en.md @@ -1,10 +1,10 @@ # OCR Algorithms -- [1. 
Two-stage Algorithms](#1) - * [1.1 Text Detection Algorithms](#11) - * [1.2 Text Recognition Algorithms](#12) -- [2. End-to-end Algorithms](#2) -- [3. Table Recognition Algorithms](#3) +- [1. Two-stage Algorithms](#1-two-stage-algorithms) + - [1.1 Text Detection Algorithms](#11-text-detection-algorithms) + - [1.2 Text Recognition Algorithms](#12-text-recognition-algorithms) +- [2. End-to-end Algorithms](#2-end-to-end-algorithms) +- [3. Table Recognition Algorithms](#3-table-recognition-algorithms) This tutorial lists the OCR algorithms supported by PaddleOCR, as well as the models and metrics of each algorithm on **English public datasets**. It is mainly used for algorithm introduction and algorithm performance comparison. For more models on other datasets including Chinese, please refer to [PP-OCR v2.0 models list](./models_list_en.md). @@ -107,4 +107,4 @@ On the PubTabNet dataset, the algorithm result is as follows: |Model|Backbone|Config|Acc|Download link| |---|---|---|---|---| -|TableMaster|TableResNetExtra|[configs/table/table_master.yml](../../configs/table/table_master.yml)|77.47%|[训练模型]|[训练模型](https://paddleocr.bj.bcebos.com/ppstructure/models/tablemaster/table_structure_tablemaster_train.tar)/[推理模型](https://paddleocr.bj.bcebos.com/ppstructure/models/tablemaster/table_structure_tablemaster_infer.tar)| +|TableMaster|TableResNetExtra|[configs/table/table_master.yml](../../configs/table/table_master.yml)|77.47%|[trained model](https://paddleocr.bj.bcebos.com/ppstructure/models/tablemaster/table_structure_tablemaster_train.tar) / [inference model](https://paddleocr.bj.bcebos.com/ppstructure/models/tablemaster/table_structure_tablemaster_infer.tar)| diff --git a/doc/doc_en/algorithm_table_master_en.md b/doc/doc_en/algorithm_table_master_en.md index a557a609017baf69da8777d062f4b34e6e2c5973..e9249a2a05d3e79f4358366d46cf02a14a223f5f 100644 --- a/doc/doc_en/algorithm_table_master_en.md +++ b/doc/doc_en/algorithm_table_master_en.md @@ -1,4 +1,4 @@ -# Torm Recognition 
Algorithm-TableMASTER +# Table Recognition Algorithm-TableMASTER - [1. Introduction](#1-introduction) - [2. Environment](#2-environment) @@ -24,7 +24,7 @@ On the PubTabNet table recognition public data set, the algorithm reproduction a |Model|Backbone|Cnnfig|Acc|Download link| | --- | --- | --- | --- | --- | -|TableMaster|TableResNetExtra|[configs/table/table_master.yml](../../configs/table/table_master.yml)|77.47%|[train model](https://paddleocr.bj.bcebos.com/ppstructure/models/tablemaster/table_structure_tablemaster_train.tar)/[inference model](https://paddleocr.bj.bcebos.com/ppstructure/models/tablemaster/table_structure_tablemaster_infer.tar)| +|TableMaster|TableResNetExtra|[configs/table/table_master.yml](../../configs/table/table_master.yml)|77.47%|[trained model](https://paddleocr.bj.bcebos.com/ppstructure/models/tablemaster/table_structure_tablemaster_train.tar)/[inference model](https://paddleocr.bj.bcebos.com/ppstructure/models/tablemaster/table_structure_tablemaster_infer.tar)| diff --git a/ppocr/data/imaug/label_ops.py b/ppocr/data/imaug/label_ops.py index 96f3c98674ce3e3be973b0e2989ba555ba1a6389..7cc4cef46a8298001ac4089dbd5e32dbca009caf 100644 --- a/ppocr/data/imaug/label_ops.py +++ b/ppocr/data/imaug/label_ops.py @@ -671,7 +671,7 @@ class TableLabelEncode(AttnLabelEncode): def _merge_no_span_structure(self, structure): """ - This fun code is refer from: + This code is refer from: https://github.com/JiaquanYe/TableMASTER-mmocr/blob/master/table_recognition/data_preprocess.py """ new_structure = [] diff --git a/ppocr/losses/table_master_loss.py b/ppocr/losses/table_master_loss.py index 216c4e521a9580e0fc8d897f918aedbb679fbc1b..dca982dbd43e2c14f15503e1e98d6fe6c18878c5 100644 --- a/ppocr/losses/table_master_loss.py +++ b/ppocr/losses/table_master_loss.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
""" -This fun code is refer from: +This code is refer from: https://github.com/JiaquanYe/TableMASTER-mmocr/tree/master/mmocr/models/textrecog/losses """ diff --git a/ppocr/metrics/table_metric.py b/ppocr/metrics/table_metric.py index fb0075f7cbecad7d58679c5338390e7bf6d99a08..fd2631e442b8d111c64d5cf4b34ea9063d8c60dd 100644 --- a/ppocr/metrics/table_metric.py +++ b/ppocr/metrics/table_metric.py @@ -31,8 +31,6 @@ class TableStructureMetric(object): gt_structure_batch_list): pred_str = ''.join(pred) target_str = ''.join(target) - # pred_str = pred_str.replace('','').replace('','').replace('','').replace('','') - # target_str = target_str.replace('','').replace('','').replace('','').replace('','') if pred_str == target_str: correct_num += 1 all_num += 1 @@ -55,8 +53,6 @@ class TableStructureMetric(object): self.len_acc_num = 0 self.token_nums = 0 self.anys_dict = dict() - from collections import defaultdict - self.error_num_dict = defaultdict(int) class TableMetric(object): diff --git a/ppocr/modeling/backbones/table_master_resnet.py b/ppocr/modeling/backbones/table_master_resnet.py index f1c506ca45d3f73cda91f48ac1ba6f3ebbe87bda..dacf5ed26e5374b3c93c1a983be1d7b5b4c471fc 100644 --- a/ppocr/modeling/backbones/table_master_resnet.py +++ b/ppocr/modeling/backbones/table_master_resnet.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
""" -This fun code is refer from: +This code is refer from: https://github.com/JiaquanYe/TableMASTER-mmocr/blob/master/mmocr/models/textrecog/backbones/table_resnet_extra.py """ @@ -193,47 +193,43 @@ class TableResNetExtra(nn.Layer): def forward(self, x): f = [] - x = self.conv1(x) # 1,64,480,480 + x = self.conv1(x) x = self.bn1(x) x = self.relu1(x) - x = self.conv2(x) # 1,128,480,480 + x = self.conv2(x) x = self.bn2(x) x = self.relu2(x) - # (48, 160) - x = self.maxpool1(x) # 1,64,240,240 + x = self.maxpool1(x) x = self.layer1(x) - x = self.conv3(x) # 1,256,240,240 + x = self.conv3(x) x = self.bn3(x) x = self.relu3(x) f.append(x) - # (24, 80) - x = self.maxpool2(x) # 1,256,120,120 + x = self.maxpool2(x) x = self.layer2(x) - x = self.conv4(x) # 1,256,120,120 + x = self.conv4(x) x = self.bn4(x) x = self.relu4(x) f.append(x) - # (12, 40) - x = self.maxpool3(x) # 1,256,60,60 + x = self.maxpool3(x) - x = self.layer3(x) # 1,512,60,60 - x = self.conv5(x) # 1,512,60,60 + x = self.layer3(x) + x = self.conv5(x) x = self.bn5(x) x = self.relu5(x) - x = self.layer4(x) # 1,512,60,60 - x = self.conv6(x) # 1,512,60,60 + x = self.layer4(x) + x = self.conv6(x) x = self.bn6(x) x = self.relu6(x) f.append(x) - # (6, 40) return f diff --git a/ppocr/modeling/heads/table_master_head.py b/ppocr/modeling/heads/table_master_head.py index 887630a87e6d04b6b993b9afd068461f70df158f..fddbcc63fcd6d5380f9fdd96f9ca85756d666442 100644 --- a/ppocr/modeling/heads/table_master_head.py +++ b/ppocr/modeling/heads/table_master_head.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
""" -This fun code is refer from: +This code is refer from: https://github.com/JiaquanYe/TableMASTER-mmocr/blob/master/mmocr/models/textrecog/decoders/master_decoder.py """ @@ -135,7 +135,7 @@ class TableMasterHead(nn.Layer): batch_size = out_enc.shape[0] SOS = paddle.zeros([batch_size, 1], dtype='int64') + self.SOS output, bbox_output = self.greedy_forward(SOS, out_enc) - # output = F.softmax(output) + output = F.softmax(output) return {'structure_probs': output, 'loc_preds': bbox_output} def forward(self, feat, targets=None): diff --git a/ppocr/utils/visual.py b/ppocr/utils/visual.py index 235eb572a3975b4446ae2f2c9ad9c8558d5c5ad8..b20222bdbd76a81ccb0b3c38c9a3412e443f3dd1 100644 --- a/ppocr/utils/visual.py +++ b/ppocr/utils/visual.py @@ -110,3 +110,16 @@ def draw_re_results(image, img_new = Image.blend(image, img_new, 0.5) return np.array(img_new) + + +def draw_rectangle(img_path, boxes, use_xywh=False): + img = cv2.imread(img_path) + img_show = img.copy() + for box in boxes.astype(int): + if use_xywh: + x, y, w, h = box + x1, y1, x2, y2 = x - w // 2, y - h // 2, x + w // 2, y + h // 2 + else: + x1, y1, x2, y2 = box + cv2.rectangle(img_show, (x1, y1), (x2, y2), (255, 0, 0), 2) + return img_show \ No newline at end of file diff --git a/ppstructure/table/predict_structure.py b/ppstructure/table/predict_structure.py index 17ec909582a0d8ae70829730da23c7580104eb68..00385c76658ebd65843e351fda7da8807561d518 100755 --- a/ppstructure/table/predict_structure.py +++ b/ppstructure/table/predict_structure.py @@ -30,6 +30,7 @@ from ppocr.data import create_operators, transform from ppocr.postprocess import build_post_process from ppocr.utils.logging import get_logger from ppocr.utils.utility import get_image_file_list, check_and_read_gif +from ppocr.utils.visual import draw_rectangle from ppstructure.utility import parse_args logger = get_logger() @@ -120,19 +121,6 @@ class TableStructurer(object): return structure_str_list, bbox_list, elapse -def draw_rectangle(img_path, 
boxes, use_xywh=False): - img = cv2.imread(img_path) - img_show = img.copy() - for box in boxes.astype(int): - if use_xywh: - x, y, w, h = box - x1, y1, x2, y2 = x - w // 2, y - h // 2, x + w // 2, y + h // 2 - else: - x1, y1, x2, y2 = box - cv2.rectangle(img_show, (x1, y1), (x2, y2), (255, 0, 0), 2) - return img_show - - def main(args): image_file_list = get_image_file_list(args.image_dir) table_structurer = TableStructurer(args) diff --git a/tools/infer_table.py b/tools/infer_table.py index 58e7455cbb7feb0d87d72238aba52c72abc6f87b..6c02dd8640c9345c267e56d6e5a0c14bde121b7e 100644 --- a/tools/infer_table.py +++ b/tools/infer_table.py @@ -36,6 +36,7 @@ from ppocr.modeling.architectures import build_model from ppocr.postprocess import build_post_process from ppocr.utils.save_load import load_model from ppocr.utils.utility import get_image_file_list +from ppocr.utils.visual import draw_rectangle import tools.program as program import cv2 @@ -111,19 +112,6 @@ def main(config, device, logger, vdl_writer): logger.info("success!") -def draw_rectangle(img_path, boxes, use_xywh=False): - img = cv2.imread(img_path) - img_show = img.copy() - for box in boxes.astype(int): - if use_xywh: - x, y, w, h = box - x1, y1, x2, y2 = x - w // 2, y - h // 2, x + w // 2, y + h // 2 - else: - x1, y1, x2, y2 = box - cv2.rectangle(img_show, (x1, y1), (x2, y2), (255, 0, 0), 2) - return img_show - - if __name__ == '__main__': config, device, logger, vdl_writer = program.preprocess() main(config, device, logger, vdl_writer)