From 2a1966a080f2b76c7f077406b0b99c567e5d29e3 Mon Sep 17 00:00:00 2001 From: WenmuZhou Date: Fri, 11 Jun 2021 14:17:59 +0800 Subject: [PATCH] add en doc --- ppstructure/README.md | 70 ++++++++++++++++++++++++++++++++++ ppstructure/README_ch.md | 14 +++---- ppstructure/table/README.md | 49 ++++++++++++++++++++++++ ppstructure/table/README_ch.md | 22 ++++++----- 4 files changed, 138 insertions(+), 17 deletions(-) diff --git a/ppstructure/README.md b/ppstructure/README.md index e69de29b..60cf18f4 100644 --- a/ppstructure/README.md +++ b/ppstructure/README.md @@ -0,0 +1,70 @@ +# PaddleStructure + +## 1. Introduction to pipeline + +PaddleStructure is a toolkit for OCR of text in complex layouts. The process is as follows: + +![pipeline](../doc/table/pipeline.png) + +In PaddleStructure, the image is first analyzed by layoutparser. During layout analysis, the regions in the image are classified, and the OCR process is carried out according to each region's category. + +Currently layoutparser outputs five categories: +1. Text +2. Title +3. Figure +4. List +5. Table + +Types 1-4 follow the traditional OCR process, and type 5 follows the Table OCR process. + +## 2. LayoutParser + + +## 3. Table OCR + +[doc](table/README.md) + +## 4. 
PaddleStructure whl package introduction + +### 4.1 Use + +4.1.1 Use by code +```python +import cv2 +from paddlestructure import PaddleStructure,draw_result + +table_engine = PaddleStructure( + output='./output/table', + show_log=True) + +img_path = '../doc/table/1.png' +img = cv2.imread(img_path) +result = table_engine(img) +for line in result: + print(line) + +from PIL import Image + +font_path = 'path/to/PaddleOCR/doc/fonts/simfang.ttf' +image = Image.open(img_path).convert('RGB') +im_show = draw_result(image, result,font_path=font_path) +im_show = Image.fromarray(im_show) +im_show.save('result.jpg') +``` + +4.1.2 Use by command line +```bash +paddlestructure --image_dir=../doc/table/1.png +``` + +### 4.2 Parameter description +Most of the parameters are consistent with the paddleocr whl package, see the [whl package documentation](../doc/doc_ch/whl.md) + +| Parameter | Description | Default value | +|------------------------|------------------------------------------------------|------------------| +| output | Path where the excel files and recognition results are saved | ./output/table | +| structure_max_len | Size to which the long side of the image is resized when the structure model predicts | 488 | +| structure_model_dir | Path of the structure inference model | None | +| structure_char_type | Path of the dictionary used by the structure model | ../ppocr/utils/dict/table_structure_dict.txt | + + diff --git a/ppstructure/README_ch.md b/ppstructure/README_ch.md index 4fce135b..c3a09a3d 100644 --- a/ppstructure/README_ch.md +++ b/ppstructure/README_ch.md @@ -1,6 +1,6 @@ # PaddleStructure -## pipeline介绍 +## 1. pipeline介绍 PaddleStructure 是一个用于复杂板式文字OCR的工具包,流程如下 ![pipeline](../doc/table/pipeline.png) @@ -16,18 +16,18 @@ PaddleStructure 是一个用于复杂板式文字OCR的工具包,流程如下 1-4类走传统的OCR流程,5走表格的OCR流程。 -## LayoutParser +## 2. LayoutParser -## Table OCR +## 3. Table OCR [文档](table/README_ch.md) -## PaddleStructure whl包介绍 +## 4. PaddleStructure whl包介绍 -### 使用 +### 4.1 使用 -1. 代码使用 +4.1.1 代码使用 ```python import cv2 from paddlestructure import PaddleStructure,draw_result @@ -51,7 +51,7 @@ im_show = Image.fromarray(im_show) im_show.save('result.jpg') ``` -2. 
命令行使用 +4.1.2 命令行使用 ```bash paddlestructure --image_dir=../doc/table/1.png ``` diff --git a/ppstructure/table/README.md b/ppstructure/table/README.md index e69de29b..2ee8bbaf 100644 --- a/ppstructure/table/README.md +++ b/ppstructure/table/README.md @@ -0,0 +1,49 @@ +# Table structure and content prediction + +## 1. pipeline +Table OCR mainly consists of three models: +1. Single line text detection-DB +2. Single line text recognition-CRNN +3. Table structure and cell coordinate prediction-RARE + +The table OCR flow chart is as follows: + +![tableocr_pipeline](../../doc/table/tableocr_pipeline.png) + +1. The coordinates of each single line of text are detected by the DB model, and the detected text lines are then sent to the recognition model to get the recognition results. +2. The table structure and cell coordinates are predicted by the RARE model. +3. The recognition result of each cell is assembled from the coordinates and recognition results of the single lines of text together with the coordinates of the cell. +4. The cell recognition results and the table structure together construct the html string of the table. + +## 2. How to use + + +### 2.1 Train +TBD + +### 2.2 Eval +First cd to the PaddleOCR/ppstructure directory + +Table OCR uses TEDS (Tree-Edit-Distance-based Similarity) as the evaluation metric of the model. Before model evaluation, the three models in the pipeline need to be exported as inference models (we have provided them), and the ground truth (gt) for evaluation needs to be prepared. An example of gt is as follows: +```json +{"PMC4289340_004_00.png": [["", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "
", "", "", "
", "", "", "
", "", ""], [[1, 4, 29, 13], [137, 4, 161, 13], [215, 4, 236, 13], [1, 17, 30, 27], [137, 17, 147, 27], [215, 17, 225, 27]], [["", "F", "e", "a", "t", "u", "r", "e", ""], ["", "G", "b", "3", " ", "+", ""], ["", "G", "b", "3", " ", "-", ""], ["", "P", "a", "t", "i", "e", "n", "t", "s", ""], ["6", "2"], ["4", "5"]]]} +``` +In the gt json, the key is the image name and the value is the corresponding gt. The gt is a list composed of four items, which are, in order: +1. HTML string list of the table structure +2. The coordinates of each cell (excluding cells whose text is empty) +3. The text information in each cell (excluding cells whose text is empty) +4. The text information in each cell (including cells whose text is empty) + +Use the following command to evaluate. After the evaluation is completed, the TEDS metric will be output. +```python +python3 table/eval_table.py --det_model_dir=path/to/det_model_dir --rec_model_dir=path/to/rec_model_dir --structure_model_dir=path/to/structure_model_dir --image_dir=../doc/table/1.png --rec_char_dict_path=../ppocr/utils/dict/table_dict.txt --structure_char_dict_path=../ppocr/utils/dict/table_structure_dict.txt --rec_char_type=EN --det_limit_side_len=736 --det_limit_type=min --gt_path=path/to/gt.json +``` + + +### 2.3 Inference +First cd to the PaddleOCR/ppstructure directory + +```python +python3 table/predict_table.py --det_model_dir=path/to/det_model_dir --rec_model_dir=path/to/rec_model_dir --structure_model_dir=path/to/structure_model_dir --image_dir=../doc/table/1.png --rec_char_dict_path=../ppocr/utils/dict/table_dict.txt --structure_char_dict_path=../ppocr/utils/dict/table_structure_dict.txt --rec_char_type=EN --det_limit_side_len=736 --det_limit_type=min --output ../output/table +``` +After running, the excel sheet of each picture will be saved in the directory specified by the --output option. \ No newline at end of file diff --git a/ppstructure/table/README_ch.md b/ppstructure/table/README_ch.md index 
a32807d0..29f88bb5 100644 --- a/ppstructure/table/README_ch.md +++ b/ppstructure/table/README_ch.md @@ -1,6 +1,6 @@ # 表格结构和内容预测 -## pipeline +## 1. pipeline 表格的ocr主要包含三个模型 1. 单行文本检测-DB 2. 单行文本识别-CRNN @@ -10,27 +10,29 @@ ![tableocr_pipeline](../../doc/table/tableocr_pipeline.png) -1. 图片由单行文字检测检测到单行文字的坐标,然后送入识别模型拿到识别结果。 -2. 图片由表格结构和cell坐标预测拿到表格的结构信息和单元格的坐标信息。 +1. 图片由单行文字检测模型检测到单行文字的坐标,然后送入识别模型拿到识别结果。 +2. 图片由表格结构和cell坐标预测模型拿到表格的结构信息和单元格的坐标信息。 3. 由单行文字的坐标、识别结果和单元格的坐标一起组合出单元格的识别结果。 4. 单元格的识别结果和表格结构一起构造表格的html字符串。 -## 使用 +## 2. 使用 -### 训练 +### 2.1 训练 TBD -### 评估 +### 2.2 评估 先cd到PaddleOCR/ppstructure目录下 表格使用 TEDS(Tree-Edit-Distance-based Similarity) 作为模型的评估指标。在进行模型评估之前,需要将pipeline中的三个模型分别导出为inference模型(我们已经提供好),还需要准备评估的gt, gt示例如下: ```json {"PMC4289340_004_00.png": [["", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "
", "", "", "
", "", "", "
", "", ""], [[1, 4, 29, 13], [137, 4, 161, 13], [215, 4, 236, 13], [1, 17, 30, 27], [137, 17, 147, 27], [215, 17, 225, 27]], [["", "F", "e", "a", "t", "u", "r", "e", ""], ["", "G", "b", "3", " ", "+", ""], ["", "G", "b", "3", " ", "-", ""], ["", "P", "a", "t", "i", "e", "n", "t", "s", ""], ["6", "2"], ["4", "5"]]]} ``` -示例对应的表格如下 - -![tableocr_pipeline](../../doc/table/table_example.png) +json 中,key为图片名,value为对应的gt,gt是一个由四个item组成的list,每个item分别为 +1. 表格结构的html字符串list +2. 每个cell的坐标 (不包括cell里文字为空的) +3. 每个cell里的文字信息 (不包括cell里文字为空的) +4. 每个cell里的文字信息 (包括cell里文字为空的) 准备完成后使用如下命令进行评估,评估完成后会输出teds指标。 ```python @@ -38,7 +40,7 @@ python3 table/eval_table.py --det_model_dir=path/to/det_model_dir --rec_model_di ``` -### 预测 +### 2.3 预测 先cd到PaddleOCR/ppstructure目录下 ```python -- GitLab