diff --git a/doc/table/PaddleDetection_config.png b/doc/table/PaddleDetection_config.png new file mode 100644 index 0000000000000000000000000000000000000000..d18932b66cc148b7796fe4b319ad9eb82c2a2868 Binary files /dev/null and b/doc/table/PaddleDetection_config.png differ diff --git a/doc/table/paper-image.jpg b/doc/table/paper-image.jpg new file mode 100644 index 0000000000000000000000000000000000000000..db7246b314556d73cd49d049b9b480887b6ef994 Binary files /dev/null and b/doc/table/paper-image.jpg differ diff --git a/doc/table/result_all.jpg b/doc/table/result_all.jpg new file mode 100644 index 0000000000000000000000000000000000000000..3dd9840643989f1049c228c201b43f9ed89a5fcb Binary files /dev/null and b/doc/table/result_all.jpg differ diff --git a/doc/table/result_text.jpg b/doc/table/result_text.jpg new file mode 100644 index 0000000000000000000000000000000000000000..94c9bce4a73b2764bb9791972f62a3a5b37fed45 Binary files /dev/null and b/doc/table/result_text.jpg differ diff --git a/test/MANIFEST.in b/test1/MANIFEST.in similarity index 100% rename from test/MANIFEST.in rename to test1/MANIFEST.in diff --git a/test/__init__.py b/test1/__init__.py similarity index 100% rename from test/__init__.py rename to test1/__init__.py diff --git a/test/api.md b/test1/api.md similarity index 92% rename from test/api.md rename to test1/api.md index 60cf18f4405922df3fe19fcc209dc8b727d71c14..7283595d0966e1abfda2409bd7096e8425e7efe5 100644 --- a/test/api.md +++ b/test1/api.md @@ -1,5 +1,11 @@ # PaddleStructure +install layoutparser +```sh +wget https://paddleocr.bj.bcebos.com/whl/layoutparser-0.0.0-py3-none-any.whl +pip3 install layoutparser-0.0.0-py3-none-any.whl +``` + ## 1. 
Introduction to pipeline PaddleStructure is a toolkit for complex layout text OCR, the process is as follows diff --git a/test/api_ch.md b/test1/api_ch.md similarity index 91% rename from test/api_ch.md rename to test1/api_ch.md index c3a09a3d78085d0afee3ea7ebfba0a5a63d58842..7fafe700139c00dfb65d19c1d3f634add360b890 100644 --- a/test/api_ch.md +++ b/test1/api_ch.md @@ -1,5 +1,11 @@ # PaddleStructure +安装layoutparser +```sh +wget https://paddleocr.bj.bcebos.com/whl/layoutparser-0.0.0-py3-none-any.whl +pip3 install layoutparser-0.0.0-py3-none-any.whl +``` + ## 1. pipeline介绍 PaddleStructure 是一个用于复杂板式文字OCR的工具包,流程如下 @@ -18,6 +24,7 @@ PaddleStructure 是一个用于复杂板式文字OCR的工具包,流程如下 ## 2. LayoutParser +[文档](layout/README.md) ## 3. Table OCR diff --git a/test1/layout/README.md b/test1/layout/README.md new file mode 100644 index 0000000000000000000000000000000000000000..274a8c63a58543d3769bbd4b11133496e74f405a --- /dev/null +++ b/test1/layout/README.md @@ -0,0 +1,133 @@ +# 版面分析使用说明 + +* [1. 安装whl包](#安装whl包) +* [2. 使用](#使用) +* [3. 后处理](#后处理) +* [4. 指标](#指标) +* [5. 训练版面分析模型](#训练版面分析模型) + + + +## 1. 安装whl包 +```bash +wget https://paddleocr.bj.bcebos.com/whl/layoutparser-0.0.0-py3-none-any.whl +pip install -U layoutparser-0.0.0-py3-none-any.whl +``` + + + +## 2. 使用 + +使用layoutparser识别给定文档的布局: + +```python +import layoutparser as lp +image = cv2.imread("imags/paper-image.jpg") +image = image[..., ::-1] + +# 加载模型 +model = lp.PaddleDetectionLayoutModel(config_path="lp://PubLayNet/ppyolov2_r50vd_dcn_365e_publaynet/config", + threshold=0.5, + label_map={0: "Text", 1: "Title", 2: "List", 3:"Table", 4:"Figure"}, + enforce_cpu=False, + enable_mkldnn=True) +# 检测 +layout = model.detect(image) + +# 显示结果 +lp.draw_box(image, layout, box_width=3, show_element_type=True) +``` + +下图展示了结果,不同颜色的检测框表示不同的类别,并通过`show_element_type`在框的左上角显示具体类别: + +
+ +
+ +`PaddleDetectionLayoutModel`函数参数说明如下: + +| 参数 | 含义 | 默认值 | 备注 | +| :------------: | :-------------------------: | :---------: | :----------------------------------------------------------: | +| config_path | 模型配置路径 | None | 指定config_path会自动下载模型(仅第一次,之后模型存在,不会再下载) | +| model_path | 模型路径 | None | 本地模型路径,config_path和model_path必须设置一个,不能同时为None | +| threshold | 预测得分的阈值 | 0.5 | \ | +| input_shape | reshape之后图片尺寸 | [3,640,640] | \ | +| batch_size | 测试batch size | 1 | \ | +| label_map | 类别映射表 | None | 设置config_path时,可以为None,根据数据集名称自动获取label_map | +| enforce_cpu | 代码是否使用CPU运行 | False | 设置为False表示使用GPU,True表示强制使用CPU | +| enable_mkldnn | CPU预测中是否开启MKLDNN加速 | True | \ | +| thread_num | 设置CPU线程数 | 10 | \ | + +目前支持以下几种模型配置和label map,您可以通过修改 `config_path` 和 `label_map` 参数使用这些模型,从而检测不同类型的内容: + +| dataset | config_path | label_map | +| ------------------------------------------------------------ | ------------------------------------------------------------ | --------------------------------------------------------- | +| [TableBank](https://doc-analysis.github.io/tablebank-page/index.html) word | lp://TableBank/ppyolov2_r50vd_dcn_365e_tableBank_word/config | {0:"Table"} | +| TableBank latex | lp://TableBank/ppyolov2_r50vd_dcn_365e_tableBank_latex/config | {0:"Table"} | +| [PubLayNet](https://github.com/ibm-aur-nlp/PubLayNet) | lp://PubLayNet/ppyolov2_r50vd_dcn_365e_publaynet/config | {0: "Text", 1: "Title", 2: "List", 3:"Table", 4:"Figure"} | + +* TableBank word和TableBank latex分别在word文档、latex文档数据集训练; +* 下载TableBank数据集同时包含word和latex。 + + + +## 3. 
后处理 + +版面分析检测包含多个类别,如果只想获取指定类别(如"Text"类别)的检测框、可以使用下述代码: + +```python +# 首先过滤特定文本类型的区域 +text_blocks = lp.Layout([b for b in layout if b.type=='Text']) +figure_blocks = lp.Layout([b for b in layout if b.type=='Figure']) + +# 因为在图像区域内可能检测到文本区域,所以只需要删除它们 +text_blocks = lp.Layout([b for b in text_blocks \ + if not any(b.is_in(b_fig) for b_fig in figure_blocks)]) + +# 对文本区域排序并分配id +h, w = image.shape[:2] + +left_interval = lp.Interval(0, w/2*1.05, axis='x').put_on_canvas(image) + +left_blocks = text_blocks.filter_by(left_interval, center=True) +left_blocks.sort(key = lambda b:b.coordinates[1]) + +right_blocks = [b for b in text_blocks if b not in left_blocks] +right_blocks.sort(key = lambda b:b.coordinates[1]) + +# 最终合并两个列表,并按顺序添加索引 +text_blocks = lp.Layout([b.set(id = idx) for idx, b in enumerate(left_blocks + right_blocks)]) + +# 显示结果 +lp.draw_box(image, text_blocks, + box_width=3, + show_element_id=True) +``` + +显示只有"Text"类别的结果: + +
+ +
+ + + +## 4. 指标 + +| Dataset | mAP | CPU time cost | GPU time cost | +| --------- | ---- | ------------- | ------------- | +| PubLayNet | 93.6 | 1713.7ms | 66.6ms | +| TableBank | 96.2 | 1968.4ms | 65.1ms | + +**Environment:** + +​ **CPU:** Intel(R) Xeon(R) CPU E5-2650 v4 @ 2.20GHz,24core + +​ **GPU:** a single NVIDIA Tesla P40 + + + +## 5. 训练版面分析模型 + +上述模型基于[PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection) 训练,如果您想训练自己的版面分析模型,请参考:[train_layoutparser_model](train_layoutparser_model.md) + diff --git a/test1/layout/train_layoutparser_model.md b/test1/layout/train_layoutparser_model.md new file mode 100644 index 0000000000000000000000000000000000000000..0a4554e12d9e565fa8e3de4a83cbd2eb5b515c6e --- /dev/null +++ b/test1/layout/train_layoutparser_model.md @@ -0,0 +1,188 @@ +# 训练版面分析 + +* [1. 安装](#安装) + * [1.1 环境要求](#环境要求) + * [1.2 安装PaddleDetection](#安装PaddleDetection) +* [2. 准备数据](#准备数据) +* [3. 配置文件改动和说明](#配置文件改动和说明) +* [4. PaddleDetection训练](#训练) +* [5. PaddleDetection预测](#预测) +* [6. 预测部署](#预测部署) + * [6.1 模型导出](#模型导出) + * [6.2 layout parser预测](#layout_parser预测) + + + +## 1. 安装 + + + +### 1.1 环境要求 + +- PaddlePaddle 2.1 +- OS 64 bit +- Python 3(3.5.1+/3.6/3.7/3.8/3.9),64 bit +- pip/pip3(9.0.1+), 64 bit +- CUDA >= 10.1 +- cuDNN >= 7.6 + + + +### 1.2 安装PaddleDetection + +```bash +# 克隆PaddleDetection仓库 +cd +git clone https://github.com/PaddlePaddle/PaddleDetection.git + +cd PaddleDetection +# 安装其他依赖 +pip install -r requirements.txt +``` + +更多安装教程,请参考: [Install doc](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.1/docs/tutorials/INSTALL_cn.md) + + + +## 2. 
准备数据 + +下载 [PubLayNet](https://github.com/ibm-aur-nlp/PubLayNet) 数据集: + +```bash +cd PaddleDetection/dataset/ +mkdir publaynet +# 执行命令,下载 +wget -O publaynet.tar.gz https://dax-cdn.cdn.appdomain.cloud/dax-publaynet/1.0.0/publaynet.tar.gz?_ga=2.104193024.1076900768.1622560733-649911202.1622560733 +# 解压 +tar -xvf publaynet.tar.gz +``` + +解压之后PubLayNet目录结构: + +| File or Folder | Description | num | +| :------------- | :----------------------------------------------- | ------- | +| `train/` | Images in the training subset | 335,703 | +| `val/` | Images in the validation subset | 11,245 | +| `test/` | Images in the testing subset | 11,405 | +| `train.json` | Annotations for training images | | +| `val.json` | Annotations for validation images | | +| `LICENSE.txt` | Plaintext version of the CDLA-Permissive license | | +| `README.txt` | Text file with the file names and description | | + +如果使用其它数据集,请参考[准备训练数据](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.1/docs/tutorials/PrepareDataSet.md) + + + +## 3. 配置文件改动和说明 + +我们使用 `configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml`配置进行训练,配置文件摘要如下: + +
+ +
+ +从上图看到 `ppyolov2_r50vd_dcn_365e_coco.yml` 配置需要依赖其他的配置文件,在该例子中需要依赖: + +``` +coco_detection.yml:主要说明了训练数据和验证数据的路径 + +runtime.yml:主要说明了公共的运行参数,比如是否使用GPU、每多少个epoch存储checkpoint等 + +optimizer_365e.yml:主要说明了学习率和优化器的配置 + +ppyolov2_r50vd_dcn.yml:主要说明模型和主干网络的情况 + +ppyolov2_reader.yml:主要说明数据读取器配置,如batch size,并发加载子进程数等,同时包含读取后预处理操作,如resize、数据增强等等 +``` + +根据实际情况,修改上述文件,比如数据集路径、batch size等。 + + + +## 4. PaddleDetection训练 + +PaddleDetection提供了单卡/多卡训练模式,满足用户多种训练需求 + +* GPU 单卡训练 + +```bash +export CUDA_VISIBLE_DEVICES=0 #windows和Mac下不需要执行该命令 +python tools/train.py -c configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml +``` + +* GPU多卡训练 + +```bash +export CUDA_VISIBLE_DEVICES=0,1,2,3 +python -m paddle.distributed.launch --gpus 0,1,2,3 tools/train.py -c configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml --eval +``` + +--eval:表示边训练边验证 + +* 模型恢复训练 + +在日常训练过程中,有的用户由于一些原因导致训练中断,用户可以使用-r的命令恢复训练: + +```bash +export CUDA_VISIBLE_DEVICES=0,1,2,3 +python -m paddle.distributed.launch --gpus 0,1,2,3 tools/train.py -c configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml --eval -r output/ppyolov2_r50vd_dcn_365e_coco/10000 +``` + +注意:如果遇到 "`Out of memory error`" 问题, 尝试在 `ppyolov2_reader.yml` 文件中调小`batch_size` + + + +## 5. PaddleDetection预测 + +设置参数,使用PaddleDetection预测: + +```bash +export CUDA_VISIBLE_DEVICES=0 +python tools/infer.py -c configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml --infer_img=images/paper-image.jpg --output_dir=infer_output/ --draw_threshold=0.5 -o weights=output/ppyolov2_r50vd_dcn_365e_coco/model_final --use_vdl=True +``` + +`--draw_threshold` 是个可选参数。根据 [NMS](https://ieeexplore.ieee.org/document/1699659) 的计算,不同阈值会产生不同的结果。`keep_top_k`表示设置输出目标的最大数量,默认值为100,用户可以根据自己的实际情况进行设定。 + + + +## 6. 
预测部署 + +在layout parser中使用自己训练好的模型, + + + +### 6.1 模型导出 + +在模型训练过程中保存的模型文件是包含前向预测和反向传播的过程,在实际的工业部署则不需要反向传播,因此需要将模型导出成部署需要的模型格式。 在PaddleDetection中提供了 `tools/export_model.py`脚本来导出模型。 + +导出模型名称默认是`model.*`,layout parser代码模型名称是`inference.*`, 所以修改[PaddleDetection/ppdet/engine/trainer.py ](https://github.com/PaddlePaddle/PaddleDetection/blob/b87a1ea86fa18ce69e44a17ad1b49c1326f19ff9/ppdet/engine/trainer.py#L512) (点开链接查看详细代码行),将`model`改为`inference`即可。 + +执行导出模型脚本: + +```bash +python tools/export_model.py -c configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml --output_dir=./inference -o weights=output/ppyolov2_r50vd_dcn_365e_coco/model_final.pdparams +``` + +预测模型会导出到`inference/ppyolov2_r50vd_dcn_365e_coco`目录下,分别为`infer_cfg.yml`(预测不需要), `inference.pdiparams`, `inference.pdiparams.info`,`inference.pdmodel` 。 + +更多模型导出教程,请参考:[EXPORT_MODEL](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.1/deploy/EXPORT_MODEL.md) + + + +### 6.2 layout_parser预测 + +`model_path`指定训练好的模型路径,使用layout parser进行预测: + +```python +import layoutparser as lp +model = lp.PaddleDetectionLayoutModel(model_path="inference/ppyolov2_r50vd_dcn_365e_coco", threshold=0.5,label_map={0: "Text", 1: "Title", 2: "List", 3:"Table", 4:"Figure"},enforce_cpu=True,enable_mkldnn=True) +``` + + + +*** + +更多PaddleDetection训练教程,请参考:[PaddleDetection训练](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.1/docs/tutorials/GETTING_STARTED_cn.md) + +*** + diff --git a/test1/layoutparser-0.0.0-py3-none-any.whl b/test1/layoutparser-0.0.0-py3-none-any.whl new file mode 100644 index 0000000000000000000000000000000000000000..e3fa17fc4dab24440761ce04deac64f36d16591e Binary files /dev/null and b/test1/layoutparser-0.0.0-py3-none-any.whl differ diff --git a/test/paddlestructure.py b/test1/paddlestructure.py similarity index 97% rename from test/paddlestructure.py rename to test1/paddlestructure.py index 67fd85cf37f6833e9d1229bda6c039500c901cba..171d55b38ab06f0fc6a874df597281e20de0483a 100644 --- a/test/paddlestructure.py 
+++ b/test1/paddlestructure.py @@ -24,9 +24,9 @@ import numpy as np from pathlib import Path from ppocr.utils.logging import get_logger -from test.predict_system import OCRSystem, save_res -from test.table.predict_table import to_excel -from test.utility import init_args, draw_result +from test1.predict_system import OCRSystem, save_res +from test1.table.predict_table import to_excel +from test1.utility import init_args, draw_result logger = get_logger() from ppocr.utils.utility import check_and_read_gif, get_image_file_list diff --git a/test/predict_system.py b/test1/predict_system.py similarity index 97% rename from test/predict_system.py rename to test1/predict_system.py index 94ad4d80306c0c5191b36d6a686e7446a7878765..9e99a48cdf033f1cdb2263fc7a655a26a53ded92 100644 --- a/test/predict_system.py +++ b/test1/predict_system.py @@ -31,8 +31,8 @@ import layoutparser as lp from ppocr.utils.utility import get_image_file_list, check_and_read_gif from ppocr.utils.logging import get_logger from tools.infer.predict_system import TextSystem -from test.table.predict_table import TableSystem, to_excel -from test.utility import parse_args, draw_result +from test1.table.predict_table import TableSystem, to_excel +from test1.utility import parse_args, draw_result logger = get_logger() diff --git a/test/setup.py b/test1/setup.py similarity index 93% rename from test/setup.py rename to test1/setup.py index 7a2c3676d618463b6083c7dc09f09cb94ff255e6..0b092c49a4db98def28a7c2942993806b0ffc27c 100644 --- a/test/setup.py +++ b/test1/setup.py @@ -30,9 +30,9 @@ def readme(): return README -shutil.copytree('/table', './test/table') -shutil.copyfile('/predict_system.py', './test/predict_system.py') -shutil.copyfile('/utility.py', './test/utility.py') +shutil.copytree('./table', './test1/table') +shutil.copyfile('./predict_system.py', './test1/predict_system.py') +shutil.copyfile('./utility.py', './test1/utility.py') shutil.copytree('../ppocr', './ppocr') shutil.copytree('../tools', './tools') 
shutil.copyfile('../LICENSE', './LICENSE') @@ -68,5 +68,5 @@ setup( shutil.rmtree('ppocr') shutil.rmtree('tools') -shutil.rmtree('test') +shutil.rmtree('test1') os.remove('LICENSE') diff --git a/test/table/README.md b/test1/table/README.md similarity index 100% rename from test/table/README.md rename to test1/table/README.md diff --git a/test/table/README_ch.md b/test1/table/README_ch.md similarity index 100% rename from test/table/README_ch.md rename to test1/table/README_ch.md diff --git a/test/table/__init__.py b/test1/table/__init__.py similarity index 100% rename from test/table/__init__.py rename to test1/table/__init__.py diff --git a/test/table/eval_table.py b/test1/table/eval_table.py similarity index 94% rename from test/table/eval_table.py rename to test1/table/eval_table.py index a027a45f7c80109a8fafc1fc1f0c8fd2aa03d70b..dc63e34e2a85657a6487e7abb081854e937cf669 100755 --- a/test/table/eval_table.py +++ b/test1/table/eval_table.py @@ -20,9 +20,9 @@ sys.path.append(os.path.abspath(os.path.join(__dir__, '../..'))) import cv2 import json from tqdm import tqdm -from test.table.table_metric import TEDS -from test.table.predict_table import TableSystem -from test.utility import init_args +from test1.table.table_metric import TEDS +from test1.table.predict_table import TableSystem +from test1.utility import init_args from ppocr.utils.logging import get_logger logger = get_logger() diff --git a/test/table/matcher.py b/test1/table/matcher.py similarity index 100% rename from test/table/matcher.py rename to test1/table/matcher.py diff --git a/test/table/predict_structure.py b/test1/table/predict_structure.py similarity index 99% rename from test/table/predict_structure.py rename to test1/table/predict_structure.py index eacf257719a88a672d5c25a50cf12867d344c21c..455bf7e7b93bc7a97635027c73900ffb39f50f95 100755 --- a/test/table/predict_structure.py +++ b/test1/table/predict_structure.py @@ -32,7 +32,7 @@ from ppocr.data import create_operators, transform from 
ppocr.postprocess import build_post_process from ppocr.utils.logging import get_logger from ppocr.utils.utility import get_image_file_list, check_and_read_gif -from test.utility import parse_args +from test1.utility import parse_args logger = get_logger() diff --git a/test/table/predict_table.py b/test1/table/predict_table.py similarity index 98% rename from test/table/predict_table.py rename to test1/table/predict_table.py index 66c0895d3a889e7244608da81e5090639494667d..b06a4f4d53402ca809f0ab846f83176795ca7217 100644 --- a/test/table/predict_table.py +++ b/test1/table/predict_table.py @@ -30,9 +30,9 @@ import tools.infer.predict_rec as predict_rec import tools.infer.predict_det as predict_det from ppocr.utils.utility import get_image_file_list, check_and_read_gif from ppocr.utils.logging import get_logger -from test.table.matcher import distance, compute_iou -from test.utility import parse_args -import test.table.predict_structure as predict_strture +from test1.table.matcher import distance, compute_iou +from test1.utility import parse_args +import test1.table.predict_structure as predict_strture logger = get_logger() diff --git a/test/table/table_metric/__init__.py b/test1/table/table_metric/__init__.py similarity index 100% rename from test/table/table_metric/__init__.py rename to test1/table/table_metric/__init__.py diff --git a/test/table/table_metric/parallel.py b/test1/table/table_metric/parallel.py similarity index 100% rename from test/table/table_metric/parallel.py rename to test1/table/table_metric/parallel.py diff --git a/test/table/table_metric/table_metric.py b/test1/table/table_metric/table_metric.py similarity index 100% rename from test/table/table_metric/table_metric.py rename to test1/table/table_metric/table_metric.py diff --git a/test/table/tablepyxl/__init__.py b/test1/table/tablepyxl/__init__.py similarity index 100% rename from test/table/tablepyxl/__init__.py rename to test1/table/tablepyxl/__init__.py diff --git 
a/test/table/tablepyxl/style.py b/test1/table/tablepyxl/style.py similarity index 100% rename from test/table/tablepyxl/style.py rename to test1/table/tablepyxl/style.py diff --git a/test/table/tablepyxl/tablepyxl.py b/test1/table/tablepyxl/tablepyxl.py similarity index 100% rename from test/table/tablepyxl/tablepyxl.py rename to test1/table/tablepyxl/tablepyxl.py diff --git a/test/utility.py b/test1/utility.py similarity index 100% rename from test/utility.py rename to test1/utility.py