diff --git a/modelcenter/PP-OCRv2/info.yaml b/modelcenter/PP-OCRv2/info.yaml index 57380ffb493d3f10f2a23b0336cd707476c68445..b3900e8968c52ac543a0e985ae84e1c444364da4 100644 --- a/modelcenter/PP-OCRv2/info.yaml +++ b/modelcenter/PP-OCRv2/info.yaml @@ -1,27 +1,33 @@ --- Model_Info: name: "PP-OCRv2" - description: "" - description_en: "" + description: "PP-OCRv2文字检测识别系统" + description_en: "PP-OCRv2 text detection and recognition system" icon: "@后续UE统一设计之后,会存到bos上某个位置" from_repo: "PaddleOCR" Task: - tag_en: "CV" tag: "计算机视觉" - sub_tag_en: "Character Recognition" - sub_tag: "文字识别" + sub_tag_en: "Text Detection, Character Recognition, Optical Character Recognition" + sub_tag: "文字检测,文字识别,OCR" Example: - title: "《动手学OCR》系列课程之:PP-OCRv2预测部署实战" url: "https://aistudio.baidu.com/aistudio/projectdetail/3552922?channelType=0&channel=0" + title_en: "Dive into OCR series of courses: PP-OCRv2 prediction and deployment" + url_en: "https://aistudio.baidu.com/aistudio/projectdetail/3552922?channelType=0&channel=0" - title: "《动手学OCR》系列课程之:OCR文本识别实战" url: "https://aistudio.baidu.com/aistudio/projectdetail/3552051?channelType=0&channel=0" + title_en: "Dive into OCR series of courses: text recognition in practice" + url_en: "https://aistudio.baidu.com/aistudio/projectdetail/3552051?channelType=0&channel=0" - title: "《动手学OCR》系列课程之:OCR文本检测实践" url: "https://aistudio.baidu.com/aistudio/projectdetail/3551779?channelType=0&channel=0" + title_en: "Dive into OCR series of courses: text detection in practice" + url_en: "https://aistudio.baidu.com/aistudio/projectdetail/3551779?channelType=0&channel=0" Datasets: "ICDAR 2015, ICDAR2019-LSVT,ICDAR2017-RCTW-17,Total-Text,ICDAR2019-ArT" Pulisher: "Baidu" License: "apache.2.0" Paper: - title: "PP-OCRv2: Bag of Tricks for Ultra Lightweight OCR System" - url: "https://arxiv.org/pdf/2109.03144v2.pdf" + url: "https://arxiv.org/abs/2109.03144" IfTraining: 0 IfOnlineDemo: 1 diff --git a/modelcenter/PP-OCRv2/introduction_cn.ipynb 
b/modelcenter/PP-OCRv2/introduction_cn.ipynb index 3305c7a473d6850c1ee153727ca610948c28101b..93987ba99392a5bcc9940c1a11b49600415406aa 100644 --- a/modelcenter/PP-OCRv2/introduction_cn.ipynb +++ b/modelcenter/PP-OCRv2/introduction_cn.ipynb @@ -196,7 +196,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3.8.13 ('py38')", "language": "python", "name": "python3" }, @@ -210,7 +210,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.8" + "version": "3.8.13" + }, + "vscode": { + "interpreter": { + "hash": "58fd1890da6594cebec461cf98c6cb9764024814357f166387d10d267624ecd6" + } } }, "nbformat": 4, diff --git a/modelcenter/PP-OCRv2/introduction_en.ipynb b/modelcenter/PP-OCRv2/introduction_en.ipynb index 93a6c849bc09d617ae9c55a5ac62929dcb38626f..28b844bf7c3a8f066d0b26885e92ee731dca1d1b 100644 --- a/modelcenter/PP-OCRv2/introduction_en.ipynb +++ b/modelcenter/PP-OCRv2/introduction_en.ipynb @@ -112,7 +112,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 3.2 Train the model.\n", + "### 3.2 Train the model\n", "The PP-OCR system consists of a text detection model, an angle classifier and a text recognition model. For the three model training tutorials, please refer to the following documents:\n", "1. text detection model: [text detection training tutorial](https://github.com/PaddlePaddle/PaddleOCR/blob/release%2F2.6/doc/doc_ch/detection.md)\n", "1. angle classifier: [angle classifier training tutorial](https://github.com/PaddlePaddle/PaddleOCR/blob/release%2F2.6/doc/doc_ch/angle_class.md)\n", @@ -130,6 +130,7 @@ "source": [ "## 4. Model Principles\n", "\n", + "The enhancement strategies are as follows\n", "\n", "1. 
Text detection enhancement strategies\n", "- Adopt CML (Collaborative Mutual Learning) collaborative mutual learning knowledge distillation strategy.\n", @@ -193,7 +194,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3.8.13 ('py38')", "language": "python", "name": "python3" }, @@ -207,7 +208,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.8" + "version": "3.8.13" + }, + "vscode": { + "interpreter": { + "hash": "58fd1890da6594cebec461cf98c6cb9764024814357f166387d10d267624ecd6" + } } }, "nbformat": 4, diff --git a/modelcenter/PP-OCRv3/info.yaml b/modelcenter/PP-OCRv3/info.yaml index d4ebfdd62ed7a3996bce32d46f80e9a8745bb72c..664e01ec3fc924e62cde19cdcfabc8354c884cf7 100644 --- a/modelcenter/PP-OCRv3/info.yaml +++ b/modelcenter/PP-OCRv3/info.yaml @@ -1,22 +1,28 @@ --- Model_Info: name: "PP-OCRv3" - description: "" - description_en: "" + description: "PP-OCRv3文字检测识别系统" + description_en: "PP-OCRv3 text detection and recognition system" icon: "@后续UE统一设计之后,会存到bos上某个位置" from_repo: "PaddleOCR" Task: - tag_en: "CV" tag: "计算机视觉" - sub_tag_en: "Character Recognition" - sub_tag: "文字识别" + sub_tag_en: "Text Detection, Character Recognition, Optical Character Recognition" + sub_tag: "文字检测,文字识别,OCR" Example: - - title: "《【官方】十分钟完成 PP-OCRv3 识别全流程实战" + - title: "【官方】十分钟完成 PP-OCRv3 识别全流程实战" url: "https://aistudio.baidu.com/aistudio/projectdetail/3916206?channelType=0&channel=0" + title_en: "[Official] Complete the whole process of PP-OCRv3 identification in ten minutes" + url_en: "https://aistudio.baidu.com/aistudio/projectdetail/3916206?channelType=0&channel=0" - title: "鸟枪换炮!基于PP-OCRv3的电表检测识别" url: "https://aistudio.baidu.com/aistudio/projectdetail/511591?channelType=0&channel=0" + title_en: "Swap the shotgun! 
Detection and recognition of electricity meters based on PP-OCRv3" + url_en: "https://aistudio.baidu.com/aistudio/projectdetail/511591?channelType=0&channel=0" - title: "基于PP-OCRv3实现PCB字符识别" url: "https://aistudio.baidu.com/aistudio/projectdetail/4008973?channelType=0&channel=0" + title_en: "PCB character recognition based on PP-OCRv3" + url_en: "https://aistudio.baidu.com/aistudio/projectdetail/4008973?channelType=0&channel=0" Datasets: "ICDAR 2015, ICDAR2019-LSVT,ICDAR2017-RCTW-17,Total-Text,ICDAR2019-ArT" Pulisher: "Baidu" License: "apache.2.0" diff --git a/modelcenter/PP-OCRv3/introduction_cn.ipynb b/modelcenter/PP-OCRv3/introduction_cn.ipynb index a054c2c5172af8c25cdccee13298f94e2553b493..60711e9f192953e88db20b0451f2f3672e32ad69 100644 --- a/modelcenter/PP-OCRv3/introduction_cn.ipynb +++ b/modelcenter/PP-OCRv3/introduction_cn.ipynb @@ -67,7 +67,7 @@ "source": [ "## 3. 模型如何使用\n", "\n", - "### 3.1 模型推理:\n", + "### 3.1 模型推理\n", "* 安装PaddleOCR whl包" ] }, @@ -96,7 +96,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": { "scrolled": true, "tags": [] @@ -136,7 +136,7 @@ "模型训练完成后,可以通过指定模型路径的方式串联使用\n", "命令参考如下:\n", "```python\n", - "paddleocr --image_dir 11.jpg --use_angle_cls true --ocr_version PP-OCRv2 --det_model_dir=/path/to/det_inference_model --cls_model_dir=/path/to/cls_inference_model --rec_model_dir=/path/to/rec_inference_model\n", + "paddleocr --image_dir 11.jpg --use_angle_cls true --det_model_dir=/path/to/det_inference_model --cls_model_dir=/path/to/cls_inference_model --rec_model_dir=/path/to/rec_inference_model\n", "```" ] }, @@ -228,36 +228,11 @@ "source": [ "## 6. 
相关论文以及引用信息\n", "```\n", - "@article{du2021pp,\n", - " title={PP-OCRv2: bag of tricks for ultra lightweight OCR system},\n", - " author={Du, Yuning and Li, Chenxia and Guo, Ruoyu and Cui, Cheng and Liu, Weiwei and Zhou, Jun and Lu, Bin and Yang, Yehua and Liu, Qiwen and Hu, Xiaoguang and others},\n", - " journal={arXiv preprint arXiv:2109.03144},\n", - " year={2021}\n", - "}\n", - "\n", - "@inproceedings{zhang2018deep,\n", - " title={Deep mutual learning},\n", - " author={Zhang, Ying and Xiang, Tao and Hospedales, Timothy M and Lu, Huchuan},\n", - " booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},\n", - " pages={4320--4328},\n", - " year={2018}\n", - "}\n", - "\n", - "@inproceedings{hu2020gtc,\n", - " title={Gtc: Guided training of ctc towards efficient and accurate scene text recognition},\n", - " author={Hu, Wenyang and Cai, Xiaocong and Hou, Jun and Yi, Shuai and Lin, Zhiping},\n", - " booktitle={Proceedings of the AAAI Conference on Artificial Intelligence},\n", - " volume={34},\n", - " number={07},\n", - " pages={11005--11012},\n", - " year={2020}\n", - "}\n", - "\n", - "@inproceedings{zhang2022context,\n", - " title={Context-based Contrastive Learning for Scene Text Recognition},\n", - " author={Zhang, Xinyun and Zhu, Binwu and Yao, Xufeng and Sun, Qi and Li, Ruiyu and Yu, Bei},\n", - " year={2022},\n", - " organization={AAAI}\n", + "@article{li2022pp,\n", + " title={PP-OCRv3: More Attempts for the Improvement of Ultra Lightweight OCR System},\n", + " author={Li, Chenxia and Liu, Weiwei and Guo, Ruoyu and Yin, Xiaoting and Jiang, Kaitao and Du, Yongkun and Du, Yuning and Zhu, Lingfeng and Lai, Baohua and Hu, Xiaoguang and others},\n", + " journal={arXiv preprint arXiv:2206.03001},\n", + " year={2022}\n", "}\n", "```\n" ] @@ -265,7 +240,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3.8.13 ('py38')", "language": "python", "name": "python3" }, @@ -279,7 +254,12 @@ "name": 
"python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.8" + "version": "3.8.13" + }, + "vscode": { + "interpreter": { + "hash": "58fd1890da6594cebec461cf98c6cb9764024814357f166387d10d267624ecd6" + } } }, "nbformat": 4, diff --git a/modelcenter/PP-OCRv3/introduction_en.ipynb b/modelcenter/PP-OCRv3/introduction_en.ipynb index 92692d2f7289524174df21db1710a180ada020b7..2e567314f8df4f21a584f059f96ab107f8c2039b 100644 --- a/modelcenter/PP-OCRv3/introduction_en.ipynb +++ b/modelcenter/PP-OCRv3/introduction_en.ipynb @@ -129,7 +129,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 3.2 Train the model.\n", + "### 3.2 Train the model\n", "The PP-OCR system consists of a text detection model, an angle classifier and a text recognition model. For the three model training tutorials, please refer to the following documents:\n", "1. text detection model: [text detection training tutorial](https://github.com/PaddlePaddle/PaddleOCR/blob/release%2F2.6/doc/doc_ch/detection.md)\n", "1. angle classifier: [angle classifier training tutorial](https://github.com/PaddlePaddle/PaddleOCR/blob/release%2F2.6/doc/doc_ch/angle_class.md)\n", @@ -137,7 +137,7 @@ "\n", "After the model training is completed, it can be used in series by specifying the model path. The command reference is as follows:\n", "```python\n", - "paddleocr --image_dir 11.jpg --use_angle_cls true --ocr_version PP-OCRv2 --det_model_dir=/path/to/det_inference_model --cls_model_dir=/path/to/cls_inference_model --rec_model_dir=/path/to/rec_inference_model\n", + "paddleocr --image_dir 11.jpg --use_angle_cls true --det_model_dir=/path/to/det_inference_model --cls_model_dir=/path/to/cls_inference_model --rec_model_dir=/path/to/rec_inference_model\n", "```" ] }, @@ -147,7 +147,7 @@ "source": [ "## 4. Model Principles\n", "\n", - "The optimization ideas are as follows\n", + "The enhancement strategies are as follows\n", "\n", "1. 
Text detection enhancement strategies\n", "- LK-PAN: a PAN module with large receptive field\n", @@ -231,36 +231,11 @@ "source": [ "## 6. Related papers and citations\n", "```\n", - "@article{du2021pp,\n", - " title={PP-OCRv2: bag of tricks for ultra lightweight OCR system},\n", - " author={Du, Yuning and Li, Chenxia and Guo, Ruoyu and Cui, Cheng and Liu, Weiwei and Zhou, Jun and Lu, Bin and Yang, Yehua and Liu, Qiwen and Hu, Xiaoguang and others},\n", - " journal={arXiv preprint arXiv:2109.03144},\n", - " year={2021}\n", - "}\n", - "\n", - "@inproceedings{zhang2018deep,\n", - " title={Deep mutual learning},\n", - " author={Zhang, Ying and Xiang, Tao and Hospedales, Timothy M and Lu, Huchuan},\n", - " booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},\n", - " pages={4320--4328},\n", - " year={2018}\n", - "}\n", - "\n", - "@inproceedings{hu2020gtc,\n", - " title={Gtc: Guided training of ctc towards efficient and accurate scene text recognition},\n", - " author={Hu, Wenyang and Cai, Xiaocong and Hou, Jun and Yi, Shuai and Lin, Zhiping},\n", - " booktitle={Proceedings of the AAAI Conference on Artificial Intelligence},\n", - " volume={34},\n", - " number={07},\n", - " pages={11005--11012},\n", - " year={2020}\n", - "}\n", - "\n", - "@inproceedings{zhang2022context,\n", - " title={Context-based Contrastive Learning for Scene Text Recognition},\n", - " author={Zhang, Xinyun and Zhu, Binwu and Yao, Xufeng and Sun, Qi and Li, Ruiyu and Yu, Bei},\n", - " year={2022},\n", - " organization={AAAI}\n", + "@article{li2022pp,\n", + " title={PP-OCRv3: More Attempts for the Improvement of Ultra Lightweight OCR System},\n", + " author={Li, Chenxia and Liu, Weiwei and Guo, Ruoyu and Yin, Xiaoting and Jiang, Kaitao and Du, Yongkun and Du, Yuning and Zhu, Lingfeng and Lai, Baohua and Hu, Xiaoguang and others},\n", + " journal={arXiv preprint arXiv:2206.03001},\n", + " year={2022}\n", "}\n", "```\n" ] @@ -268,7 +243,7 @@ ], "metadata": { 
"kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3.8.13 ('py38')", "language": "python", "name": "python3" }, @@ -282,7 +257,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.8" + "version": "3.8.13" + }, + "vscode": { + "interpreter": { + "hash": "58fd1890da6594cebec461cf98c6cb9764024814357f166387d10d267624ecd6" + } } }, "nbformat": 4, diff --git a/modelcenter/PP-StructureV2/APP/app.py b/modelcenter/PP-StructureV2/APP/app.py new file mode 100644 index 0000000000000000000000000000000000000000..3637161a77141b4c82841f14ac61366afac7a212 --- /dev/null +++ b/modelcenter/PP-StructureV2/APP/app.py @@ -0,0 +1,51 @@ +import gradio as gr +import base64 +from io import BytesIO +from PIL import Image + +from paddleocr import PPStructure + +table_engine = PPStructure(layout=False, show_log=True) + + +def image_to_base64(image): + # 输入为PIL读取的图片,输出为base64格式 + byte_data = BytesIO() # 创建一个字节流管道 + image.save(byte_data, format="JPEG") # 将图片数据存入字节流管道 + byte_data = byte_data.getvalue() # 从字节流管道中获取二进制 + base64_str = base64.b64encode(byte_data).decode("ascii") # 二进制转base64 + return base64_str + + +# UGC: Define the inference fn() for your models +def model_inference(image): + result = table_engine(image) + res = result[0]['res']['html'] + json_out = {"result": res} + return res, json_out + + +def clear_all(): + return None, None, None + + +with gr.Blocks() as demo: + gr.Markdown("PP-StructureV2") + + with gr.Column(scale=1, min_width=100): + img_in = gr.Image( + value="https://user-images.githubusercontent.com/12406017/200574299-32537341-c329-42a5-ae41-35ee4bd43f2f.png", + label="Input") + + with gr.Row(): + btn1 = gr.Button("Clear") + btn2 = gr.Button("Submit") + + html_out = gr.HTML(label="Output") + json_out = gr.JSON(label="jsonOutput") + + btn2.click(fn=model_inference, inputs=img_in, outputs=[html_out, json_out]) + btn1.click(fn=clear_all, inputs=None, outputs=[img_in, html_out, json_out]) + 
gr.Button.style(1) + +demo.launch() diff --git a/modelcenter/PP-StructureV2/APP/app.yml b/modelcenter/PP-StructureV2/APP/app.yml new file mode 100644 index 0000000000000000000000000000000000000000..fba9a810275f02b32155890e7ac3de00cc08af2d --- /dev/null +++ b/modelcenter/PP-StructureV2/APP/app.yml @@ -0,0 +1,11 @@ +【PP-StructureV2-App-YAML】 + +APP_Info: + title: PP-StructureV2-App + colorFrom: blue + colorTo: yellow + sdk: gradio + sdk_version: 3.4.1 + app_file: app.py + license: apache-2.0 + device: cpu \ No newline at end of file diff --git a/modelcenter/PP-StructureV2/APP/requirements.txt b/modelcenter/PP-StructureV2/APP/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..aacc153b7006bf6a08742f34579afbff6ffe2b05 --- /dev/null +++ b/modelcenter/PP-StructureV2/APP/requirements.txt @@ -0,0 +1,3 @@ +gradio +paddlepaddle +paddleocr>=2.6.1.0 diff --git a/modelcenter/PP-StructureV2/.gitkeep b/modelcenter/PP-StructureV2/benchmark_cn.md similarity index 100% rename from modelcenter/PP-StructureV2/.gitkeep rename to modelcenter/PP-StructureV2/benchmark_cn.md diff --git a/modelcenter/PP-StructureV2/benchmark_en.md b/modelcenter/PP-StructureV2/benchmark_en.md new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/modelcenter/PP-StructureV2/download_cn.md b/modelcenter/PP-StructureV2/download_cn.md new file mode 100644 index 0000000000000000000000000000000000000000..d782a8f2cdf02c180b5f76e7b6d6877a13aad2db --- /dev/null +++ b/modelcenter/PP-StructureV2/download_cn.md @@ -0,0 +1,34 @@ +# 模型列表 + +## 1. 
版面分析模型 + +|模型名称|模型简介|推理模型大小|下载地址|dict path| +| --- | --- | --- | --- | --- | +| picodet_lcnet_x1_0_fgd_layout | 基于PicoDet LCNet_x1_0和FGD蒸馏在PubLayNet 数据集训练的英文版面分析模型,可以划分**文字、标题、表格、图片以及列表**5类区域 | 9.7M | [推理模型](https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout.pdparams) | [PubLayNet dict](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.6/ppocr/utils/dict/layout_dict/layout_publaynet_dict.txt) | +| ppyolov2_r50vd_dcn_365e_publaynet | 基于PP-YOLOv2在PubLayNet数据集上训练的英文版面分析模型 | 221.0M | [推理模型](https://paddle-model-ecology.bj.bcebos.com/model/layout-parser/ppyolov2_r50vd_dcn_365e_publaynet.tar) / [训练模型](https://paddle-model-ecology.bj.bcebos.com/model/layout-parser/ppyolov2_r50vd_dcn_365e_publaynet_pretrained.pdparams) | 同上 | +| picodet_lcnet_x1_0_fgd_layout_cdla | CDLA数据集训练的中文版面分析模型,可以划分为**文本、标题、图片、图片标题、表格、表格标题、页眉、页脚、引用、公式**10类区域 | 9.7M | [推理模型](https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_cdla_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_cdla.pdparams) | [CDLA dict](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.6/ppocr/utils/dict/layout_dict/layout_cdla_dict.txt) | +| picodet_lcnet_x1_0_fgd_layout_table | 表格数据集训练的版面分析模型,支持中英文文档表格区域的检测 | 9.7M | [推理模型](https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_table_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_table.pdparams) | [Table dict](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.6/ppocr/utils/dict/layout_dict/layout_table_dict.txt) | +| ppyolov2_r50vd_dcn_365e_tableBank_word | 基于PP-YOLOv2在TableBank Word 数据集训练的版面分析模型,支持英文文档表格区域的检测 | 221.0M | [推理模型](https://paddle-model-ecology.bj.bcebos.com/model/layout-parser/ppyolov2_r50vd_dcn_365e_tableBank_word.tar) | 
同上 | +| ppyolov2_r50vd_dcn_365e_tableBank_latex | 基于PP-YOLOv2在TableBank Latex数据集训练的版面分析模型,支持英文文档表格区域的检测 | 221.0M | [推理模型](https://paddle-model-ecology.bj.bcebos.com/model/layout-parser/ppyolov2_r50vd_dcn_365e_tableBank_latex.tar) | 同上 | + + +## 2. OCR和表格识别模型 + +### 2.1 OCR + +|模型名称|模型简介|推理模型大小|下载地址| +| --- | --- | --- | --- | +|en_ppocr_mobile_v2.0_table_det|PubTabNet数据集训练的英文表格场景的文字检测|4.7M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_det_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.1/table/en_ppocr_mobile_v2.0_table_det_train.tar) | +|en_ppocr_mobile_v2.0_table_rec|PubTabNet数据集训练的英文表格场景的文字识别|6.9M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.1/table/en_ppocr_mobile_v2.0_table_rec_train.tar) | + +如需要使用其他OCR模型,可以在 [PP-OCR model_list](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.6/doc/doc_ch/models_list.md) 下载模型或者使用自己训练好的模型配置到 `det_model_dir`, `rec_model_dir`两个字段即可。 + +### 2.2 表格识别模型 + +|模型名称|模型简介|推理模型大小|下载地址| +| --- | --- | --- | --- | +|en_ppocr_mobile_v2.0_table_structure|基于TableRec-RARE在PubTabNet数据集上训练的英文表格识别模型|6.8M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.1/table/en_ppocr_mobile_v2.0_table_structure_train.tar) | +|en_ppstructure_mobile_v2.0_SLANet|基于SLANet在PubTabNet数据集上训练的英文表格识别模型|9.2M|[推理模型](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_train.tar) | +|ch_ppstructure_mobile_v2.0_SLANet|基于SLANet的中文表格识别模型|9.3M|[推理模型](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar) / 
[训练模型](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_train.tar) | + + diff --git a/modelcenter/PP-StructureV2/download_en.md b/modelcenter/PP-StructureV2/download_en.md new file mode 100644 index 0000000000000000000000000000000000000000..376ec636a02b52b69c6a2342fae4c661a023ddd2 --- /dev/null +++ b/modelcenter/PP-StructureV2/download_en.md @@ -0,0 +1,32 @@ +# Model list + +## 1. Layout Analysis + +|model name|description | inference model size |download|dict path| +| --- |---| --- | --- | --- | +| picodet_lcnet_x1_0_fgd_layout | The layout analysis English model trained on the PubLayNet dataset based on PicoDet LCNet_x1_0 and FGD. The model can recognize 5 types of areas: **Text, Title, Table, Picture and List** | 9.7M | [inference model](https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout.pdparams) | [PubLayNet dict](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.6/ppocr/utils/dict/layout_dict/layout_publaynet_dict.txt) | +| ppyolov2_r50vd_dcn_365e_publaynet | The layout analysis English model trained on the PubLayNet dataset based on PP-YOLOv2 | 221.0M | [inference model](https://paddle-model-ecology.bj.bcebos.com/model/layout-parser/ppyolov2_r50vd_dcn_365e_publaynet.tar) / [trained model](https://paddle-model-ecology.bj.bcebos.com/model/layout-parser/ppyolov2_r50vd_dcn_365e_publaynet_pretrained.pdparams) | same as above | +| picodet_lcnet_x1_0_fgd_layout_cdla | The layout analysis Chinese model trained on the CDLA dataset. The model can recognize 10 types of areas: **Text, Title, Figure, Figure caption, Table, Table caption, Header, Footer, Reference and Equation** | 9.7M | [inference model](https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_cdla_infer.tar) / [trained 
model](https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_cdla.pdparams) | [CDLA dict](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.6/ppocr/utils/dict/layout_dict/layout_cdla_dict.txt) | +| picodet_lcnet_x1_0_fgd_layout_table | The layout analysis model trained on the table dataset, the model can detect tables in Chinese and English documents | 9.7M | [inference model](https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_table_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_table.pdparams) | [Table dict](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.6/ppocr/utils/dict/layout_dict/layout_table_dict.txt) | +| ppyolov2_r50vd_dcn_365e_tableBank_word | The layout analysis model trained on the TableBank Word dataset based on PP-YOLOv2, the model can detect tables in English documents | 221.0M | [inference model](https://paddle-model-ecology.bj.bcebos.com/model/layout-parser/ppyolov2_r50vd_dcn_365e_tableBank_word.tar) | same as above | +| ppyolov2_r50vd_dcn_365e_tableBank_latex | The layout analysis model trained on the TableBank Latex dataset based on PP-YOLOv2, the model can detect tables in English documents | 221.0M | [inference model](https://paddle-model-ecology.bj.bcebos.com/model/layout-parser/ppyolov2_r50vd_dcn_365e_tableBank_latex.tar) | same as above | + +## 2. 
OCR and Table Recognition + +### 2.1 OCR + +|model name| description | inference model size |download| +| --- |---|---| --- | +|en_ppocr_mobile_v2.0_table_det| Text detection model of English table scenes trained on PubTabNet dataset | 4.7M |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.1/table/en_ppocr_mobile_v2.0_table_det_train.tar) | +|en_ppocr_mobile_v2.0_table_rec| Text recognition model of English table scenes trained on PubTabNet dataset | 6.9M |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.1/table/en_ppocr_mobile_v2.0_table_rec_train.tar) | + +If you need to use other OCR models, you can download one from the [PP-OCR model_list](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.6/doc/doc_en/models_list_en.md) or use a model you trained yourself, and then set the `det_model_dir` and `rec_model_dir` fields accordingly. 
+ + +### 2.2 Table Recognition + +|model| description |inference model size|download| +| --- |-----------------------------------------------------------------------------| --- | --- | +|en_ppocr_mobile_v2.0_table_structure| English table recognition model trained on PubTabNet dataset based on TableRec-RARE |6.8M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.1/table/en_ppocr_mobile_v2.0_table_structure_train.tar) | +|en_ppstructure_mobile_v2.0_SLANet|English table recognition model trained on PubTabNet dataset based on SLANet|9.2M|[inference model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_train.tar) | +|ch_ppstructure_mobile_v2.0_SLANet|Chinese table recognition model based on SLANet|9.3M|[inference model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_train.tar) | diff --git a/modelcenter/PP-StructureV2/info.yaml b/modelcenter/PP-StructureV2/info.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9c52fe17e0a32c67eb21aff9ba850f37ae5d8e99 --- /dev/null +++ b/modelcenter/PP-StructureV2/info.yaml @@ -0,0 +1,29 @@ +--- +Model_Info: + name: "PP-StructureV2" + description: "PP-StructureV2文档分析系统,包含版面分析,表格识别,版面恢复和关键信息抽取" + description_en: "PP-StructureV2 document analysis system, including layout analysis, table recognition, layout recovery and key information extraction" + icon: "@后续UE统一设计之后,会存到bos上某个位置" + from_repo: "PaddleOCR" +Task: + - tag_en: "CV" + tag: "计算机视觉" + sub_tag_en: "Layout Analysis, Table Recognition, Layout Recovery, Key Information Extraction" + sub_tag: 
"版面分析,表格识别,版面恢复,关键信息提取" +Example: + - title: "表格识别实战" + url: "https://aistudio.baidu.com/aistudio/projectdetail/4770296?channelType=0&channel=0" + title_en: "table recognition" + url_en: "https://aistudio.baidu.com/aistudio/projectdetail/4770296?channelType=0&channel=0" + - title: "OCR发票关键信息抽取" + url: "https://aistudio.baidu.com/aistudio/projectdetail/4823162?channelType=0&channel=0" + title_en: "Invoice key information extraction" + url_en: "https://aistudio.baidu.com/aistudio/projectdetail/4823162?channelType=0&channel=0" +Datasets: "ICDAR 2015, ICDAR2019-LSVT,ICDAR2017-RCTW-17,Total-Text,ICDAR2019-ArT" +Pulisher: "Baidu" +License: "apache.2.0" +Paper: + - title: "PP-StructureV2: A Stronger Document Analysis System" + url: "https://arxiv.org/abs/2210.05391v2" +IfTraining: 0 +IfOnlineDemo: 1 diff --git a/modelcenter/PP-StructureV2/introduction_cn.ipynb b/modelcenter/PP-StructureV2/introduction_cn.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..eed00659e02ed371c7533dfc2c56c3da429001b3 --- /dev/null +++ b/modelcenter/PP-StructureV2/introduction_cn.ipynb @@ -0,0 +1,382 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. PP-StructureV2模型简介\n", + "\n", + "PP-StructureV2在PP-StructureV1的基础上进一步改进,主要有以下3个方面升级:\n", + "\n", + " * **系统功能升级** :新增图像矫正和版面恢复模块,图像转word/pdf、关键信息抽取能力全覆盖!\n", + " * **系统性能优化** :\n", + "\t * 版面分析:发布轻量级版面分析模型,速度提升**11倍**,平均CPU耗时仅需**41ms**!\n", + "\t * 表格识别:设计3大优化策略,预测耗时不变情况下,模型精度提升**6%**。\n", + "\t * 关键信息抽取:设计视觉无关模型结构,语义实体识别精度提升**2.8%**,关系抽取精度提升**9.1%**。\n", + " * **中文场景适配** :完成对版面分析与表格识别的中文场景适配,开源**开箱即用**的中文场景版面结构化模型!\n", + "\n", + "PP-StructureV2系统流程图如下所示,文档图像首先经过图像矫正模块,判断整图方向并完成转正,随后可以完成版面信息分析与关键信息抽取2类任务。版面分析任务中,图像首先经过版面分析模型,将图像划分为文本、表格、图像等不同区域,随后对这些区域分别进行识别,如,将表格区域送入表格识别模块进行结构化识别,将文本区域送入OCR引擎进行文字识别,最后使用版面恢复模块将其恢复为与原始图像布局一致的word或者pdf格式的文件;关键信息抽取任务中,首先使用OCR引擎提取文本内容,然后由语义实体识别模块获取图像中的语义实体,最后经关系抽取模块获取语义实体之间的对应关系,从而提取需要的关键信息。\n", + "\n", + "