diff --git a/PPOCRLabel/README.md b/PPOCRLabel/README.md
index 19e54ab14663ab86285a45680ac2b6421420e4d4..9c6ce120974701b372fb091fcd40038f790444d3 100644
--- a/PPOCRLabel/README.md
+++ b/PPOCRLabel/README.md
@@ -79,7 +79,7 @@ PPOCRLabel # run
```bash
cd PaddleOCR/PPOCRLabel
-python3 setup.py bdist_wheel
+python3 setup.py bdist_wheel
pip3 install dist/PPOCRLabel-1.0.2-py2.py3-none-any.whl
```
@@ -171,7 +171,7 @@ python PPOCRLabel.py
- Model language switching: Changing the built-in model language is supportable by clicking "PaddleOCR"-"Choose OCR Model" in the menu bar. Currently supported languagesinclude French, German, Korean, and Japanese.
For specific model download links, please refer to [PaddleOCR Model List](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_en/models_list_en.md#multilingual-recognition-modelupdating)
-- **Custom Model**: If users want to replace the built-in model with their own inference model, they can follow the [Custom Model Code Usage](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.3/doc/doc_en/whl_en.md#31-use-by-code) by modifying PPOCRLabel.py for [Instantiation of PaddleOCR class](https://github.com/PaddlePaddle/PaddleOCR/blob/release/ 2.3/PPOCRLabel/PPOCRLabel.py#L116) :
+- **Custom Model**: If users want to replace the built-in model with their own inference model, they can follow the [Custom Model Code Usage](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.3/doc/doc_en/whl_en.md#31-use-by-code) by modifying PPOCRLabel.py for [Instantiation of PaddleOCR class](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/PPOCRLabel/PPOCRLabel.py#L86) :
add parameter `det_model_dir` in `self.ocr = PaddleOCR(use_pdserving=False, use_angle_cls=True, det=True, cls=True, use_gpu=gpu, lang=lang) `
@@ -235,4 +235,4 @@ For some data that are difficult to recognize, the recognition results will not
### 4. Related
-1.[Tzutalin. LabelImg. Git code (2015)](https://github.com/tzutalin/labelImg)
\ No newline at end of file
+1.[Tzutalin. LabelImg. Git code (2015)](https://github.com/tzutalin/labelImg)
diff --git a/README.md b/README.md
index 8936fbaa27c92fc64a7098a9e79cc0fe923910fb..b1d464879bdbe64c8812a7ce335023ba5cca9727 100644
--- a/README.md
+++ b/README.md
@@ -92,7 +92,7 @@ Mobile DEMO experience (based on EasyEdge and Paddle-Lite, supports iOS and Andr
| ------------------------------------------------------------ | ---------------------------- | ----------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
| Chinese and English ultra-lightweight PP-OCRv2 model(11.6M) | ch_PP-OCRv2_xx |Mobile & Server|[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_distill_train.tar)| [inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_train.tar)|
| Chinese and English ultra-lightweight PP-OCR model (9.4M) | ch_ppocr_mobile_v2.0_xx | Mobile & server |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar)|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_train.tar) |
-| Chinese and English general PP-OCR model (143.4M) | ch_ppocr_server_v2.0_xx | Server |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_train.tar) |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_traingit.tar) |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_train.tar) |
+| Chinese and English general PP-OCR model (143.4M) | ch_ppocr_server_v2.0_xx | Server |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_train.tar) |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_train.tar) |
For more model downloads (including multiple languages), please refer to [PP-OCR series model downloads](./doc/doc_en/models_list_en.md).
diff --git a/README_ch.md b/README_ch.md
index f39f3cb9acdb921773368170cae355bd9d29d4ce..bf89a62ba816cb734b8ff19a5a88ff8b48f235ce 100755
--- a/README_ch.md
+++ b/README_ch.md
@@ -99,7 +99,7 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力
- [PP-Structure信息提取](./ppstructure/README_ch.md)
- [版面分析](./ppstructure/layout/README_ch.md)
- [表格识别](./ppstructure/table/README_ch.md)
- - [DocVQA](./ppstructure/vqa/README_ch.md)
+ - [DocVQA](./ppstructure/vqa/README.md)
- [关键信息提取](./ppstructure/docs/kie.md)
- OCR学术圈
- [两阶段模型介绍与下载](./doc/doc_ch/algorithm_overview.md)
diff --git a/deploy/lite/readme.md b/deploy/lite/readme.md
index 29a1e165eb0a5850a3e9651dff50d094b7ff1f8e..365cb02d529bdabcb2346ed576ba3bd3b076e2db 100644
--- a/deploy/lite/readme.md
+++ b/deploy/lite/readme.md
@@ -42,7 +42,7 @@ git checkout release/v2.9
注意:编译Paddle-Lite获得预测库时,需要打开`--with_cv=ON --with_extra=ON`两个选项,`--arch`表示`arm`版本,这里指定为armv8,
更多编译命令
-介绍请参考 [链接](https://paddle-lite.readthedocs.io/zh/latest/source_compile/compile_andriod.html) 。
+介绍请参考 [链接](https://paddle-lite.readthedocs.io/zh/release-v2.10_a/source_compile/linux_x86_compile_android.html) 。
直接下载预测库并解压后,可以得到`inference_lite_lib.android.armv8/`文件夹,通过编译Paddle-Lite得到的预测库位于
`Paddle-Lite/build.lite.android.armv8.gcc/inference_lite_lib.android.armv8/`文件夹下。
diff --git a/deploy/lite/readme_en.md b/deploy/lite/readme_en.md
index 65dd10b710ed23ceb6ba01cd8f29b1bd01f1cf09..d200a615ceef391c17542d10d6812367bb9a822a 100644
--- a/deploy/lite/readme_en.md
+++ b/deploy/lite/readme_en.md
@@ -44,7 +44,7 @@ git checkout release/v2.8
Note: When compiling Paddle-Lite to obtain the Paddle-Lite library, you need to turn on the two options `--with_cv=ON --with_extra=ON`, `--arch` means the `arm` version, here is designated as armv8,
-More compilation commands refer to the introduction [link](https://paddle-lite.readthedocs.io/zh/latest/source_compile/compile_andriod.html) 。
+More compilation commands refer to the introduction [link](https://paddle-lite.readthedocs.io/zh/release-v2.10_a/source_compile/linux_x86_compile_android.html) 。
After directly downloading the Paddle-Lite library and decompressing it, you can get the `inference_lite_lib.android.armv8/` folder, and the Paddle-Lite library obtained by compiling Paddle-Lite is located
`Paddle-Lite/build.lite.android.armv8.gcc/inference_lite_lib.android.armv8/` folder.
diff --git a/deploy/pdserving/README.md b/deploy/pdserving/README.md
index c461fd5e54d3a51ad3427f83a1fca35cbe3ab2d8..37b97589c469ce434e03dd994d06a04b8bff3541 100644
--- a/deploy/pdserving/README.md
+++ b/deploy/pdserving/README.md
@@ -19,10 +19,14 @@ The introduction and tutorial of Paddle Serving service deployment framework ref
## Contents
-- [Environmental preparation](#environmental-preparation)
-- [Model conversion](#model-conversion)
-- [Paddle Serving pipeline deployment](#paddle-serving-pipeline-deployment)
-- [FAQ](#faq)
+- [OCR Pipeline WebService](#ocr-pipeline-webservice)
+- [Service deployment based on PaddleServing](#service-deployment-based-on-paddleserving)
+ - [Contents](#contents)
+ - [Environmental preparation](#environmental-preparation)
+ - [Model conversion](#model-conversion)
+ - [Paddle Serving pipeline deployment](#paddle-serving-pipeline-deployment)
+ - [WINDOWS Users](#windows-users)
+ - [FAQ](#faq)
## Environmental preparation
@@ -201,7 +205,7 @@ The recognition model is the same.
## WINDOWS Users
-Windows does not support Pipeline Serving, if we want to lauch paddle serving on Windows, we should use Web Service, for more infomation please refer to [Paddle Serving for Windows Users](https://github.com/PaddlePaddle/Serving/blob/develop/doc/WINDOWS_TUTORIAL.md)
+Windows does not support Pipeline Serving, if we want to launch paddle serving on Windows, we should use Web Service, for more information please refer to [Paddle Serving for Windows Users](https://github.com/PaddlePaddle/Serving/blob/develop/doc/Windows_Tutorial_EN.md)
**WINDOWS user can only use version 0.5.0 CPU Mode**
diff --git a/doc/doc_ch/thirdparty.md b/doc/doc_ch/thirdparty.md
index e0b9797dce9bf1e3ca2769e3dc2bde6c10203950..baf7e41c1e258f560cbf1841173678ad3b2c1377 100644
--- a/doc/doc_ch/thirdparty.md
+++ b/doc/doc_ch/thirdparty.md
@@ -28,9 +28,12 @@ PaddleOCR希望可以通过AI的力量助力任何一位有梦想的开发者实
| 垂类工具 | [id_card_ocr](https://github.com/baseli/id_card_ocr) | 身份证复印件识别 | [baseli](https://github.com/baseli) |
| 垂类工具 | [Paddle_Table_Image_Reader](https://github.com/thunder95/Paddle_Table_Image_Reader) | 能看懂表格图片的数据助手 | [thunder95](https://github.com/thunder95]) |
| 垂类工具 | [AI Studio项目](https://aistudio.baidu.com/aistudio/projectdetail/3382897) | OCR流程中对手写体进行过滤 | [daassh](https://github.com/daassh) |
+| 垂类场景调优 | [AI Studio项目](https://aistudio.baidu.com/aistudio/projectdetail/2803693) | 电表读数和编号识别 | [深渊上的坑](https://github.com/edencfc) |
+| 垂类工具 | [AI Studio项目](https://aistudio.baidu.com/aistudio/projectdetail/3284199) | LCD液晶字符检测 | [Dream拒杰](https://github.com/zhangyingying520) |
| 前后处理 | [paddleOCRCorrectOutputs](https://github.com/yuranusduke/paddleOCRCorrectOutputs) | 获取OCR识别结果的key-value | [yuranusduke](https://github.com/yuranusduke) |
|前处理| [optlab](https://github.com/GreatV/optlab) |OCR前处理工具箱,基于Qt和Leptonica。|[GreatV](https://github.com/GreatV)|
|应用部署| [PaddleOCRSharp](https://github.com/raoyutian/PaddleOCRSharp) |PaddleOCR的.NET封装与应用部署。|[raoyutian](https://github.com/raoyutian/PaddleOCRSharp)|
+|应用部署| [PaddleSharp](https://github.com/sdcb/PaddleSharp) |PaddleOCR的.NET封装与应用部署,支持跨平台、GPU|[sdcb](https://github.com/sdcb)|
| 学术前沿模型训练与推理 | [AI Studio项目](https://aistudio.baidu.com/aistudio/projectdetail/3397137) | StarNet-MobileNetV3算法–中文训练 | [xiaoyangyang2](https://github.com/xiaoyangyang2) |
### 1.2 为PaddleOCR新增功能
diff --git a/doc/doc_en/models_list_en.md b/doc/doc_en/models_list_en.md
index 157b4fe31c6b55e4bffc8fb9abe1d067b9a60e51..4c02c56e03c56d9ad85789e5cbb20c0f630153b2 100644
--- a/doc/doc_en/models_list_en.md
+++ b/doc/doc_en/models_list_en.md
@@ -4,13 +4,14 @@
> 2. Compared with [models 1.1](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_en/models_list_en.md), which are trained with static graph programming paradigm, models 2.0 are the dynamic graph trained version and achieve close performance.
> 3. All models in this tutorial are all ppocr-series models, for more introduction of algorithms and models based on public dataset, you can refer to [algorithm overview tutorial](./algorithm_overview_en.md).
-- [1. Text Detection Model](#Detection)
-- [2. Text Recognition Model](#Recognition)
- - [2.1 Chinese Recognition Model](#Chinese)
- - [2.2 English Recognition Model](#English)
- - [2.3 Multilingual Recognition Model](#Multilingual)
-- [3. Text Angle Classification Model](#Angle)
-- [4. Paddle-Lite Model](#Paddle-Lite)
+- [OCR Model List(V2.1, updated on 2021.9.6)](#ocr-model-listv21-updated-on-202196)
+ - [1. Text Detection Model](#1-text-detection-model)
+ - [2. Text Recognition Model](#2-text-recognition-model)
+ - [2.1 Chinese Recognition Model](#21-chinese-recognition-model)
+ - [2.2 English Recognition Model](#22-english-recognition-model)
+ - [2.3 Multilingual Recognition Model(Updating...)](#23-multilingual-recognition-modelupdating)
+ - [3. Text Angle Classification Model](#3-text-angle-classification-model)
+ - [4. Paddle-Lite Model](#4-paddle-lite-model)
The downloadable models provided by PaddleOCR include `inference model`, `trained model`, `pre-trained model` and `slim model`. The differences between the models are as follows:
@@ -44,7 +45,7 @@ Relationship of the above models is as follows.
|model name|description|config|model size|download|
| --- | --- | --- | --- | --- |
|ch_PP-OCRv2_rec_slim|[New] Slim qunatization with distillation lightweight model, supporting Chinese, English, multilingual text recognition|[ch_PP-OCRv2_rec.yml](../../configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec.yml)| 9M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_slim_quant_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_slim_quant_train.tar) |
-|ch_PP-OCRv2_rec|[New] Original lightweight model, supporting Chinese, English, multilingual text recognition|[ch_PP-OCRv2_rec_distillation.yml](../../configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_distillation.yml)|8.5M|[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_train.tar) |
+|ch_PP-OCRv2_rec|[New] Original lightweight model, supporting Chinese, English, multilingual text recognition|[ch_PP-OCRv2_rec_distillation.yml](../../configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_distillation.yml)|8.5M|[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_train.tar) |
|ch_ppocr_mobile_slim_v2.0_rec|Slim pruned and quantized lightweight model, supporting Chinese, English and number recognition|[rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml)| 6M | [inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_slim_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_slim_train.tar) |
|ch_ppocr_mobile_v2.0_rec|Original lightweight model, supporting Chinese, English and number recognition|[rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml)|5.2M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_train.tar) / [pre-trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_pre.tar) |
|ch_ppocr_server_v2.0_rec|General model, supporting Chinese, English and number recognition|[rec_chinese_common_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_common_train_v2.0.yml)|94.8M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_train.tar) / [pre-trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_pre.tar) |
diff --git a/ppocr/losses/rec_ctc_loss.py b/ppocr/losses/rec_ctc_loss.py
index 063d68e30861e092e10fa3068e4b7f4755b6197f..502fc8c5227460c1e299ae2a46d464d29ddbe374 100755
--- a/ppocr/losses/rec_ctc_loss.py
+++ b/ppocr/losses/rec_ctc_loss.py
@@ -31,7 +31,8 @@ class CTCLoss(nn.Layer):
predicts = predicts[-1]
predicts = predicts.transpose((1, 0, 2))
N, B, _ = predicts.shape
- preds_lengths = paddle.to_tensor([N] * B, dtype='int64')
+ preds_lengths = paddle.to_tensor(
+ [N] * B, dtype='int64', place=paddle.CPUPlace())
labels = batch[1].astype("int32")
label_lengths = batch[2].astype('int64')
loss = self.loss_func(predicts, labels, preds_lengths, label_lengths)
diff --git a/ppstructure/README.md b/ppstructure/README.md
index 1d201a7c6e54f6ed71be6d1872b7f4b226ad35ad..b4c1ec8d828fd521601c97f9f5d0754eecd13152 100644
--- a/ppstructure/README.md
+++ b/ppstructure/README.md
@@ -1,16 +1,18 @@
English | [简体中文](README_ch.md)
-- [1. Introduction](#1)
-- [2. Update log](#2)
-- [3. Features](#3)
-- [4. Results](#4)
- * [4.1 Layout analysis and table recognition](#41)
- * [4.2 DOC-VQA](#42)
-- [5. Quick start](#5)
-- [6. PP-Structure System](#6)
- * [6.1 Layout analysis and table recognition](#61)
- * [6.2 DOC-VQA](#62)
-- [7. Model List](#7)
+- [1. Introduction](#1-introduction)
+- [2. Update log](#2-update-log)
+- [3. Features](#3-features)
+- [4. Results](#4-results)
+ - [4.1 Layout analysis and table recognition](#41-layout-analysis-and-table-recognition)
+ - [4.2 DOC-VQA](#42-doc-vqa)
+- [5. Quick start](#5-quick-start)
+- [6. PP-Structure System](#6-pp-structure-system)
+ - [6.1 Layout analysis and table recognition](#61-layout-analysis-and-table-recognition)
+ - [6.1.1 Layout analysis](#611-layout-analysis)
+ - [6.1.2 Table recognition](#612-table-recognition)
+ - [6.2 DOC-VQA](#62-doc-vqa)
+- [7. Model List](#7-model-list)
@@ -54,8 +56,8 @@ The figure shows the pipeline of layout analysis + table recognition. The image
### 4.2 DOC-VQA
* SER
-
- | 
+*
+ | 
---|---
Different colored boxes in the figure represent different categories. For xfun dataset, there are three categories: query, answer and header:
@@ -69,7 +71,7 @@ The corresponding category and OCR recognition results are also marked at the to
* RE
- | 
+ | 
---|---
@@ -96,7 +98,7 @@ In PP-Structure, the image will be divided into 5 types of areas **text, title,
#### 6.1.1 Layout analysis
-Layout analysis classifies image by region, including the use of Python scripts of layout analysis tools, extraction of designated category detection boxes, performance indicators, and custom training layout analysis models. For details, please refer to [document](layout/README_en.md).
+Layout analysis classifies image by region, including the use of Python scripts of layout analysis tools, extraction of designated category detection boxes, performance indicators, and custom training layout analysis models. For details, please refer to [document](layout/README.md).
#### 6.1.2 Table recognition
diff --git a/ppstructure/README_ch.md b/ppstructure/README_ch.md
index 808a5c68d18df625bedeae4706da7f985d6caecd..a449028dff29739e621bfa2aa77eac63b43e6c84 100644
--- a/ppstructure/README_ch.md
+++ b/ppstructure/README_ch.md
@@ -1,16 +1,18 @@
[English](README.md) | 简体中文
-- [1. 简介](#1)
-- [2. 近期更新](#2)
-- [3. 特性](#3)
-- [4. 效果展示](#4)
- * [4.1 版面分析和表格识别](#41)
- * [4.2 DOC-VQA](#42)
-- [5. 快速体验](#5)
-- [6. PP-Structure 介绍](#6)
- * [6.1 版面分析+表格识别](#61)
- * [6.2 DOC-VQA](#62)
-- [7. 模型库](#7)
+- [1. 简介](#1-简介)
+- [2. 近期更新](#2-近期更新)
+- [3. 特性](#3-特性)
+- [4. 效果展示](#4-效果展示)
+ - [4.1 版面分析和表格识别](#41-版面分析和表格识别)
+ - [4.2 DOC-VQA](#42-doc-vqa)
+- [5. 快速体验](#5-快速体验)
+- [6. PP-Structure 介绍](#6-pp-structure-介绍)
+ - [6.1 版面分析+表格识别](#61-版面分析表格识别)
+ - [6.1.1 版面分析](#611-版面分析)
+ - [6.1.2 表格识别](#612-表格识别)
+ - [6.2 DOC-VQA](#62-doc-vqa)
+- [7. 模型库](#7-模型库)
@@ -54,7 +56,7 @@ PP-Structure的主要特性如下:
* SER
- | 
+ | 
---|---
图中不同颜色的框表示不同的类别,对于XFUN数据集,有`QUESTION`, `ANSWER`, `HEADER` 3种类别
@@ -67,7 +69,7 @@ PP-Structure的主要特性如下:
* RE
- | 
+ | 
---|---
@@ -134,4 +136,4 @@ PP-Structure系列模型列表(更新中)
|PP-Layout_v1.0_re_pretrained|基于LayoutXLM在xfun中文数据集上训练的RE模型|1.4G|[推理模型 coming soon]() / [训练模型](https://paddleocr.bj.bcebos.com/pplayout/PP-Layout_v1.0_re_pretrained.tar) |
-更多模型下载,可以参考 [PPOCR model_list](../doc/doc_en/models_list.md) and [PPStructure model_list](./docs/model_list.md)
\ No newline at end of file
+更多模型下载,可以参考 [PPOCR model_list](../doc/doc_en/models_list.md) and [PPStructure model_list](./docs/model_list.md)
diff --git a/test_tipc/benchmark_train.sh b/test_tipc/benchmark_train.sh
new file mode 100644
index 0000000000000000000000000000000000000000..fc49cbb3e69771efb49b39c45cd627a314205360
--- /dev/null
+++ b/test_tipc/benchmark_train.sh
@@ -0,0 +1,254 @@
+#!/bin/bash
+source test_tipc/common_func.sh
+
+# set env
+python=python
+export model_branch=`git symbolic-ref HEAD 2>/dev/null | cut -d"/" -f 3`
+export model_commit=$(git log|head -n1|awk '{print $2}')
+export str_tmp=$(echo `pip list|grep paddlepaddle-gpu|awk -F ' ' '{print $2}'`)
+export frame_version=${str_tmp%%.post*}
+export frame_commit=$(echo `${python} -c "import paddle;print(paddle.version.commit)"`)
+
+# run benchmark sh
+# Usage:
+# bash run_benchmark_train.sh config.txt params
+# or
+# bash run_benchmark_train.sh config.txt
+
+function func_parser_params(){
+ strs=$1
+ IFS="="
+ array=(${strs})
+ tmp=${array[1]}
+ echo ${tmp}
+}
+
+function func_sed_params(){
+ filename=$1
+ line=$2
+ param_value=$3
+ params=`sed -n "${line}p" $filename`
+ IFS=":"
+ array=(${params})
+ key=${array[0]}
+ value=${array[1]}
+
+ new_params="${key}:${param_value}"
+ IFS=";"
+    cmd="sed -i '${line}s/.*/${new_params}/' ${filename}"
+ eval $cmd
+}
+
+function set_gpu_id(){
+ string=$1
+ _str=${string:1:6}
+ IFS="C"
+ arr=(${_str})
+ M=${arr[0]}
+ P=${arr[1]}
+ gn=`expr $P - 1`
+ gpu_num=`expr $gn / $M`
+ seq=`seq -s "," 0 $gpu_num`
+ echo $seq
+}
+
+function get_repo_name(){
+ IFS=";"
+ cur_dir=$(pwd)
+ IFS="/"
+ arr=(${cur_dir})
+ echo ${arr[-1]}
+}
+
+FILENAME=$1
+# copy FILENAME as new
+new_filename="./test_tipc/benchmark_train.txt"
+cmd=`yes|cp $FILENAME $new_filename`
+FILENAME=$new_filename
+# MODE must be one of ['benchmark_train']
+MODE=$2
+PARAMS=$3
+# bash test_tipc/benchmark_train.sh test_tipc/configs/det_mv3_db_v2_0/train_benchmark.txt benchmark_train dynamic_bs8_null_DP_N1C1
+IFS=$'\n'
+# parser params from train_benchmark.txt
+dataline=`cat $FILENAME`
+# parser params
+IFS=$'\n'
+lines=(${dataline})
+model_name=$(func_parser_value "${lines[1]}")
+
+# 获取benchmark_params所在的行数
+line_num=`grep -n "train_benchmark_params" $FILENAME | cut -d ":" -f 1`
+# for train log parser
+batch_size=$(func_parser_value "${lines[line_num]}")
+line_num=`expr $line_num + 1`
+fp_items=$(func_parser_value "${lines[line_num]}")
+line_num=`expr $line_num + 1`
+epoch=$(func_parser_value "${lines[line_num]}")
+
+line_num=`expr $line_num + 1`
+profile_option_key=$(func_parser_key "${lines[line_num]}")
+profile_option_params=$(func_parser_value "${lines[line_num]}")
+profile_option="${profile_option_key}:${profile_option_params}"
+
+line_num=`expr $line_num + 1`
+flags_value=$(func_parser_value "${lines[line_num]}")
+# set flags
+IFS=";"
+flags_list=(${flags_value})
+for _flag in ${flags_list[*]}; do
+ cmd="export ${_flag}"
+ eval $cmd
+done
+
+# set log_name
+repo_name=$(get_repo_name )
+SAVE_LOG=${BENCHMARK_LOG_DIR:-$(pwd)} # */benchmark_log
+mkdir -p "${SAVE_LOG}/benchmark_log/"
+status_log="${SAVE_LOG}/benchmark_log/results.log"
+
+# The number of lines in which train params can be replaced.
+line_python=3
+line_gpuid=4
+line_precision=6
+line_epoch=7
+line_batchsize=9
+line_profile=13
+line_eval_py=24
+line_export_py=30
+
+func_sed_params "$FILENAME" "${line_eval_py}" "null"
+func_sed_params "$FILENAME" "${line_export_py}" "null"
+func_sed_params "$FILENAME" "${line_python}" "$python"
+
+# if params
+if [ ! -n "$PARAMS" ] ;then
+ # PARAMS input is not a word.
+ IFS="|"
+ batch_size_list=(${batch_size})
+ fp_items_list=(${fp_items})
+ device_num_list=(N1C4)
+ run_mode="DP"
+else
+ # parser params from input: modeltype_bs${bs_item}_${fp_item}_${run_mode}_${device_num}
+ IFS="_"
+ params_list=(${PARAMS})
+ model_type=${params_list[0]}
+ batch_size=${params_list[1]}
+ batch_size=`echo ${batch_size} | tr -cd "[0-9]" `
+ precision=${params_list[2]}
+ # run_process_type=${params_list[3]}
+ run_mode=${params_list[3]}
+ device_num=${params_list[4]}
+ IFS=";"
+
+ if [ ${precision} = "null" ];then
+ precision="fp32"
+ fi
+
+ fp_items_list=($precision)
+ batch_size_list=($batch_size)
+ device_num_list=($device_num)
+fi
+
+IFS="|"
+for batch_size in ${batch_size_list[*]}; do
+ for precision in ${fp_items_list[*]}; do
+ for device_num in ${device_num_list[*]}; do
+ # sed batchsize and precision
+ func_sed_params "$FILENAME" "${line_precision}" "$precision"
+ func_sed_params "$FILENAME" "${line_batchsize}" "$MODE=$batch_size"
+ func_sed_params "$FILENAME" "${line_epoch}" "$MODE=$epoch"
+ gpu_id=$(set_gpu_id $device_num)
+
+ if [ ${#gpu_id} -le 1 ];then
+ run_process_type="SingleP"
+ log_path="$SAVE_LOG/profiling_log"
+ mkdir -p $log_path
+ log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_profiling"
+ func_sed_params "$FILENAME" "${line_gpuid}" "0" # sed used gpu_id
+ # set profile_option params
+ tmp=`sed -i "${line_profile}s/.*/${profile_option}/" "${FILENAME}"`
+
+ # run test_train_inference_python.sh
+ cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
+ echo $cmd
+ eval $cmd
+ eval "cat ${log_path}/${log_name}"
+
+ # without profile
+ log_path="$SAVE_LOG/train_log"
+ speed_log_path="$SAVE_LOG/index"
+ mkdir -p $log_path
+ mkdir -p $speed_log_path
+ log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_log"
+ speed_log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_speed"
+ func_sed_params "$FILENAME" "${line_profile}" "null" # sed profile_id as null
+ cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
+ echo $cmd
+ job_bt=`date '+%Y%m%d%H%M%S'`
+ eval $cmd
+ job_et=`date '+%Y%m%d%H%M%S'`
+ export model_run_time=$((${job_et}-${job_bt}))
+ eval "cat ${log_path}/${log_name}"
+
+ # parser log
+ _model_name="${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}"
+ cmd="${python} ${BENCHMARK_ROOT}/scripts/analysis.py --filename ${log_path}/${log_name} \
+ --speed_log_file '${speed_log_path}/${speed_log_name}' \
+ --model_name ${_model_name} \
+ --base_batch_size ${batch_size} \
+ --run_mode ${run_mode} \
+ --run_process_type ${run_process_type} \
+ --fp_item ${precision} \
+ --keyword ips: \
+ --skip_steps 2 \
+ --device_num ${device_num} \
+ --speed_unit samples/s \
+ --convergence_key loss: "
+ echo $cmd
+ eval $cmd
+ last_status=${PIPESTATUS[0]}
+ status_check $last_status "${cmd}" "${status_log}"
+ else
+ IFS=";"
+            unset CUDA_VISIBLE_DEVICES
+ run_process_type="MultiP"
+ log_path="$SAVE_LOG/train_log"
+ speed_log_path="$SAVE_LOG/index"
+ mkdir -p $log_path
+ mkdir -p $speed_log_path
+ log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_log"
+ speed_log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_speed"
+ func_sed_params "$FILENAME" "${line_gpuid}" "$gpu_id" # sed used gpu_id
+ func_sed_params "$FILENAME" "${line_profile}" "null" # sed --profile_option as null
+ cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
+ echo $cmd
+ job_bt=`date '+%Y%m%d%H%M%S'`
+ eval $cmd
+ job_et=`date '+%Y%m%d%H%M%S'`
+ export model_run_time=$((${job_et}-${job_bt}))
+ eval "cat ${log_path}/${log_name}"
+ # parser log
+ _model_name="${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}"
+
+ cmd="${python} ${BENCHMARK_ROOT}/scripts/analysis.py --filename ${log_path}/${log_name} \
+ --speed_log_file '${speed_log_path}/${speed_log_name}' \
+ --model_name ${_model_name} \
+ --base_batch_size ${batch_size} \
+ --run_mode ${run_mode} \
+ --run_process_type ${run_process_type} \
+ --fp_item ${precision} \
+ --keyword ips: \
+ --skip_steps 2 \
+ --device_num ${device_num} \
+ --speed_unit images/s \
+ --convergence_key loss: "
+ echo $cmd
+ eval $cmd
+ last_status=${PIPESTATUS[0]}
+ status_check $last_status "${cmd}" "${status_log}"
+ fi
+ done
+ done
+done
diff --git a/test_tipc/configs/det_mv3_db_v2.0/train_infer_python.txt b/test_tipc/configs/det_mv3_db_v2_0/train_infer_python.txt
similarity index 80%
rename from test_tipc/configs/det_mv3_db_v2.0/train_infer_python.txt
rename to test_tipc/configs/det_mv3_db_v2_0/train_infer_python.txt
index 09168c99ed98ce0edf0ad7bcd59b5ee634c8b737..04bdeec49dbb5a188320b6d1bc9d61a8863363aa 100644
--- a/test_tipc/configs/det_mv3_db_v2.0/train_infer_python.txt
+++ b/test_tipc/configs/det_mv3_db_v2_0/train_infer_python.txt
@@ -1,5 +1,5 @@
===========================train_params===========================
-model_name:det_mv3_db_v2.0
+model_name:det_mv3_db_v2_0
python:python3.7
gpu_list:0|0,1
Global.use_gpu:True|True
@@ -48,4 +48,10 @@ inference:tools/infer/predict_det.py
--image_dir:./inference/ch_det_data_50/all-sum-510/
null:null
--benchmark:True
-null:null
\ No newline at end of file
+null:null
+===========================train_benchmark_params==========================
+batch_size:8|16
+fp_items:fp32|fp16
+epoch:2
+--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
+flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
\ No newline at end of file
diff --git a/test_tipc/configs/det_r18_vd_v2_0/train_infer_python.txt b/test_tipc/configs/det_r18_vd_v2_0/train_infer_python.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2b96d8438b5b01c362d2ea13c0425ebc1beb6e82
--- /dev/null
+++ b/test_tipc/configs/det_r18_vd_v2_0/train_infer_python.txt
@@ -0,0 +1,56 @@
+===========================train_params===========================
+model_name:det_r18_db_v2_0
+python:python3.7
+gpu_list:0|0,1
+Global.use_gpu:True|True
+Global.auto_cast:null
+Global.epoch_num:lite_train_lite_infer=2|whole_train_whole_infer=300
+Global.save_model_dir:./output/
+Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_lite_infer=4
+Global.pretrained_model:null
+train_model_name:latest
+train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/
+null:null
+##
+trainer:norm_train
+norm_train:tools/train.py -c configs/det/det_res18_db_v2.0.yml -o
+quant_export:null
+fpgm_export:null
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params===========================
+eval:null
+null:null
+##
+===========================infer_params===========================
+Global.save_inference_dir:./output/
+Global.checkpoints:
+norm_export:null
+quant_export:null
+fpgm_export:null
+distill_export:null
+export1:null
+export2:null
+##
+train_model:null
+infer_export:null
+infer_quant:False
+inference:tools/infer/predict_det.py
+--use_gpu:True|False
+--enable_mkldnn:True|False
+--cpu_threads:1|6
+--rec_batch_num:1
+--use_tensorrt:False|True
+--precision:fp32|fp16|int8
+--det_model_dir:
+--image_dir:./inference/ch_det_data_50/all-sum-510/
+--save_log_path:null
+--benchmark:True
+null:null
+===========================train_benchmark_params==========================
+batch_size:8|16
+fp_items:fp32|fp16
+epoch:2
+--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
diff --git a/test_tipc/configs/det_r50_vd_east_v2.0/det_r50_vd_east.yml b/test_tipc/configs/det_r50_vd_east_v2_0/det_r50_vd_east.yml
similarity index 100%
rename from test_tipc/configs/det_r50_vd_east_v2.0/det_r50_vd_east.yml
rename to test_tipc/configs/det_r50_vd_east_v2_0/det_r50_vd_east.yml
diff --git a/test_tipc/configs/det_r50_vd_east_v2.0/train_infer_python.txt b/test_tipc/configs/det_r50_vd_east_v2_0/train_infer_python.txt
similarity index 80%
rename from test_tipc/configs/det_r50_vd_east_v2.0/train_infer_python.txt
rename to test_tipc/configs/det_r50_vd_east_v2_0/train_infer_python.txt
index c7c8d0891858ebc0ccb024f1cb650996fc83b438..bc149ac8c3e0aa9041578c32ff4c4e192a1aa5b7 100644
--- a/test_tipc/configs/det_r50_vd_east_v2.0/train_infer_python.txt
+++ b/test_tipc/configs/det_r50_vd_east_v2_0/train_infer_python.txt
@@ -1,5 +1,5 @@
===========================train_params===========================
-model_name:det_r50_vd_east_v2.0
+model_name:det_r50_vd_east_v2_0
python:python3.7
gpu_list:0
Global.use_gpu:True|True
@@ -13,7 +13,7 @@ train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/
null:null
##
trainer:norm_train
-norm_train:tools/train.py -c test_tipc/configs/det_r50_vd_east_v2.0/det_r50_vd_east.yml -o
+norm_train:tools/train.py -c test_tipc/configs/det_r50_vd_east_v2_0/det_r50_vd_east.yml -o
pact_train:null
fpgm_train:null
distill_train:null
@@ -27,7 +27,7 @@ null:null
===========================infer_params===========================
Global.save_inference_dir:./output/
Global.checkpoints:
-norm_export:tools/export_model.py -c test_tipc/configs/det_r50_vd_east_v2.0/det_r50_vd_east.yml -o
+norm_export:tools/export_model.py -c test_tipc/configs/det_r50_vd_east_v2_0/det_r50_vd_east.yml -o
quant_export:null
fpgm_export:null
distill_export:null
@@ -35,7 +35,7 @@ export1:null
export2:null
##
train_model:./inference/det_r50_vd_east_v2.0_train/best_accuracy
-infer_export:tools/export_model.py -c test_tipc/configs/det_r50_vd_east_v2.0/det_r50_vd_east.yml -o
+infer_export:tools/export_model.py -c test_tipc/configs/det_r50_vd_east_v2_0/det_r50_vd_east.yml -o
infer_quant:False
inference:tools/infer/predict_det.py
--use_gpu:True|False
@@ -49,3 +49,8 @@ inference:tools/infer/predict_det.py
--save_log_path:null
--benchmark:True
--det_algorithm:EAST
+===========================train_benchmark_params==========================
+batch_size:8
+fp_items:fp32|fp16
+epoch:2
+--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
\ No newline at end of file
diff --git a/test_tipc/configs/det_r50_vd_pse_v2.0/det_r50_vd_pse.yml b/test_tipc/configs/det_r50_vd_pse_v2_0/det_r50_vd_pse.yml
similarity index 100%
rename from test_tipc/configs/det_r50_vd_pse_v2.0/det_r50_vd_pse.yml
rename to test_tipc/configs/det_r50_vd_pse_v2_0/det_r50_vd_pse.yml
diff --git a/test_tipc/configs/det_r50_vd_pse_v2.0/train_infer_python.txt b/test_tipc/configs/det_r50_vd_pse_v2_0/train_infer_python.txt
similarity index 80%
rename from test_tipc/configs/det_r50_vd_pse_v2.0/train_infer_python.txt
rename to test_tipc/configs/det_r50_vd_pse_v2_0/train_infer_python.txt
index 6eb351e0367e4dc65d643fdb0a6d20f3475ac7b6..47e0d0e494c32045dafe90b771c522d695ef89da 100644
--- a/test_tipc/configs/det_r50_vd_pse_v2.0/train_infer_python.txt
+++ b/test_tipc/configs/det_r50_vd_pse_v2_0/train_infer_python.txt
@@ -1,5 +1,5 @@
===========================train_params===========================
-model_name:det_r50_vd_pse_v2.0
+model_name:det_r50_vd_pse_v2_0
python:python3.7
gpu_list:0
Global.use_gpu:True|True
@@ -13,7 +13,7 @@ train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/
null:null
##
trainer:norm_train
-norm_train:tools/train.py -c test_tipc/configs/det_r50_vd_pse_v2.0/det_r50_vd_pse.yml -o
+norm_train:tools/train.py -c test_tipc/configs/det_r50_vd_pse_v2_0/det_r50_vd_pse.yml -o
pact_train:null
fpgm_train:null
distill_train:null
@@ -27,7 +27,7 @@ null:null
===========================infer_params===========================
Global.save_inference_dir:./output/
Global.checkpoints:
-norm_export:tools/export_model.py -c test_tipc/configs/det_r50_vd_pse_v2.0/det_r50_vd_pse.yml -o
+norm_export:tools/export_model.py -c test_tipc/configs/det_r50_vd_pse_v2_0/det_r50_vd_pse.yml -o
quant_export:null
fpgm_export:null
distill_export:null
@@ -35,7 +35,7 @@ export1:null
export2:null
##
train_model:./inference/det_r50_vd_pse_v2.0_train/best_accuracy
-infer_export:tools/export_model.py -c test_tipc/configs/det_r50_vd_pse_v2.0/det_r50_vd_pse.yml -o
+infer_export:tools/export_model.py -c test_tipc/configs/det_r50_vd_pse_v2_0/det_r50_vd_pse.yml -o
infer_quant:False
inference:tools/infer/predict_det.py
--use_gpu:True|False
@@ -49,3 +49,8 @@ inference:tools/infer/predict_det.py
--save_log_path:null
--benchmark:True
--det_algorithm:PSE
+===========================train_benchmark_params==========================
+batch_size:8
+fp_items:fp32|fp16
+epoch:2
+--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
diff --git a/test_tipc/docs/benchmark_train.md b/test_tipc/docs/benchmark_train.md
new file mode 100644
index 0000000000000000000000000000000000000000..3f846574ff75c8602ba1222977362c143582f560
--- /dev/null
+++ b/test_tipc/docs/benchmark_train.md
@@ -0,0 +1,53 @@
+
+# TIPC Linux端Benchmark测试文档
+
+该文档为Benchmark测试说明,Benchmark预测功能测试的主程序为`benchmark_train.sh`,用于验证监控模型训练的性能。
+
+# 1. 测试流程
+## 1.1 准备数据和环境安装
+运行`test_tipc/prepare.sh`,完成训练数据准备和安装环境流程。
+
+```shell
+# 运行格式:bash test_tipc/prepare.sh train_benchmark.txt mode
+bash test_tipc/prepare.sh test_tipc/configs/det_mv3_db_v2_0/train_benchmark.txt benchmark_train
+```
+
+## 1.2 功能测试
+执行`test_tipc/benchmark_train.sh`,完成模型训练和日志解析
+
+```shell
# 运行格式:bash test_tipc/benchmark_train.sh train_infer_python.txt mode
+bash test_tipc/benchmark_train.sh test_tipc/configs/det_mv3_db_v2_0/train_infer_python.txt benchmark_train
+
+```
+
+`test_tipc/benchmark_train.sh`支持根据传入的第三个参数实现只运行某一个训练配置,如下:
+```shell
# 运行格式:bash test_tipc/benchmark_train.sh train_infer_python.txt mode params
+bash test_tipc/benchmark_train.sh test_tipc/configs/det_mv3_db_v2_0/train_infer_python.txt benchmark_train dynamic_bs8_fp32_DP_N1C1
+```
+dynamic_bs8_fp32_DP_N1C1为test_tipc/benchmark_train.sh传入的参数,格式如下:
+`${modeltype}_${batch_size}_${fp_item}_${run_mode}_${device_num}`
+包含的信息有:模型类型、batchsize大小、训练精度如fp32,fp16等、分布式运行模式以及分布式训练使用的机器信息如单机单卡(N1C1)。
+
+
+## 2. 日志输出
+
+运行后将保存模型的训练日志和解析日志,使用 `test_tipc/configs/det_mv3_db_v2_0/train_infer_python.txt` 参数文件的训练日志解析结果是:
+
+```
+{"model_branch": "dygaph", "model_commit": "7c39a1996b19087737c05d883fd346d2f39dbcc0", "model_name": "det_mv3_db_v2_0_bs8_fp32_SingleP_DP", "batch_size": 8, "fp_item": "fp32", "run_process_type": "SingleP", "run_mode": "DP", "convergence_value": "5.413110", "convergence_key": "loss:", "ips": 19.333, "speed_unit": "samples/s", "device_num": "N1C1", "model_run_time": "0", "frame_commit": "8cc09552473b842c651ead3b9848d41827a3dbab", "frame_version": "0.0.0"}
+```
+
+训练日志和日志解析结果保存在benchmark_log目录下,文件组织格式如下:
+```
+train_log/
+├── index
+│ ├── PaddleOCR_det_mv3_db_v2_0_bs8_fp32_SingleP_DP_N1C1_speed
+│ └── PaddleOCR_det_mv3_db_v2_0_bs8_fp32_SingleP_DP_N1C4_speed
+├── profiling_log
+│ └── PaddleOCR_det_mv3_db_v2_0_bs8_fp32_SingleP_DP_N1C1_profiling
+└── train_log
+ ├── PaddleOCR_det_mv3_db_v2_0_bs8_fp32_SingleP_DP_N1C1_log
+ └── PaddleOCR_det_mv3_db_v2_0_bs8_fp32_SingleP_DP_N1C4_log
+```
diff --git a/test_tipc/prepare.sh b/test_tipc/prepare.sh
index 570c6832e7a6682f634d7ab7538a228256446372..158b8cb8bc25dd1e33e8c7b8d3a8bb76f9ad7624 100644
--- a/test_tipc/prepare.sh
+++ b/test_tipc/prepare.sh
@@ -20,6 +20,15 @@ model_name=$(func_parser_value "${lines[1]}")
trainer_list=$(func_parser_value "${lines[14]}")
+if [ ${MODE} = "benchmark_train" ];then
+ pip install -r requirements.txt
+ if [[ ${model_name} =~ "det_mv3_db_v2_0" || ${model_name} =~ "det_r50_vd_east_v2_0" || ${model_name} =~ "det_r50_vd_pse_v2_0" || ${model_name} =~ "det_r18_db_v2_0" ]];then
+ rm -rf ./train_data/icdar2015
+ wget -nc -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_5_pretrained.pdparams --no-check-certificate
+ wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/icdar2015.tar --no-check-certificate
+ cd ./train_data/ && tar xf icdar2015.tar && cd ../
+ fi
+fi
if [ ${MODE} = "lite_train_lite_infer" ];then
# pretrain lite train data
@@ -52,7 +61,7 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/total_text_lite.tar --no-check-certificate
cd ./train_data && tar xf total_text_lite.tar && ln -s total_text_lite total_text && cd ../
fi
- if [ ${model_name} == "det_mv3_db_v2.0" ]; then
+ if [ ${model_name} == "det_mv3_db_v2_0" ]; then
wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar --no-check-certificate
cd ./inference/ && tar xf det_mv3_db_v2.0_train.tar && cd ../
fi
@@ -211,7 +220,7 @@ elif [ ${MODE} = "whole_infer" ];then
wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_sast_totaltext_v2.0_train.tar --no-check-certificate
cd ./inference/ && tar xf det_r50_vd_sast_totaltext_v2.0_train.tar && cd ../
fi
- if [ ${model_name} == "det_mv3_db_v2.0" ]; then
+ if [ ${model_name} == "det_mv3_db_v2_0" ]; then
wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar --no-check-certificate
cd ./inference/ && tar xf det_mv3_db_v2.0_train.tar && tar xf ch_det_data_50.tar && cd ../
fi
@@ -223,7 +232,7 @@ elif [ ${MODE} = "whole_infer" ];then
wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.1/en_det/det_mv3_pse_v2.0_train.tar --no-check-certificate
cd ./inference/ && tar xf det_mv3_pse_v2.0_train.tar & cd ../
fi
- if [ ${model_name} == "det_r50_vd_pse_v2.0" ]; then
+ if [ ${model_name} == "det_r50_vd_pse_v2_0" ]; then
wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.1/en_det/det_r50_vd_pse_v2.0_train.tar --no-check-certificate
cd ./inference/ && tar xf det_r50_vd_pse_v2.0_train.tar & cd ../
fi
@@ -231,7 +240,7 @@ elif [ ${MODE} = "whole_infer" ];then
wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_east_v2.0_train.tar --no-check-certificate
cd ./inference/ && tar xf det_mv3_east_v2.0_train.tar & cd ../
fi
- if [ ${model_name} == "det_r50_vd_east_v2.0" ]; then
+ if [ ${model_name} == "det_r50_vd_east_v2_0" ]; then
wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_east_v2.0_train.tar --no-check-certificate
cd ./inference/ && tar xf det_r50_vd_east_v2.0_train.tar & cd ../
fi
diff --git a/test_tipc/supplementary/readme.md b/test_tipc/supplementary/readme.md
index b630b0f30b23b71c0dd21def2a45fee01023fe82..a378fc5f357d0deb54d5a2de93d8ca6de034fa24 100644
--- a/test_tipc/supplementary/readme.md
+++ b/test_tipc/supplementary/readme.md
@@ -47,6 +47,13 @@ bash test_tipc/test_train_python.sh ./test_tipc/train_infer_python_PACT.txt 'lit
bash test_tipc/test_train_python.sh ./test_tipc/train_infer_python_FPGM.txt 'lite_train_lite_infer'
```
+多机多卡的运行配置文件分别为 `train_infer_python_fleet.txt`, `train_infer_python_FPGM_fleet.txt` 和 `train_infer_python_PACT_fleet.txt`。
+运行时,需要修改配置文件中的 `gpu_list:xx.xx.xx.xx,yy.yy.yy.yy;0,1`。 将 `xx.xx.xx.xx` 替换为具体的 `ip` 地址,各个`ip`地址之间用`,`分隔。 另外,和单机训练
+不同,启动多机多卡训练需要在多机的每个节点上分别运行命令。以多机多卡量化训练为例,指令如下:
+```
+bash test_tipc/test_train_python.sh ./test_tipc/train_infer_python_PACT_fleet.txt 'lite_train_lite_infer'
+```
+
运行相应指令后,在`test_tipc/output`文件夹下自动会保存运行日志。如'lite_train_lite_infer'模式运行后,在test_tipc/extra_output文件夹有以下文件:
```
diff --git a/test_tipc/supplementary/test_tipc/test_train_python.sh b/test_tipc/supplementary/test_tipc/test_train_python.sh
index f922b57bba7de97d3631524c6f1bd1fac7395e76..ed709c1c4be886d8101e50108ad02714874ea14f 100644
--- a/test_tipc/supplementary/test_tipc/test_train_python.sh
+++ b/test_tipc/supplementary/test_tipc/test_train_python.sh
@@ -35,7 +35,6 @@ use_share_conv_key=$(func_parser_key "${lines[13]}")
use_share_conv_list=$(func_parser_value "${lines[13]}")
run_train_py=$(func_parser_value "${lines[14]}")
-
LOG_PATH="./test_tipc/extra_output"
mkdir -p ${LOG_PATH}
status_log="${LOG_PATH}/results_python.log"
@@ -98,6 +97,8 @@ if [ ${MODE} = "lite_train_lite_infer" ] || [ ${MODE} = "whole_train_whole_infer
cmd="${python} ${run_train_py} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_checkpoints} ${set_autocast} ${set_batchsize} ${set_use_custom_op} ${set_model_type} ${set_use_share_conv} ${set_amp_config}"
elif [ ${#ips} -le 26 ];then # train with multi-gpu
cmd="${python} -m paddle.distributed.launch --gpus=${gpu} ${run_train_py} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_checkpoints} ${set_autocast} ${set_batchsize} ${set_use_custom_op} ${set_model_type} ${set_use_share_conv} ${set_amp_config}"
+ else
+ cmd="${python} -m paddle.distributed.launch --ips=${ips} --gpus=${gpu} ${run_train_py} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_checkpoints} ${set_autocast} ${set_batchsize} ${set_use_custom_op} ${set_model_type} ${set_use_share_conv} ${set_amp_config}"
fi
# run train
diff --git a/test_tipc/supplementary/test_tipc/train_infer_python_FPGM.txt b/test_tipc/supplementary/test_tipc/train_infer_python_FPGM.txt
index 4c2e28b91e24b34d1bded93cddebe83e0874ae29..ccbd27ffbcb11a0b70f480738186fadf6fc09ded 100644
--- a/test_tipc/supplementary/test_tipc/train_infer_python_FPGM.txt
+++ b/test_tipc/supplementary/test_tipc/train_infer_python_FPGM.txt
@@ -4,9 +4,9 @@ python:python3.7
gpu_list:0|0,1
use_gpu:True|True
AMP.use_amp:True|False
-epoch:lite_train_lite_infer=20|whole_train_whole_infer=1000
+epoch:lite_train_lite_infer=2|whole_train_whole_infer=1000
save_model_dir:./output/
-TRAIN.batch_size:lite_train_lite_infer=2|whole_train_whole_infer=4
+TRAIN.batch_size:lite_train_lite_infer=1280|whole_train_whole_infer=1280
pretrained_model:null
checkpoints:null
use_custom_relu:False|True
diff --git a/test_tipc/supplementary/test_tipc/train_infer_python_FPGM_fleet.txt b/test_tipc/supplementary/test_tipc/train_infer_python_FPGM_fleet.txt
new file mode 100644
index 0000000000000000000000000000000000000000..be2b2117d732816bb4f2f037e27a866eb8e58f19
--- /dev/null
+++ b/test_tipc/supplementary/test_tipc/train_infer_python_FPGM_fleet.txt
@@ -0,0 +1,17 @@
+===========================train_params===========================
+model_name:ch_PPOCRv2_det
+python:python3.7
+gpu_list:xx.xx.xx.xx,yy.yy.yy.yy;0,1
+use_gpu:True
+AMP.use_amp:True|False
+epoch:lite_train_lite_infer=2|whole_train_whole_infer=1000
+save_model_dir:./output/
+TRAIN.batch_size:lite_train_lite_infer=1280|whole_train_whole_infer=1280
+pretrained_model:null
+checkpoints:null
+use_custom_relu:False|True
+model_type:cls|cls_distill|cls_distill_multiopt
+MODEL.siamese:False|True
+norm_train:train.py -c mv3_large_x0_5.yml -o prune_train=True
+quant_train:False
+prune_train:False
diff --git a/test_tipc/supplementary/test_tipc/train_infer_python_PACT.txt b/test_tipc/supplementary/test_tipc/train_infer_python_PACT.txt
index 079cddf878712b2ba3af3a19f97be3bb5a0896da..24d291b4b3b49ab90fcb2eb3fd2b5ae2ece226e9 100644
--- a/test_tipc/supplementary/test_tipc/train_infer_python_PACT.txt
+++ b/test_tipc/supplementary/test_tipc/train_infer_python_PACT.txt
@@ -4,9 +4,9 @@ python:python3.7
gpu_list:0|0,1
use_gpu:True|True
AMP.use_amp:True|False
-epoch:lite_train_lite_infer=20|whole_train_whole_infer=1000
+epoch:lite_train_lite_infer=2|whole_train_whole_infer=1000
save_model_dir:./output/
-TRAIN.batch_size:lite_train_lite_infer=2|whole_train_whole_infer=4
+TRAIN.batch_size:lite_train_lite_infer=1280|whole_train_whole_infer=1280
pretrained_model:null
checkpoints:null
use_custom_relu:False|True
diff --git a/test_tipc/supplementary/test_tipc/train_infer_python_PACT_fleet.txt b/test_tipc/supplementary/test_tipc/train_infer_python_PACT_fleet.txt
new file mode 100644
index 0000000000000000000000000000000000000000..93f06d76336efd1ea7fb94fbb0263e569760086f
--- /dev/null
+++ b/test_tipc/supplementary/test_tipc/train_infer_python_PACT_fleet.txt
@@ -0,0 +1,17 @@
+===========================train_params===========================
+model_name:ch_PPOCRv2_det
+python:python3.7
+gpu_list:xx.xx.xx.xx,yy.yy.yy.yy;0,1
+use_gpu:True
+AMP.use_amp:True|False
+epoch:lite_train_lite_infer=2|whole_train_whole_infer=1000
+save_model_dir:./output/
+TRAIN.batch_size:lite_train_lite_infer=1280|whole_train_whole_infer=1280
+pretrained_model:null
+checkpoints:null
+use_custom_relu:False|True
+model_type:cls|cls_distill|cls_distill_multiopt
+MODEL.siamese:False|True
+norm_train:train.py -c mv3_large_x0_5.yml -o quant_train=True
+quant_train:False
+prune_train:False
diff --git a/test_tipc/supplementary/test_tipc/train_infer_python_fleet.txt b/test_tipc/supplementary/test_tipc/train_infer_python_fleet.txt
new file mode 100644
index 0000000000000000000000000000000000000000..00b9e8234bc5140188077f0b447d706603f612b7
--- /dev/null
+++ b/test_tipc/supplementary/test_tipc/train_infer_python_fleet.txt
@@ -0,0 +1,17 @@
+===========================train_params===========================
+model_name:ch_PPOCRv2_det
+python:python3.7
+gpu_list:xx.xx.xx.xx,yy.yy.yy.yy;0,1
+use_gpu:True
+AMP.use_amp:True|False
+epoch:lite_train_lite_infer=2|whole_train_whole_infer=1000
+save_model_dir:./output/
+TRAIN.batch_size:lite_train_lite_infer=1280|whole_train_whole_infer=1280
+pretrained_model:null
+checkpoints:null
+use_custom_relu:False|True
+model_type:cls|cls_distill|cls_distill_multiopt
+MODEL.siamese:False|True
+norm_train:train.py -c mv3_large_x0_5.yml -o
+quant_train:False
+prune_train:False
diff --git a/tools/infer/predict_system.py b/tools/infer/predict_system.py
index 16789b81cd0364af91f15a4a90ddd614a3f87611..b4e316d6a5edf464abd846ad2129f5373fc2a36f 100755
--- a/tools/infer/predict_system.py
+++ b/tools/infer/predict_system.py
@@ -24,6 +24,7 @@ os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
import cv2
import copy
import numpy as np
+import json
import time
import logging
from PIL import Image
@@ -128,6 +129,9 @@ def main(args):
is_visualize = True
font_path = args.vis_font_path
drop_score = args.drop_score
+ draw_img_save_dir = args.draw_img_save_dir
+ os.makedirs(draw_img_save_dir, exist_ok=True)
+ save_results = []
# warm up 10 times
if args.warmup:
@@ -157,6 +161,14 @@ def main(args):
for text, score in rec_res:
logger.debug("{}, {:.3f}".format(text, score))
+ res = [{
+ "transcription": rec_res[idx][0],
+ "points": np.array(dt_boxes[idx]).astype(np.int32).tolist(),
+ } for idx in range(len(dt_boxes))]
+ save_pred = os.path.basename(image_file) + "\t" + json.dumps(
+ res, ensure_ascii=False) + "\n"
+ save_results.append(save_pred)
+
if is_visualize:
image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
boxes = dt_boxes
@@ -170,8 +182,6 @@ def main(args):
scores,
drop_score=drop_score,
font_path=font_path)
- draw_img_save_dir = args.draw_img_save_dir
- os.makedirs(draw_img_save_dir, exist_ok=True)
if flag:
image_file = image_file[:-3] + "png"
cv2.imwrite(
@@ -185,6 +195,9 @@ def main(args):
text_sys.text_detector.autolog.report()
text_sys.text_recognizer.autolog.report()
+ with open(os.path.join(draw_img_save_dir, "system_results.txt"), 'w') as f:
+ f.writelines(save_results)
+
if __name__ == "__main__":
args = utility.parse_args()
diff --git a/tools/program.py b/tools/program.py
index 5ffb93d14c073c1ac5386dca94920bbd5acc0b83..c5b0e69b2d7256a1efe6b13efeea265cfcb3f5df 100755
--- a/tools/program.py
+++ b/tools/program.py
@@ -146,6 +146,7 @@ def train(config,
scaler=None):
cal_metric_during_train = config['Global'].get('cal_metric_during_train',
False)
+ calc_epoch_interval = config['Global'].get('calc_epoch_interval', 1)
log_smooth_window = config['Global']['log_smooth_window']
epoch_num = config['Global']['epoch_num']
print_batch_step = config['Global']['print_batch_step']
@@ -244,6 +245,16 @@ def train(config,
optimizer.step()
optimizer.clear_grad()
+ if cal_metric_during_train and epoch % calc_epoch_interval == 0: # only rec and cls need
+ batch = [item.numpy() for item in batch]
+ if model_type in ['table', 'kie']:
+ eval_class(preds, batch)
+ else:
+ post_result = post_process_class(preds, batch[1])
+ eval_class(post_result, batch)
+ metric = eval_class.get_metric()
+ train_stats.update(metric)
+
train_batch_time = time.time() - reader_start
train_batch_cost += train_batch_time
eta_meter.update(train_batch_time)
@@ -258,16 +269,6 @@ def train(config,
stats['lr'] = lr
train_stats.update(stats)
- if cal_metric_during_train: # only rec and cls need
- batch = [item.numpy() for item in batch]
- if model_type in ['table', 'kie']:
- eval_class(preds, batch)
- else:
- post_result = post_process_class(preds, batch[1])
- eval_class(post_result, batch)
- metric = eval_class.get_metric()
- train_stats.update(metric)
-
if vdl_writer is not None and dist.get_rank() == 0:
for k, v in train_stats.get().items():
vdl_writer.add_scalar('TRAIN/{}'.format(k), v, global_step)
@@ -277,12 +278,13 @@ def train(config,
(global_step > 0 and global_step % print_batch_step == 0) or
(idx >= len(train_dataloader) - 1)):
logs = train_stats.log()
+
eta_sec = ((epoch_num + 1 - epoch) * \
len(train_dataloader) - idx - 1) * eta_meter.avg
eta_sec_format = str(datetime.timedelta(seconds=int(eta_sec)))
strs = 'epoch: [{}/{}], global_step: {}, {}, avg_reader_cost: ' \
'{:.5f} s, avg_batch_cost: {:.5f} s, avg_samples: {}, ' \
- 'ips: {:.5f}, eta: {}'.format(
+ 'ips: {:.5f} samples/s, eta: {}'.format(
epoch, epoch_num, global_step, logs,
train_reader_cost / print_batch_step,
train_batch_cost / print_batch_step,