Merge remote-tracking branch 'origin/dygraph' into dy1

6b60f074 · qq_25193841 · 97497d35 · 2ded688e · 6b60f074 · 6b60f074
18 changed files
--- a/README.md
+++ b/README.md
@@ -123,7 +123,7 @@ PaddleOCR support a variety of cutting-edge algorithms related to OCR, and devel
    - [Inference and Deployment](./deploy/README.md)
        - [Python Inference](./ppstructure/docs/inference_en.md)
        - [C++ Inference](./deploy/cpp_infer/readme.md)
-        - [Serving](./deploy/pdserving/README.md)
+        - [Serving](./deploy/hubserving/readme_en.md)
 - [Academic Algorithms](./doc/doc_en/algorithm_overview_en.md)
    - [Text detection](./doc/doc_en/algorithm_overview_en.md)
    - [Text recognition](./doc/doc_en/algorithm_overview_en.md)

--- a/README_ch.md
+++ b/README_ch.md
@@ -135,7 +135,7 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库，助力
    - [推理部署](./deploy/README_ch.md)
        - [基于Python预测引擎推理](./ppstructure/docs/inference.md)
        - [基于C++预测引擎推理](./deploy/cpp_infer/readme_ch.md)
-        - [服务化部署](./deploy/pdserving/README_CN.md)
+        - [服务化部署](./deploy/hubserving/readme.md)
 - [前沿算法与模型🚀](./doc/doc_ch/algorithm_overview.md)
    - [文本检测算法](./doc/doc_ch/algorithm_overview.md)
    - [文本识别算法](./doc/doc_ch/algorithm_overview.md)

--- a/configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml
+++ b/configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml
@@ -191,7 +191,6 @@ Eval:
          channel_first: False
      - DetLabelEncode: # Class handling label
      - DetResizeForTest:
-#           image_shape: [736, 1280]
      - NormalizeImage:
          scale: 1./255.
          mean: [0.485, 0.456, 0.406]

--- a/configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml
+++ b/configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml
@@ -24,6 +24,7 @@ Architecture:
  model_type: det
  Models:
    Student:
+      pretrained:
      model_type: det
      algorithm: DB
      Transform: null
@@ -40,6 +41,7 @@ Architecture:
        name: DBHead
        k: 50
    Student2:
+      pretrained:
      model_type: det
      algorithm: DB
      Transform: null
@@ -91,14 +93,11 @@ Loss:
      - ["Student", "Student2"]
      maps_name: "thrink_maps"
      weight: 1.0
-      # act: None
      model_name_pairs: ["Student", "Student2"]
      key: maps
  - DistillationDBLoss:
      weight: 1.0
      model_name_list: ["Student", "Student2"]
-      # key: maps
-      # name: DBLoss
      balance_loss: true
      main_loss_type: DiceLoss
      alpha: 5
@@ -197,6 +196,7 @@ Train:
    drop_last: false
    batch_size_per_card: 8
    num_workers: 4
 Eval:
  dataset:
    name: SimpleDataSet
@@ -204,31 +204,21 @@ Eval:
    label_file_list:
      - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
    transforms:
-    - DecodeImage:
+      - DecodeImage: # load image
          img_mode: BGR
-        channel_first: false
+          channel_first: False
-    - DetLabelEncode: null
+      - DetLabelEncode: # Class handling label
-    - DetResizeForTest: null
+      - DetResizeForTest:
      - NormalizeImage:
          scale: 1./255.
-        mean:
+          mean: [0.485, 0.456, 0.406]
-        - 0.485
+          std: [0.229, 0.224, 0.225]
-        - 0.456
+          order: 'hwc'
-        - 0.406
+      - ToCHWImage:
-        std:
-        - 0.229
-        - 0.224
-        - 0.225
-        order: hwc
-    - ToCHWImage: null
      - KeepKeys:
-        keep_keys:
+          keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
-        - image
-        - shape
-        - polys
-        - ignore_tags
  loader:
-    shuffle: false
+    shuffle: False
-    drop_last: false
+    drop_last: False
-    batch_size_per_card: 1
+    batch_size_per_card: 1 # must be 1
    num_workers: 2
\ No newline at end of file
--- a/doc/doc_ch/whl.md
+++ b/doc/doc_ch/whl.md
@@ -390,6 +390,7 @@ im_show.save('result.jpg')
 | det_db_thresh           | DB模型输出预测图的二值化阈值                                                                                                                                                                                         | 0.3                     |
 | det_db_box_thresh       | DB模型输出框的阈值，低于此值的预测框会被丢弃                                                                                                                                                                           | 0.5                     |
 | det_db_unclip_ratio     | DB模型输出框扩大的比例                                                                                                                                                                                               | 2                       |
+| det_db_score_mode |  计算检测框score的方式，有'fast'和'slow'，如果要检测的文字有弯曲，建议用'slow'，'slow'模式计算的box的score偏大，box不容易被过滤掉  | 'fast' |
 | det_east_score_thresh   | EAST模型输出预测图的二值化阈值                                                                                                                                                                                       | 0.8                     |
 | det_east_cover_thresh   | EAST模型输出框的阈值，低于此值的预测框会被丢弃                                                                                                                                                                         | 0.1                     |
 | det_east_nms_thresh     | EAST模型输出框NMS的阈值                                                                                                                                                                                              | 0.2                     |

--- a/doc/doc_en/whl_en.md
+++ b/doc/doc_en/whl_en.md
@@ -342,6 +342,7 @@ im_show.save('result.jpg')
 | det_db_thresh           | Binarization threshold value of DB output map                                                                                                                                                                                        | 0.3                     |
 | det_db_box_thresh       | The threshold value of the DB output box. Boxes score lower than this value will be discarded                                                                                                                                                                         | 0.5                     |
 | det_db_unclip_ratio     | The expanded ratio of DB output box                                                                                                                                                                                             | 2                       |
+| det_db_score_mode |  The parameter that controls how the score of the detection box is calculated. There are 'fast' and 'slow' options. If the text to be detected is curved, it is recommended to use 'slow'  | 'fast' |
 | det_east_score_thresh   | Binarization threshold value of EAST output map                                                                                                                                                                                       | 0.8                     |
 | det_east_cover_thresh   | The threshold value of the EAST output box. Boxes score lower than this value will be discarded                                                                                                                                                                         | 0.1                     |
 | det_east_nms_thresh     | The NMS threshold value of EAST model output box                                                                                                                                                                                              | 0.2                     |

--- a/ppocr/data/imaug/rec_img_aug.py
+++ b/ppocr/data/imaug/rec_img_aug.py
@@ -502,7 +502,7 @@ def resize_norm_img_chinese(img, image_shape):
    max_wh_ratio = imgW * 1.0 / imgH
    h, w = img.shape[0], img.shape[1]
    ratio = w * 1.0 / h
-    max_wh_ratio = max(max_wh_ratio, ratio)
+    max_wh_ratio = min(max(max_wh_ratio, ratio), max_wh_ratio)
    imgW = int(imgH * max_wh_ratio)
    if math.ceil(imgH * ratio) > imgW:
        resized_w = imgW

--- a/ppocr/losses/basic_loss.py
+++ b/ppocr/losses/basic_loss.py
@@ -60,19 +60,19 @@ class KLJSLoss(object):
                        ], "mode can only be one of ['kl', 'KL', 'js', 'JS']"
        self.mode = mode
-    def __call__(self, p1, p2, reduction="mean"):
+    def __call__(self, p1, p2, reduction="mean", eps=1e-5):
        if self.mode.lower() == 'kl':
            loss = paddle.multiply(p2,
-                                   paddle.log((p2 + 1e-5) / (p1 + 1e-5) + 1e-5))
+                                   paddle.log((p2 + eps) / (p1 + eps) + eps))
-            loss += paddle.multiply(
+            loss += paddle.multiply(p1,
-                p1, paddle.log((p1 + 1e-5) / (p2 + 1e-5) + 1e-5))
+                                    paddle.log((p1 + eps) / (p2 + eps) + eps))
            loss *= 0.5
        elif self.mode.lower() == "js":
            loss = paddle.multiply(
-                p2, paddle.log((2 * p2 + 1e-5) / (p1 + p2 + 1e-5) + 1e-5))
+                p2, paddle.log((2 * p2 + eps) / (p1 + p2 + eps) + eps))
            loss += paddle.multiply(
-                p1, paddle.log((2 * p1 + 1e-5) / (p1 + p2 + 1e-5) + 1e-5))
+                p1, paddle.log((2 * p1 + eps) / (p1 + p2 + eps) + eps))
            loss *= 0.5
        else:
            raise ValueError(

--- a/ppstructure/README.md
+++ b/ppstructure/README.md
 English | [简体中文](README_ch.md)
+# PP-Structure
 - [1. Introduction](#1-introduction)
 - [2. Features](#2-features)
 - [3. Results](#3-results)

--- a/ppstructure/kie/README.md
+++ b/ppstructure/kie/README.md
 English | [简体中文](README_ch.md)
- [1. Introduction](#1-introduction)
+# Key Information Extraction (KIE)
- [2. Accuracy and performance](#2-Accuracy-and-performance)
+- [1. Introduction](#1-introduction)
- [3. Visualization](#3-Visualization)
+- [2. Performance](#2-performance)
+- [3. Visualization](#3-visualization)
  - [3.1 SER](#31-ser)
  - [3.2 RE](#32-re)
 - [4. Usage](#4-usage)
-  - [4.1 Prepare for the environment](#41-Prepare-for-the-environment)
+  - [4.1 Prepare for the environment](#41-prepare-for-the-environment)
-  - [4.2 Quick start](#42-Quick-start)
+  - [4.2 Quick start](#42-quick-start)
-  - [4.3 More](#43-More)
+  - [4.3 More](#43-more)
- [5. Reference](#5-Reference)
+- [5. Reference](#5-reference)
- [6. License](#6-License)
+- [6. License](#6-license)
 ## 1. Introduction
@@ -31,7 +32,7 @@ The main features of the key information extraction module in PP-Structure are a
 - Support SER model export and inference using PaddleInference.
-## 2. Accuracy and performance
+## 2. Performance
 We evaluate the methods on the Chinese dataset of [XFUND](https://github.com/doc-analysis/XFUND), and the performance is as follows

--- a/ppstructure/layout/README.md
+++ b/ppstructure/layout/README.md
@@ -3,21 +3,22 @@ English | [简体中文](README_ch.md)
 # Layout analysis
 - [1. Introduction](#1-Introduction)
- [2. Install](#2-Install)
+- [2. Quick start](#2-Quick-start)
-  - [2.1 Install PaddlePaddle](#21-Install-paddlepaddle)
+- [3. Install](#3-Install)
-  - [2.2 Install PaddleDetection](#22-Install-paddledetection)
+  - [3.1 Install PaddlePaddle](#31-Install-paddlepaddle)
- [3. Data preparation](#3-Data-preparation)
+  - [3.2 Install PaddleDetection](#32-Install-paddledetection)
-  - [3.1 English data set](#31-English-data-set)
+- [4. Data preparation](#4-Data-preparation)
-  - [3.2 More datasets](#32-More-datasets)
+  - [4.1 English data set](#41-English-data-set)
- [4. Start training](#4-Start-training)
+  - [4.2 More datasets](#42-More-datasets)
-  - [4.1 Train](#41-Train)
+- [5. Start training](#5-Start-training)
-  - [4.2 FGD Distillation training](#42-FGD-Distillation-training)
+  - [5.1 Train](#51-Train)
- [5. Model evaluation and prediction](#5-Model-evaluation-and-prediction)
+  - [5.2 FGD Distillation training](#52-fgd-distillation-training)
-  - [5.1 Indicator evaluation](#51-Indicator-evaluation)
+- [6. Model evaluation and prediction](#6-Model-evaluation-and-prediction)
-  - [5.2 Test layout analysis results](#52-Test-layout-analysis-results)
+  - [6.1 Indicator evaluation](#61-Indicator-evaluation)
- [6 Model export and inference](#6-Model-export-and-inference)
+  - [6.2 Test layout analysis results](#62-Test-layout-analysis-results)
-  - [6.1 Model export](#61-Model-export)
+- [7. Model export and inference](#7-Model-export-and-inference)
-  - [6.2 Model inference](#62-Model-inference)
+  - [7.1 Model export](#71-Model-export)
+  - [7.2 Model inference](#72-Model-inference)
 ## 1. Introduction
@@ -28,11 +29,12 @@ Layout analysis refers to the regional division of documents in the form of pict
    <img src="../docs/layout/layout.png" width="800">
 </div>
+## 2. Quick start
+PP-Structure currently provides layout analysis models for Chinese, English, and table documents. For model links, see [models_list](../docs/models_list_en.md). A whl package is also provided for quick use; see [quickstart](../docs/quickstart_en.md) for details.
+## 3. Install
-## 2. Install
+### 3.1. Install PaddlePaddle
-### 2.1. Install PaddlePaddle
 - **（1) Install PaddlePaddle**
@@ -47,7 +49,7 @@ python3 -m pip install "paddlepaddle>=2.3" -i https://mirror.baidu.com/pypi/simp
 ```
 For more requirements, please refer to the instructions in the [Install file](https://www.paddlepaddle.org.cn/install/quick)。
-### 2.2. Install PaddleDetection
+### 3.2. Install PaddleDetection
 - **（1）Download PaddleDetection Source code**
@@ -62,11 +64,11 @@ cd PaddleDetection
 python3 -m pip install -r requirements.txt
 ```
-## 3. Data preparation
+## 4. Data preparation
 If you want to experience the prediction process directly, you can skip data preparation and download the pre-training model.
-### 3.1. English data set
+### 4.1. English data set
 Download document analysis data set [PubLayNet](https://developer.ibm.com/exchanges/data/all/publaynet/)（Dataset 96G），contains 5 classes：`{0: "Text", 1: "Title", 2: "List", 3:"Table", 4:"Figure"}`
@@ -141,7 +143,7 @@ The JSON file contains the annotations of all images, and the data is stored in
  }
  ```
-### 3.2. More datasets
+### 4.2. More datasets
 We provide CDLA(Chinese layout analysis), TableBank(Table layout analysis)etc. data set download links，process to the JSON format of the above annotation file，that is, the training can be conducted in the same way。
@@ -154,7 +156,7 @@ We provide CDLA(Chinese layout analysis), TableBank(Table layout analysis)etc. d
 | [DocBank](https://github.com/doc-analysis/DocBank)           | Large-scale dataset (500K document pages) constructed using weakly supervised methods for document layout analysis, containing 12 categories:Author, Caption, Date, Equation, Figure, Footer, List, Paragraph, Reference, Section, Table, Title |
-## 4. Start training
+## 5. Start training
 Training scripts, evaluation scripts, and prediction scripts are provided, and the PubLayNet pre-training model is used as an example in this section.
@@ -171,7 +173,7 @@ wget https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_
 If the test image is Chinese, the pre-trained model of Chinese CDLA dataset can be downloaded to identify 10 types of document regions：Table, Figure, Figure caption, Table, Table caption, Header, Footer, Reference, Equation，Download the training model and inference model of Model 'picodet_lcnet_x1_0_fgd_layout_cdla' in [layout analysis model](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/ppstructure/docs/models_list.md)。If only the table area in the image is detected, you can download the pre-trained model of the table dataset, and download the training model and inference model of the 'picodet_LCnet_x1_0_FGd_layout_table' model in [Layout Analysis model](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/ppstructure/docs/models_list.md)
-### 4.1. Train
+### 5.1. Train
 Train:
@@ -247,7 +249,7 @@ After starting training normally, you will see the following log output:
 **Note that the configuration file for prediction / evaluation must be consistent with the training.**
-### 4.2.  FGD Distillation Training
+### 5.2. FGD Distillation Training
 PaddleDetection supports FGD-based [Focal and Global Knowledge Distillation for Detectors]( https://arxiv.org/abs/2111.11837v1)  The training process of the target detection model of distillation, FGD distillation is divided into two parts `Focal` and `Global`.     `Focal` Distillation separates the foreground and background of the image, allowing the student model to focus on the key pixels of the foreground and background features of the teacher model respectively;`  Global`Distillation section reconstructs the relationships between different pixels and transfers them from the teacher to the student to compensate for the global information lost in `Focal`Distillation.
@@ -265,9 +267,9 @@ python3 tools/train.py \
 - `-c`: Specify the model configuration file.
 - `--slim_config`:  Specify the compression policy profile.
-## 5. Model evaluation and prediction
+## 6. Model evaluation and prediction
-### 5.1. Indicator evaluation
+### 6.1. Indicator evaluation
 Model parameters in training are saved by default in `output/picodet_ Lcnet_ X1_ 0_ Under the layout` directory. When evaluating indicators, you need to set `weights` to point to the saved parameter file.Assessment datasets can be accessed via `configs/picodet/legacy_ Model/application/layout_ Analysis/picodet_ Lcnet_ X1_ 0_ Layout. Yml` . Modify `EvalDataset`  : `img_dir`,`anno_ Path`and`dataset_dir` setting.
@@ -310,7 +312,7 @@ python3 tools/eval.py \
 - `--slim_config`:  Specify the distillation policy profile.
 - `-o weights`: Specify the model path trained by the distillation algorithm.
-### 5.2. Test Layout Analysis Results
+### 6.2. Test Layout Analysis Results
 The profile predicted to be used must be consistent with the training, for example, if you pass `python3 tools/train'. Py-c configs/picodet/legacy_ Model/application/layout_ Analysis/picodet_ Lcnet_ X1_ 0_ Layout. Yml` completed the training process for the model.
@@ -343,10 +345,10 @@ python3 tools/infer.py \
 ```
-## 6. Model Export and Inference
+## 7. Model Export and Inference
-### 6.1 Model Export
+### 7.1 Model Export
 The inference model (the model saved by `paddle.jit.save`) is generally a solidified model saved after the model training is completed, and is mostly used to give prediction in deployment.
@@ -385,7 +387,7 @@ python3 tools/export_model.py \
    --output_dir=output_inference/
 ```
-### 6.2 Model inference
+### 7.2 Model inference
 Replace model_with the provided inference training model for inference or the FGD distillation training `model_dir`Inference model path, execute the following commands for inference:

--- a/ppstructure/layout/README_ch.md
+++ b/ppstructure/layout/README_ch.md
@@ -3,21 +3,22 @@
 # 版面分析
 - [1. 简介](#1-简介)
- [2. 安装](#2-安装)
+- [2. 快速开始](#2-快速开始)
-  - [2.1 安装PaddlePaddle](#21-安装paddlepaddle)
+- [3. 安装](#3-安装)
-  - [2.2 安装PaddleDetection](#22-安装paddledetection)
+  - [3.1 安装PaddlePaddle](#31-安装paddlepaddle)
- [3. 数据准备](#3-数据准备)
+  - [3.2 安装PaddleDetection](#32-安装paddledetection)
-  - [3.1 英文数据集](#31-英文数据集)
+- [4. 数据准备](#4-数据准备)
-  - [3.2 更多数据集](#32-更多数据集)
+  - [4.1 英文数据集](#41-英文数据集)
- [4. 开始训练](#4-开始训练)
+  - [4.2 更多数据集](#42-更多数据集)
-  - [4.1 启动训练](#41-启动训练)
+- [5. 开始训练](#5-开始训练)
-  - [4.2 FGD蒸馏训练](#42-FGD蒸馏训练)
+  - [5.1 启动训练](#51-启动训练)
- [5. 模型评估与预测](#5-模型评估与预测)
+  - [5.2 FGD蒸馏训练](#52-fgd蒸馏训练)
-  - [5.1 指标评估](#51-指标评估)
+- [6. 模型评估与预测](#6-模型评估与预测)
-  - [5.2 测试版面分析结果](#52-测试版面分析结果)
+  - [6.1 指标评估](#61-指标评估)
- [6 模型导出与预测](#6-模型导出与预测)
+  - [6.2 测试版面分析结果](#62-测试版面分析结果)
-  - [6.1 模型导出](#61-模型导出)
+- [7. 模型导出与预测](#7-模型导出与预测)
-  - [6.2 模型推理](#62-模型推理)
+  - [7.1 模型导出](#71-模型导出)
+  - [7.2 模型推理](#72-模型推理)
 ## 1. 简介
@@ -27,11 +28,14 @@
    <img src="../docs/layout/layout.png" width="800">
 </div>
+## 2. 快速开始
+PP-Structure目前提供了中文、英文、表格三类文档版面分析模型，模型链接见 [models_list](../docs/models_list.md#1-版面分析模型)。也提供了whl包的形式方便快速使用，详见 [quickstart](../docs/quickstart.md)。
-## 2. 安装依赖
-### 2.1. 安装PaddlePaddle
+## 3. 安装
+### 3.1. 安装PaddlePaddle
 - **（1) 安装PaddlePaddle**
@@ -46,7 +50,7 @@ python3 -m pip install "paddlepaddle>=2.3" -i https://mirror.baidu.com/pypi/simp
 ```
 更多需求，请参照[安装文档](https://www.paddlepaddle.org.cn/install/quick)中的说明进行操作。
-### 2.2. 安装PaddleDetection
+### 3.2. 安装PaddleDetection
 - **（1）下载PaddleDetection源码**
@@ -61,11 +65,11 @@ cd PaddleDetection
 python3 -m pip install -r requirements.txt
 ```
-## 3. 数据准备
+## 4. 数据准备
 如果希望直接体验预测过程，可以跳过数据准备，下载我们提供的预训练模型。
-### 3.1. 英文数据集
+### 4.1. 英文数据集
 下载文档分析数据集[PubLayNet](https://developer.ibm.com/exchanges/data/all/publaynet/)（数据集96G），包含5个类：`{0: "Text", 1: "Title", 2: "List", 3:"Table", 4:"Figure"}`
@@ -140,7 +144,7 @@ json文件包含所有图像的标注，数据以字典嵌套的方式存放，
  }
  ```
-### 3.2. 更多数据集
+### 4.2. 更多数据集
 我们提供了CDLA(中文版面分析)、TableBank(表格版面分析)等数据集的下连接，处理为上述标注文件json格式，即可以按相同方式进行训练。
@@ -153,7 +157,7 @@ json文件包含所有图像的标注，数据以字典嵌套的方式存放，
 | [DocBank](https://github.com/doc-analysis/DocBank)           | 使用弱监督方法构建的大规模数据集(500K文档页面)，用于文档布局分析，包含12类：Author、Caption、Date、Equation、Figure、Footer、List、Paragraph、Reference、Section、Table、Title |
-## 4. 开始训练
+## 5. 开始训练
 提供了训练脚本、评估脚本和预测脚本，本节将以PubLayNet预训练模型为例进行讲解。
@@ -170,7 +174,7 @@ wget https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_
 如果测试图片为中文，可以下载中文CDLA数据集的预训练模型，识别10类文档区域：Table、Figure、Figure caption、Table、Table caption、Header、Footer、Reference、Equation，在[版面分析模型](../docs/models_list.md)中下载`picodet_lcnet_x1_0_fgd_layout_cdla`模型的训练模型和推理模型。如果只检测图片中的表格区域，可以下载表格数据集的预训练模型，在[版面分析模型](../docs/models_list.md)中下载`picodet_lcnet_x1_0_fgd_layout_table`模型的训练模型和推理模型。
-### 4.1. 启动训练
+### 5.1. 启动训练
 开始训练:
@@ -246,7 +250,7 @@ python3 -m paddle.distributed.launch --gpus '0,1,2,3'  tools/train.py \
 **注意，预测/评估时的配置文件请务必与训练一致。**
-### 4.2. FGD蒸馏训练
+### 5.2. FGD蒸馏训练
 PaddleDetection支持了基于FGD([Focal and Global Knowledge Distillation for Detectors](https://arxiv.org/abs/2111.11837v1))蒸馏的目标检测模型训练过程，FGD蒸馏分为两个部分`Focal`和`Global`。`Focal`蒸馏分离图像的前景和背景，让学生模型分别关注教师模型的前景和背景部分特征的关键像素；`Global`蒸馏部分重建不同像素之间的关系并将其从教师转移到学生，以补偿`Focal`蒸馏中丢失的全局信息。
@@ -264,9 +268,9 @@ python3 tools/train.py \
 - `-c`: 指定模型配置文件。
 - `--slim_config`: 指定压缩策略配置文件。
-## 5. 模型评估与预测
+## 6. 模型评估与预测
-### 5.1. 指标评估
+### 6.1. 指标评估
 训练中模型参数默认保存在`output/picodet_lcnet_x1_0_layout`目录下。在评估指标时，需要设置`weights`指向保存的参数文件。评估数据集可以通过 `configs/picodet/legacy_model/application/layout_analysis/picodet_lcnet_x1_0_layout.yml`  修改`EvalDataset`中的 `image_dir`、`anno_path`和`dataset_dir` 设置。
@@ -309,7 +313,7 @@ python3 tools/eval.py \
 - `--slim_config`: 指定蒸馏策略配置文件。
 - `-o weights`: 指定蒸馏算法训好的模型路径。
-### 5.2. 测试版面分析结果
+### 6.2. 测试版面分析结果
 预测使用的配置文件必须与训练一致，如您通过 `python3 tools/train.py -c configs/picodet/legacy_model/application/layout_analysis/picodet_lcnet_x1_0_layout.yml` 完成了模型的训练过程。
@@ -342,10 +346,10 @@ python3 tools/infer.py \
 ```
-## 6. 模型导出与预测
+## 7. 模型导出与预测
-### 6.1 模型导出
+### 7.1 模型导出
 inference 模型（`paddle.jit.save`保存的模型） 一般是模型训练，把模型结构和模型参数保存在文件中的固化模型，多用于预测部署场景。 训练过程中保存的模型是checkpoints模型，保存的只有模型的参数，多用于恢复训练等。 与checkpoints模型相比，inference 模型会额外保存模型的结构信息，在预测部署、加速推理上性能优越，灵活方便，适合于实际系统集成。
@@ -382,7 +386,7 @@ python3 tools/export_model.py \
-### 6.2 模型推理
+### 7.2 模型推理
 若使用**提供的推理训练模型推理**，或使用**FGD蒸馏训练的模型**，更换`model_dir`推理模型路径，执行如下命令进行推理：

--- a/ppstructure/recovery/README.md
+++ b/ppstructure/recovery/README.md
 English | [简体中文](README_ch.md)
- [Getting Started](#getting-started)
+# Layout Recovery
-  - [1.  Introduction](#1)
-  - [2. Install](#2)
+- [1. Introduction](#1)
-    - [2.1 Installation dependencies](#2.1)
+- [2. Install](#2)
+    - [2.1 Install PaddlePaddle](#2.1)
    - [2.2 Install PaddleOCR](#2.2)
-  - [3. Quick Start](#3)
+- [3. Quick Start](#3)
    - [3.1 Download models](#3.1)
    - [3.2 Layout recovery](#3.2)
-  - [4. More](#4)
+- [4. More](#4)
 <a name="1"></a>
@@ -25,16 +26,13 @@ Layout recovery combines [layout analysis](../layout/README.md)、[table recogni
 <div align="center">
 <img src="../docs/recovery/recovery_ch.jpg"  width = "800" />
 </div>
 <a name="2"></a>
 ## 2. Install
 <a name="2.1"></a>
-### 2.1 Install dependencies
+### 2.1 Install PaddlePaddle
- **(1) Install PaddlePaddle**
 ```bash
 python3 -m pip install --upgrade pip
@@ -44,7 +42,6 @@ python3 -m pip install "paddlepaddle-gpu" -i https://mirror.baidu.com/pypi/simpl
 # CPU installation
 python3 -m pip install "paddlepaddle" -i https://mirror.baidu.com/pypi/simple
 ````
 For more requirements, please refer to the instructions in [Installation Documentation](https://www.paddlepaddle.org.cn/en/install/quick?docurl=/documentation/docs/en/install/pip/macos-pip_en.html).
@@ -85,6 +82,8 @@ Through layout analysis, we divided the image/PDF documents into regions, locate
 We can restore the test picture through the layout information, OCR detection and recognition structure, table information, and saved pictures.
+The whl package is also provided for quick use; see [quickstart](../docs/quickstart_en.md) for details.
 <a name="3.1"></a>
 ### 3.1 Download models
@@ -151,10 +150,10 @@ Field：
 ## 4. More
-For training, evaluation and inference tutorial for text detection models, please refer to [text detection doc](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/doc/doc_ch/detection.md).
+For training, evaluation and inference tutorial for text detection models, please refer to [text detection doc](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/doc/doc_en/detection_en.md).
-For training, evaluation and inference tutorial for text recognition models, please refer to [text recognition doc](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/doc/doc_ch/recognition.md).
+For training, evaluation and inference tutorial for text recognition models, please refer to [text recognition doc](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/doc/doc_en/recognition_en.md).
-For training, evaluation and inference tutorial for layout analysis models, please refer to [layout analysis doc](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/ppstructure/layout/README_ch.md)
+For training, evaluation and inference tutorial for layout analysis models, please refer to [layout analysis doc](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/ppstructure/layout/README.md)
-For training, evaluation and inference tutorial for table recognition models, please refer to [table recognition doc](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/ppstructure/table/README_ch.md)
+For training, evaluation and inference tutorial for table recognition models, please refer to [table recognition doc](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/ppstructure/table/README.md)
--- a/ppstructure/recovery/README_ch.md
+++ b/ppstructure/recovery/README_ch.md
 [English](README.md) | 简体中文
-# 版面恢复使用说明
+# 版面恢复
 - [1. 简介](#1)
 - [2. 安装](#2)
-  - [2.1 安装依赖](#2.1)
+  - [2.1 安装PaddlePaddle](#2.1)
  - [2.2 安装PaddleOCR](#2.2)
 - [3. 使用](#3)
  - [3.1 下载模型](#3.1)
  - [3.2 版面恢复](#3.2)
@@ -27,16 +26,13 @@
 <div align="center">
 <img src="../docs/recovery/recovery_ch.jpg"  width = "800" />
 </div>
 <a name="2"></a>
 ## 2. 安装
 <a name="2.1"></a>
-### 2.1 安装依赖
+### 2.1 安装PaddlePaddle
- **（1) 安装PaddlePaddle**
 ```bash
 python3 -m pip install --upgrade pip
@@ -87,6 +83,8 @@ python3 -m pip install -r ppstructure/recovery/requirements.txt
 我们通过版面信息、OCR检测和识别结构、表格信息、保存的图片，对测试图片进行恢复即可。
+提供如下代码实现版面恢复，也提供了whl包的形式方便快速使用，详见 [quickstart](../docs/quickstart.md)。
 <a name="3.1"></a>
 ### 3.1 下载模型

--- a/requirements.txt
+++ b/requirements.txt
@@ -7,6 +7,7 @@ tqdm
 numpy
 visualdl
 rapidfuzz
+opencv-python
 opencv-contrib-python
 cython
 lxml

--- a/test_tipc/configs/slanet/SLANet.yml
+++ b/test_tipc/configs/slanet/SLANet.yml
+Global:
+  use_gpu: true
+  epoch_num: 100
+  log_smooth_window: 20
+  print_batch_step: 20
+  save_model_dir: ./output/SLANet
+  save_epoch_step: 400
+  # evaluation is run every 1000 iterations after the 0th iteration
+  eval_batch_step: [0, 1000]
+  cal_metric_during_train: True
+  pretrained_model:
+  checkpoints:
+  save_inference_dir: ./output/SLANet/infer
+  use_visualdl: False
+  infer_img: doc/table/table.jpg
+  # for data or label process
+  character_dict_path: ppocr/utils/dict/table_structure_dict.txt
+  character_type: en
+  max_text_length: &max_text_length 500
+  box_format: &box_format 'xyxy' # 'xywh', 'xyxy', 'xyxyxyxy'
+  infer_mode: False
+  use_sync_bn: True
+  save_res_path: 'output/infer'
+Optimizer:
+  name: Adam
+  beta1: 0.9
+  beta2: 0.999
+  clip_norm: 5.0
+  lr:
+    name: Piecewise
+    learning_rate: 0.001
+    decay_epochs : [40, 50]
+    values : [0.001, 0.0001, 0.00005]
+  regularizer:
+    name: 'L2'
+    factor: 0.00000
+Architecture:
+  model_type: table
+  algorithm: SLANet
+  Backbone:
+    name: PPLCNet
+    scale: 1.0
+    pretrained: true
+    use_ssld: true
+  Neck:
+    name: CSPPAN
+    out_channels: 96
+  Head:
+    name: SLAHead
+    hidden_size: 256
+    max_text_length: *max_text_length
+    loc_reg_num: &loc_reg_num 4
+Loss:
+  name: SLALoss
+  structure_weight: 1.0
+  loc_weight: 2.0
+  loc_loss: smooth_l1
+PostProcess:
+  name: TableLabelDecode
+  merge_no_span_structure: &merge_no_span_structure True
+Metric:
+  name: TableMetric
+  main_indicator: acc
+  compute_bbox_metric: False
+  loc_reg_num: *loc_reg_num
+  box_format: *box_format
+Train:
+  dataset:
+    name: PubTabDataSet
+    data_dir: ./train_data/pubtabnet/train/
+    label_file_list: [./train_data/pubtabnet/train.jsonl]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - TableLabelEncode:
+          learn_empty_box: False
+          merge_no_span_structure: *merge_no_span_structure
+          replace_empty_cell_token: False
+          loc_reg_num: *loc_reg_num
+          max_text_length: *max_text_length
+      - TableBoxEncode:
+          in_box_format: *box_format
+          out_box_format: *box_format
+      - ResizeTableImage:
+          max_len: 488
+      - NormalizeImage:
+          scale: 1./255.
+          mean: [0.485, 0.456, 0.406]
+          std: [0.229, 0.224, 0.225]
+          order: 'hwc'
+      - PaddingTableImage:
+          size: [488, 488]
+      - ToCHWImage:
+      - KeepKeys:
+          keep_keys: [ 'image', 'structure', 'bboxes', 'bbox_masks', 'shape' ]
+  loader:
+    shuffle: True
+    batch_size_per_card: 48
+    drop_last: True
+    num_workers: 1
+Eval:
+  dataset:
+    name: PubTabDataSet
+    data_dir: ./train_data/pubtabnet/test/
+    label_file_list: [./train_data/pubtabnet/test.jsonl]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - TableLabelEncode:
+          learn_empty_box: False
+          merge_no_span_structure: *merge_no_span_structure
+          replace_empty_cell_token: False
+          loc_reg_num: *loc_reg_num
+          max_text_length: *max_text_length
+      - TableBoxEncode:
+          in_box_format: *box_format
+          out_box_format: *box_format
+      - ResizeTableImage:
+          max_len: 488
+      - NormalizeImage:
+          scale: 1./255.
+          mean: [0.485, 0.456, 0.406]
+          std: [0.229, 0.224, 0.225]
+          order: 'hwc'
+      - PaddingTableImage:
+          size: [488, 488]
+      - ToCHWImage:
+      - KeepKeys:
+          keep_keys: [ 'image', 'structure', 'bboxes', 'bbox_masks', 'shape' ]
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 48
+    num_workers: 1
--- a/test_tipc/configs/slanet/train_infer_python.txt
+++ b/test_tipc/configs/slanet/train_infer_python.txt
+===========================train_params===========================
+model_name:slanet
+python:python3.7
+gpu_list:0|0,1
+Global.use_gpu:True|True
+Global.auto_cast:fp32
+Global.epoch_num:lite_train_lite_infer=3|whole_train_whole_infer=50
+Global.save_model_dir:./output/
+Train.loader.batch_size_per_card:lite_train_lite_infer=16|whole_train_whole_infer=128
+Global.pretrained_model:./pretrain_models/en_ppstructure_mobile_v2.0_SLANet_train/best_accuracy
+train_model_name:latest
+train_infer_img_dir:./ppstructure/docs/table/table.jpg
+null:null
+##
+trainer:norm_train
+norm_train:tools/train.py -c test_tipc/configs/slanet/SLANet.yml -o Global.print_batch_step=1 Train.loader.shuffle=false
+pact_train:null
+fpgm_train:null
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params=========================== 
+eval:null
+null:null
+##
+===========================infer_params===========================
+Global.save_inference_dir:./output/
+Global.checkpoints:
+norm_export:tools/export_model.py -c test_tipc/configs/slanet/SLANet.yml -o
+quant_export:
+fpgm_export: 
+distill_export:null
+export1:null
+export2:null
+##
+infer_model:./inference/en_ppstructure_mobile_v2.0_SLANet_train
+infer_export:null
+infer_quant:False
+inference:ppstructure/table/predict_table.py --det_model_dir=./inference/en_ppocr_mobile_v2.0_table_det_infer --rec_model_dir=./inference/en_ppocr_mobile_v2.0_table_rec_infer  --rec_char_dict_path=./ppocr/utils/dict/table_dict.txt --table_char_dict_path=./ppocr/utils/dict/table_structure_dict.txt --image_dir=./ppstructure/docs/table/table.jpg --det_limit_side_len=736 --det_limit_type=min --output ./output/table
+--use_gpu:True|False
+--enable_mkldnn:False
+--cpu_threads:6
+--rec_batch_num:1
+--use_tensorrt:False
+--precision:fp32
+--table_model_dir:
+--image_dir:./ppstructure/docs/table/table.jpg
+null:null
+--benchmark:False
+null:null
+===========================infer_benchmark_params==========================
+random_infer_input:[{float32,[3,488,488]}]
+===========================train_benchmark_params==========================
+batch_size:32
+fp_items:fp32|fp16
+epoch:2
+--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
+flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
--- a/test_tipc/prepare.sh
+++ b/test_tipc/prepare.sh
@@ -108,6 +108,7 @@ if [ ${MODE} = "benchmark_train" ];then
    fi
    if [ ${model_name} == "layoutxlm_ser" ] || [ ${model_name} == "vi_layoutxlm_ser" ]; then
        pip install -r ppstructure/kie/requirements.txt
+        pip install opencv-python -U
        wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/ppstructure/dataset/XFUND.tar --no-check-certificate
        cd ./train_data/ && tar xf XFUND.tar
        # expand gt.txt 10 times
@@ -139,6 +140,13 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
        wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tar --no-check-certificate
        cd ./inference/ && tar xf en_ppocr_mobile_v2.0_table_det_infer.tar && tar xf en_ppocr_mobile_v2.0_table_rec_infer.tar && cd ../
    fi
+    if [ "${model_name}" == "slanet" ];then  # SLANet: fetch pretrained weights plus the table det/rec inference models
+        wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_train.tar --no-check-certificate
+        (cd ./pretrain_models/ && tar xf en_ppstructure_mobile_v2.0_SLANet_train.tar)  # subshell: caller's cwd is untouched even if tar fails
+        wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_det_infer.tar --no-check-certificate
+        wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tar --no-check-certificate
+        (cd ./inference/ && tar xf en_ppocr_mobile_v2.0_table_det_infer.tar && tar xf en_ppocr_mobile_v2.0_table_rec_infer.tar)  # subshell keeps later relative paths valid
+    fi
    if [[ ${model_name} =~ "det_r50_db_plusplus" ]];then
        wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.1/en_det/ResNet50_dcn_asf_synthtext_pretrained.pdparams --no-check-certificate
    fi
@@ -223,6 +231,7 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
    fi
    if [ ${model_name} == "layoutxlm_ser" ] || [ ${model_name} == "vi_layoutxlm_ser" ]; then
        pip install -r ppstructure/kie/requirements.txt
+        pip install opencv-python -U
        wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/ppstructure/dataset/XFUND.tar --no-check-certificate
        cd ./train_data/ && tar xf XFUND.tar
        cd ../