Commit df001f3c authored by qq_25193841

Merge remote-tracking branch 'origin/dygraph' into dygraph

include LICENSE
include README.md
recursive-include ppocr/utils *.txt utility.py logging.py network.py
recursive-include ppocr/utils *.*
recursive-include ppocr/data *.py
recursive-include ppocr/postprocess *.py
recursive-include tools/infer *.py
......
......@@ -11,8 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddleocr
from .paddleocr import *
__version__ = paddleocr.VERSION
__all__ = ['PaddleOCR', 'PPStructure', 'draw_ocr', 'draw_structure_result', 'save_structure_res','download_with_progressbar']
__all__ = [
'PaddleOCR', 'PPStructure', 'draw_ocr', 'draw_structure_result',
'save_structure_res', 'download_with_progressbar'
]
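For orientation, a minimal sketch of how the entry points exported above are typically used from Python; the image path is an illustrative assumption, and the detection/classification/recognition models are downloaded automatically on first use:
```python
# Minimal usage sketch of the public API exported above; the image path is an
# illustrative assumption and the models are fetched on first run.
from paddleocr import PaddleOCR

ocr = PaddleOCR(use_angle_cls=True, lang='ch')  # det + cls + rec pipeline
result = ocr.ocr('doc/imgs/11.jpg', cls=True)
for line in result:
    print(line)  # [box coordinates, (text, confidence)]
```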
......@@ -8,7 +8,7 @@ Global:
# evaluation is run every 2000 iterations after the 3000th iteration
eval_batch_step: [3000, 2000]
cal_metric_during_train: False
pretrained_model: ./pretrain_models/ch_PP-OCRv2_det_distill_train/best_accuracy
pretrained_model:
checkpoints:
save_inference_dir:
use_visualdl: False
......
- [Visual Studio 2019 Community CMake Compilation Guide](#visual-studio-2019-community-cmake-编译指南)
  - [1. Environment Preparation](#1-环境准备)
    - [1.1 Install the Required Environment](#11-安装必须环境)
    - [1.2 Download the PaddlePaddle C++ Inference Library and OpenCV](#12-下载-paddlepaddle-c-预测库和-opencv)
      - [1.2.1 Download the PaddlePaddle C++ Inference Library](#121-下载-paddlepaddle-c-预测库)
      - [1.2.2 Install and Configure OpenCV](#122-安装配置opencv)
      - [1.2.3 Download the PaddleOCR Code](#123-下载paddleocr代码)
  - [2. Getting Started](#2-开始运行)
    - [Step1: Build the Visual Studio Project](#step1-构建visual-studio项目)
    - [Step2: Run the CMake Configuration](#step2-执行cmake配置)
    - [Step3: Generate the Visual Studio Project](#step3-生成visual-studio-项目)
    - [Step4: Inference](#step4-预测)
  - [FAQ](#faq)
# Visual Studio 2019 Community CMake Compilation Guide
PaddleOCR has been tested on Windows with `Visual Studio 2019 Community`. Microsoft has supported managing `CMake` cross-platform build projects directly since `Visual Studio 2017`, but stable and complete support only arrived with `2019`, so if you want to manage the build with CMake we recommend doing so under `Visual Studio 2019`.
**All examples below use `D:\projects\cpp` as the working directory.**
## 1. Environment Preparation
### 1.1 Install the Required Environment
## Prerequisites
* Visual Studio 2019
* CUDA 10.2, cuDNN 7+ (only required when using the GPU version of the inference library)
* CMake 3.0+
* CMake 3.22+
Make sure the software above is installed on your system; we use the Community edition of `VS2019`.
**All examples below use `D:\projects` as the working directory.**
### 1.2 Download the PaddlePaddle C++ Inference Library and OpenCV
### Step1: Download the PaddlePaddle C++ inference library paddle_inference
#### 1.2.1 Download the PaddlePaddle C++ Inference Library
The PaddlePaddle C++ inference library provides different precompiled builds for different `CPU` and `CUDA` versions; download the one that matches your environment: [C++ inference library download list](https://paddleinference.paddlepaddle.org.cn/user_guides/download_lib.html#windows)
......@@ -26,87 +43,94 @@ paddle_inference
└── version.txt # version and build information
```
### Step2: Install and configure OpenCV
#### 1.2.2 Install and Configure OpenCV
1. Download OpenCV 3.4.6 for Windows from the official site, [download link](https://sourceforge.net/projects/opencvlibrary/files/3.4.6/opencv-3.4.6-vc14_vc15.exe/download)
2. Run the downloaded executable and extract OpenCV to a directory of your choice, e.g. `D:\projects\opencv`
3. Configure the environment variable as follows
- My Computer -> Properties -> Advanced system settings -> Environment Variables
- Find Path among the system variables (create it if it does not exist) and double-click to edit it
- Add a new entry with the OpenCV path and save, e.g. `D:\projects\opencv\build\x64\vc14\bin`
1. Download OpenCV for Windows from the official site, [download link](https://github.com/opencv/opencv/releases)
2. Run the downloaded executable and extract OpenCV to a directory of your choice, e.g. `D:\projects\cpp\opencv`
### Step3: Build with CMake directly in Visual Studio 2019
#### 1.2.3 Download the PaddleOCR Code
```bash
git clone -b dygraph https://github.com/PaddlePaddle/PaddleOCR
```
1. Open Visual Studio 2019 Community and click `Continue without code`
![step2](https://paddleseg.bj.bcebos.com/inference/vs2019_step1.png)
2. Click `File` -> `Open` -> `CMake`
![step2.1](https://paddleseg.bj.bcebos.com/inference/vs2019_step2.png)
## 2. Getting Started
Select the path of the project code and open `CMakeList.txt`
### Step1: Build the Visual Studio Project
After CMake is installed, a cmake-gui program is available on the system. Open cmake-gui, fill in the source code path in the first input box and the build output path in the second input box
![step2.2](https://paddleseg.bj.bcebos.com/inference/vs2019_step3.png)
![step1](imgs/cmake_step1.png)
3. Click `Project` -> `CMake Settings`
### Step2: Run the CMake Configuration
Click the `Configure` button at the bottom of the window. The first click pops up a dialog for Visual Studio configuration, as shown below; select your Visual Studio version and choose x64 as the target platform, then click `finish` to start the configuration automatically.
![step3](https://paddleseg.bj.bcebos.com/inference/vs2019_step4.png)
![step2](imgs/cmake_step2.png)
4. Set the build options to specify the paths of `CUDA`, `CUDNN_LIB`, `OpenCV`, and the `Paddle inference library`
The first run will report an error; this is expected. Next, configure OpenCV and the inference library
The meanings of the build parameters are as follows (`*` marks parameters that are only needed when using the **GPU version** of the inference library; keep the CUDA library versions aligned where possible):
* CPU version: only the three parameters OPENCV_DIR, OpenCV_DIR, and PADDLE_LIB need to be set
| Parameter | Meaning |
| ---- | ---- |
| *CUDA_LIB | CUDA library path |
| *CUDNN_LIB | cuDNN library path |
| OPENCV_DIR | OpenCV installation path |
| PADDLE_LIB | Path of the Paddle inference library |
- OPENCV_DIR: the directory containing the OpenCV lib folder
- OpenCV_DIR: same as above, the directory containing the OpenCV lib folder
- PADDLE_LIB: the directory containing the paddle_inference folder
**Note:**
1. If you use the `CPU` version of the inference library, uncheck `WITH_GPU`
2. If you use the `openblas` version, uncheck `WITH_MKL`
* GPU version: in addition to the CPU settings, the following variables also need to be filled in
CUDA_LIB, CUDNN_LIB, TENSORRT_DIR, WITH_GPU, WITH_TENSORRT
![step4](https://paddleseg.bj.bcebos.com/inference/vs2019_step5.png)
- CUDA_LIB: CUDA library path, e.g. `C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2\lib\x64`
- CUDNN_LIB: same as CUDA_LIB
- TENSORRT_DIR: the directory where TensorRT was extracted after download
- WITH_GPU: check it
- WITH_TENSORRT: check it
A screenshot of the completed configuration is shown below
![step3](imgs/cmake_step3.png)
After the configuration is done, click the `Configure` button again.
An example configuration with GPU is given below:
![step5](./vs2019_build_withgpu_config.png)
**Note:**
Set the CMAKE_BACKWARDS version according to the CMake version installed on your platform.
1. If you use the `openblas` version, uncheck `WITH_MKL`
2. If you hit the error `unable to access 'https://github.com/LDOUBLEV/AutoLog.git/': gnutls_handshake() failed: The TLS connection was non-properly terminated.`, change the GitHub address in `deploy/cpp_infer/external-cmake/auto-log.cmake` to https://gitee.com/Double_V/AutoLog.
### Step3: Generate the Visual Studio Project
**After the settings are complete**, click `Save and generate CMake cache to load variables` in the figure above.
Click the `Generate` button to generate the sln file of the Visual Studio project.
![step4](imgs/cmake_step4.png)
5. Click `Build` -> `Build All`
Click the `Open Project` button to open the project in Visual Studio. A screenshot after opening is shown below
![step6](https://paddleseg.bj.bcebos.com/inference/vs2019_step6.png)
![step5](imgs/vs_step1.png)
Before building the solution, perform the following steps:
1. Change `Debug` to `Release`
2. Download [dirent.h](https://paddleocr.bj.bcebos.com/deploy/cpp_infer/cpp_files/dirent.h) and copy it into the Visual Studio include folder, e.g. `C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\VS\include`
Click `Build -> Build Solution`, and the `ppocr.exe` file will appear in the `build/Release/` folder.
Before running, copy the following files into the `build/Release/` folder:
1. `paddle_inference/paddle/lib/paddle_inference.dll`
2. `opencv/build/x64/vc15/bin/opencv_world455.dll`
### Step4: Inference
The executable built by `Visual Studio 2019` above is located in the `out\build\x64-Release\Release` directory. Open `cmd` and switch to `D:\projects\PaddleOCR\deploy\cpp_infer\`:
The executable built by `Visual Studio 2019` above is located in the `out\build\x64-Release\Release` directory. Open `cmd` and switch to `D:\projects\cpp\PaddleOCR\deploy\cpp_infer\`:
```
cd D:\projects\PaddleOCR\deploy\cpp_infer
cd /d D:\projects\cpp\PaddleOCR\deploy\cpp_infer
```
The executable `ppocr.exe` is the sample inference program. Its main usage is shown below; for more usage, see the `Run the demo` section of the [documentation](../readme.md).
```shell
# Recognize the Chinese images in `D:\projects\PaddleOCR\doc\imgs_words\ch\`
.\out\build\x64-Release\Release\ppocr.exe rec --rec_model_dir=D:\projects\PaddleOCR\ch_ppocr_mobile_v2.0_rec_infer --image_dir=D:\projects\PaddleOCR\doc\imgs_words\ch\
# Recognize the English images in 'D:\projects\PaddleOCR\doc\imgs_words\en\'
.\out\build\x64-Release\Release\ppocr.exe rec --rec_model_dir=D:\projects\PaddleOCR\inference\rec_mv3crnn --image_dir=D:\projects\PaddleOCR\doc\imgs_words\en\ --char_list_file=D:\projects\PaddleOCR\ppocr\utils\dict\en_dict.txt
# Switch the terminal encoding to UTF-8
CHCP 65001
# Run inference
.\build\Release\ppocr.exe system --det_model_dir=D:\projects\cpp\ch_PP-OCRv2_det_slim_quant_infer --rec_model_dir=D:\projects\cpp\ch_PP-OCRv2_rec_slim_quant_infer --image_dir=D:\projects\cpp\PaddleOCR\doc\imgs\11.jpg
```
The recognition result is as follows
![result](imgs/result.png)
The first argument is the configuration file path, the second is the path of the image to predict, and the third is the dictionary used for text recognition.
### FAQ
* When running the exe in a Windows terminal, the output may be garbled. In that case, type `CHCP 65001` in the terminal to switch its encoding from the default GBK to UTF-8; see this blog post for a more detailed explanation: [https://blog.csdn.net/qq_35038153/article/details/78430359](https://blog.csdn.net/qq_35038153/article/details/78430359).
* If compilation fails with `error C1083: Cannot open include file: "dirent.h": No such file or directory`, refer to this [article](https://blog.csdn.net/Dora_blank/article/details/117740837#41_C1083_direnthNo_such_file_or_directory_54), create a `dirent.h` file and add it to the header includes of `utility.cpp`, and change `lstat` to `stat` on line 70 of `utility.cpp`.
* If compilation fails with `Autolog undefined`, create an `autolog.h` file with the contents of [autolog.h](https://github.com/LDOUBLEV/AutoLog/blob/main/auto_log/autolog.h), add it to the header includes of `main.cpp`, and compile again.
* If at runtime a dialog reports that `paddle_inference.dll` or `openblas.dll` cannot be found, locate these two files in the `D:\projects\paddle_inference` inference library and copy them into the `D:\projects\PaddleOCR\deploy\cpp_infer\out\build\x64-Release\Release` directory. No rebuild is needed; just run again.
## FAQ
* If at runtime a dialog reports `The application was unable to start correctly (0xc0000142)` and the `cmd` window prints `You are using Paddle compiled with TensorRT, but TensorRT dynamic library is not found.`, copy all dll files from the lib folder under the TensorRT directory into the release directory and run again.
......@@ -6,6 +6,7 @@ set(FETCHCONTENT_BASE_DIR "${CMAKE_CURRENT_BINARY_DIR}/third-party")
FetchContent_Declare(
extern_Autolog
PREFIX autolog
# If you don't have access to github, replace it with https://gitee.com/Double_V/AutoLog
GIT_REPOSITORY https://github.com/LDOUBLEV/AutoLog.git
GIT_TAG main
)
......
......@@ -46,8 +46,7 @@ public:
const double &det_db_box_thresh,
const double &det_db_unclip_ratio,
const bool &use_polygon_score, const bool &use_dilation,
const bool &visualize, const bool &use_tensorrt,
const std::string &precision) {
const bool &use_tensorrt, const std::string &precision) {
this->use_gpu_ = use_gpu;
this->gpu_id_ = gpu_id;
this->gpu_mem_ = gpu_mem;
......@@ -62,7 +61,6 @@ public:
this->use_polygon_score_ = use_polygon_score;
this->use_dilation_ = use_dilation;
this->visualize_ = visualize;
this->use_tensorrt_ = use_tensorrt;
this->precision_ = precision;
......
......@@ -44,7 +44,8 @@ public:
const int &gpu_id, const int &gpu_mem,
const int &cpu_math_library_num_threads,
const bool &use_mkldnn, const string &label_path,
const bool &use_tensorrt, const std::string &precision,
const bool &use_tensorrt,
const std::string &precision,
const int &rec_batch_num) {
this->use_gpu_ = use_gpu;
this->gpu_id_ = gpu_id;
......@@ -66,7 +67,8 @@ public:
// Load Paddle inference model
void LoadModel(const std::string &model_dir);
void Run(std::vector<cv::Mat> img_list, std::vector<double> *times);
void Run(std::vector<cv::Mat> img_list, std::vector<std::string> &rec_texts,
std::vector<float> &rec_text_scores, std::vector<double> *times);
private:
std::shared_ptr<Predictor> predictor_;
......@@ -85,7 +87,7 @@ private:
bool use_tensorrt_ = false;
std::string precision_ = "fp32";
int rec_batch_num_ = 6;
// pre-process
CrnnResizeImg resize_op_;
Normalize normalize_op_;
......
......@@ -38,7 +38,8 @@ public:
static void
VisualizeBboxes(const cv::Mat &srcimg,
const std::vector<std::vector<std::vector<int>>> &boxes);
const std::vector<std::vector<std::vector<int>>> &boxes,
const std::string &save_path);
template <class ForwardIterator>
inline static size_t argmax(ForwardIterator first, ForwardIterator last) {
......@@ -47,12 +48,13 @@ public:
static void GetAllFiles(const char *dir_name,
std::vector<std::string> &all_inputs);
static cv::Mat GetRotateCropImage(const cv::Mat &srcimage,
std::vector<std::vector<int>> box);
static std::vector<int> argsort(const std::vector<float>& array);
std::vector<std::vector<int>> box);
static std::vector<int> argsort(const std::vector<float> &array);
static std::string basename(const std::string &filename);
};
} // namespace PaddleOCR
\ No newline at end of file
# Server-side C++ Inference
This chapter introduces the C++ deployment of PaddleOCR models; for the corresponding Python inference deployment, see the [documentation](../../doc/doc_ch/inference.md).
C++ outperforms Python in compute performance, so C++ deployment is preferred in most CPU and GPU deployment scenarios. This section describes how to set up the C++ environment on Linux\Windows (CPU\GPU) and complete the deployment of PaddleOCR models.
- [Server-side C++ Inference](#服务器端c预测)
  - [1. Prepare the Environment](#1-准备环境)
    - [1.0 Prerequisites](#10-运行准备)
......@@ -18,6 +12,14 @@ PaddleOCR模型部署。
    - [1. Run detection only:](#1-只调用检测)
    - [2. Run recognition only:](#2-只调用识别)
    - [3. Run the pipeline:](#3-调用串联)
  - [3. FAQ](#3-faq)
# Server-side C++ Inference
This chapter introduces the C++ deployment of PaddleOCR models; for the corresponding Python inference deployment, see the [documentation](../../doc/doc_ch/inference.md).
C++ outperforms Python in compute performance, so C++ deployment is preferred in most CPU and GPU deployment scenarios. This section describes how to set up the C++ environment on Linux\Windows (CPU\GPU) and complete the deployment of PaddleOCR models.
<a name="1"></a>
......@@ -28,7 +30,7 @@ PaddleOCR模型部署。
### 1.0 Prerequisites
- Linux environment; docker is recommended.
- Windows environment; compilation based on `Visual Studio 2019 Community` is currently supported
- Windows environment.
* This document mainly describes the PaddleOCR C++ inference workflow on Linux. For C++ inference on Windows based on the inference library, see the [Windows compilation tutorial](./docs/windows_vs2019_build.md)
......@@ -254,6 +256,7 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir
|gpu_mem|int|4000|GPU memory requested|
|cpu_math_library_num_threads|int|10|Number of threads for CPU inference; when the machine has enough cores, the larger the value, the faster the inference|
|enable_mkldnn|bool|true|Whether to use the mkldnn library|
|output|str|./output|Path where the visualization results are saved|
- Detection model related parameters
......@@ -265,7 +268,7 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir
|det_db_box_thresh|float|0.5|DB post-processing threshold for filtering boxes; if boxes are missed, it can be reduced as appropriate|
|det_db_unclip_ratio|float|1.6|Compactness of the text box; the smaller the value, the closer the box fits the text|
|use_polygon_score|bool|false|Whether to use a polygon box to compute the bbox score; false means a rectangular box is used. The rectangular box is faster to compute, while the polygon box is more accurate for curved text regions.|
|visualize|bool|true|Whether to visualize the results; when set to 1, the prediction result is saved as `ocr_vis.png` in the current folder.|
|visualize|bool|true|Whether to visualize the results; when set to 1, the prediction result is saved in the folder specified by `output`, in an image with the same name as the input image.|
- Angle classifier related parameters
......@@ -280,10 +283,10 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir
|Parameter|Type|Default|Meaning|
| :---: | :---: | :---: | :---: |
|rec_model_dir|string|-|Path of the recognition inference model|
|char_list_file|string|../../ppocr/utils/ppocr_keys_v1.txt|Dictionary file|
|rec_char_dict_path|string|../../ppocr/utils/ppocr_keys_v1.txt|Dictionary file|
* PaddleOCR also supports multi-language prediction; see the multi-language dictionaries and models section of the [recognition documentation](../../doc/doc_ch/recognition.md) for more supported languages and models. For multi-language prediction, simply change the `char_list_file` (dictionary file path) and `rec_model_dir` (inference model path) fields.
* PaddleOCR also supports multi-language prediction; see the multi-language dictionaries and models section of the [recognition documentation](../../doc/doc_ch/recognition.md) for more supported languages and models. For multi-language prediction, simply change the `rec_char_dict_path` (dictionary file path) and `rec_model_dir` (inference model path) fields.
Finally, the detection results will be printed on the screen as follows.
......@@ -291,5 +294,6 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir
<img src="./imgs/cpp_infer_pred_12.png" width="600">
</div>
## 3. FAQ
**Note: When using the Paddle inference library, version 2.0.0 is recommended.**
1. If you hit the error `unable to access 'https://github.com/LDOUBLEV/AutoLog.git/': gnutls_handshake() failed: The TLS connection was non-properly terminated.`, change the GitHub address in `deploy/cpp_infer/external-cmake/auto-log.cmake` to https://gitee.com/Double_V/AutoLog.
- [Server-side C++ Inference](#server-side-c-inference)
- [1. Prepare the Environment](#1-prepare-the-environment)
- [Environment](#environment)
- [1.1 Compile OpenCV](#11-compile-opencv)
- [1.2 Compile or Download the Paddle Inference Library](#12-compile-or-download-or-the-paddle-inference-library)
- [1.2.1 Direct download and installation](#121-direct-download-and-installation)
- [1.2.2 Compile the inference source code](#122-compile-the-inference-source-code)
- [2. Compile and Run the Demo](#2-compile-and-run-the-demo)
- [2.1 Export the inference model](#21-export-the-inference-model)
- [2.2 Compile PaddleOCR C++ inference demo](#22-compile-paddleocr-c-inference-demo)
- [Run the demo](#run-the-demo)
- [1. run det demo:](#1-run-det-demo)
- [2. run rec demo:](#2-run-rec-demo)
- [3. run system demo:](#3-run-system-demo)
- [3. FAQ](#3-faq)
# Server-side C++ Inference
This chapter introduces the C++ deployment steps of the PaddleOCR model. For the corresponding Python inference deployment, refer to the [documentation](../../doc/doc_ch/inference.md).
......@@ -10,6 +26,7 @@ This section will introduce how to configure the C++ environment and deploy Padd
### Environment
- Linux, docker is recommended.
- Windows.
### 1.1 Compile OpenCV
......@@ -232,6 +249,7 @@ More parameters are as follows,
|gpu_mem|int|4000|GPU memory requested|
|cpu_math_library_num_threads|int|10|Number of threads for CPU inference; when the machine has enough cores, the larger the value, the faster the inference speed|
|enable_mkldnn|bool|true|Whether to use the mkldnn library|
|output|str|./output|Path where visualization results are saved|
- Detection related parameters
......@@ -243,7 +261,7 @@ More parameters are as follows,
|det_db_box_thresh|float|0.5|DB post-processing threshold for filtering boxes; if boxes are missed, it can be reduced as appropriate|
|det_db_unclip_ratio|float|1.6|Indicates the compactness of the text box; the smaller the value, the closer the box fits the text|
|use_polygon_score|bool|false|Whether to use a polygon box to calculate the bbox score; false means a rectangular box is used. The rectangular box is faster to compute, while the polygon box is more accurate for curved text areas.|
|visualize|bool|true|Whether to visualize the results; when set to true, the prediction result is saved in the image file `./ocr_vis.png`.|
|visualize|bool|true|Whether to visualize the results; when set to true, the prediction result is saved in the folder specified by `output`, in an image with the same name as the input image.|
- Classifier related parameters
......@@ -258,9 +276,9 @@ More parameters are as follows,
|parameter|data type|default|meaning|
| --- | --- | --- | --- |
|rec_model_dir|string|-|Address of recognition inference model|
|char_list_file|string|../../ppocr/utils/ppocr_keys_v1.txt|dictionary file|
|rec_char_dict_path|string|../../ppocr/utils/ppocr_keys_v1.txt|dictionary file|
* Multi-language inference is also supported in PaddleOCR, you can refer to [recognition tutorial](../../doc/doc_en/recognition_en.md) for more supported languages and models in PaddleOCR. Specifically, if you want to infer using multi-language models, you just need to modify values of `char_list_file` and `rec_model_dir`.
* Multi-language inference is also supported in PaddleOCR, you can refer to [recognition tutorial](../../doc/doc_en/recognition_en.md) for more supported languages and models in PaddleOCR. Specifically, if you want to infer using multi-language models, you just need to modify values of `rec_char_dict_path` and `rec_model_dir`.
The detection results will be shown on the screen, which is as follows.
......@@ -270,6 +288,6 @@ The detection results will be shown on the screen, which is as follows.
</div>
### 2.3 Notes
## 3. FAQ
* Paddle 2.0.0 inference model library is recommended for this tutorial.
1. Encountered the error `unable to access 'https://github.com/LDOUBLEV/AutoLog.git/': gnutls_handshake() failed: The TLS connection was non-properly terminated.`, change the github address in `deploy/cpp_infer/external-cmake/auto-log.cmake` to the https://gitee.com/Double_V/AutoLog address.
......@@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "glog/logging.h"
#include "omp.h"
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
......@@ -21,13 +20,13 @@
#include <iomanip>
#include <iostream>
#include <ostream>
#include <sys/stat.h>
#include <vector>
#include <cstring>
#include <fstream>
#include <numeric>
#include <glog/logging.h>
#include <include/ocr_cls.h>
#include <include/ocr_det.h>
#include <include/ocr_rec.h>
......@@ -45,7 +44,7 @@ DEFINE_bool(enable_mkldnn, false, "Whether use mkldnn with CPU.");
DEFINE_bool(use_tensorrt, false, "Whether use tensorrt.");
DEFINE_string(precision, "fp32", "Precision be one of fp32/fp16/int8");
DEFINE_bool(benchmark, false, "Whether use benchmark.");
DEFINE_string(save_log_path, "./log_output/", "Save benchmark log path.");
DEFINE_string(output, "./output/", "Save benchmark log path.");
// detection related
DEFINE_string(image_dir, "", "Dir of input image.");
DEFINE_string(det_model_dir, "", "Path of det inference model.");
......@@ -63,7 +62,7 @@ DEFINE_double(cls_thresh, 0.9, "Threshold of cls_thresh.");
// recognition related
DEFINE_string(rec_model_dir, "", "Path of rec inference model.");
DEFINE_int32(rec_batch_num, 6, "rec_batch_num.");
DEFINE_string(char_list_file, "../../ppocr/utils/ppocr_keys_v1.txt",
DEFINE_string(rec_char_dict_path, "../../ppocr/utils/ppocr_keys_v1.txt",
"Path of dictionary.");
using namespace std;
......@@ -86,11 +85,17 @@ int main_det(std::vector<cv::String> cv_all_img_names) {
FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_enable_mkldnn,
FLAGS_max_side_len, FLAGS_det_db_thresh,
FLAGS_det_db_box_thresh, FLAGS_det_db_unclip_ratio,
FLAGS_use_polygon_score, FLAGS_use_dilation, FLAGS_visualize,
FLAGS_use_polygon_score, FLAGS_use_dilation,
FLAGS_use_tensorrt, FLAGS_precision);
if (!PathExists(FLAGS_output)) {
mkdir(FLAGS_output.c_str(), 0777);
}
for (int i = 0; i < cv_all_img_names.size(); ++i) {
// LOG(INFO) << "The predict img: " << cv_all_img_names[i];
if (!FLAGS_benchmark) {
cout << "The predict img: " << cv_all_img_names[i] << endl;
}
cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);
if (!srcimg.data) {
......@@ -102,7 +107,11 @@ int main_det(std::vector<cv::String> cv_all_img_names) {
std::vector<double> det_times;
det.Run(srcimg, boxes, &det_times);
// visualization
if (FLAGS_visualize) {
std::string file_name = Utility::basename(cv_all_img_names[i]);
Utility::VisualizeBboxes(srcimg, boxes, FLAGS_output + "/" + file_name);
}
time_info[0] += det_times[0];
time_info[1] += det_times[1];
time_info[2] += det_times[2];
......@@ -130,20 +139,18 @@ int main_det(std::vector<cv::String> cv_all_img_names) {
int main_rec(std::vector<cv::String> cv_all_img_names) {
std::vector<double> time_info = {0, 0, 0};
std::string char_list_file = FLAGS_char_list_file;
std::string rec_char_dict_path = FLAGS_rec_char_dict_path;
if (FLAGS_benchmark)
char_list_file = FLAGS_char_list_file.substr(6);
cout << "label file: " << char_list_file << endl;
rec_char_dict_path = FLAGS_rec_char_dict_path.substr(6);
cout << "label file: " << rec_char_dict_path << endl;
CRNNRecognizer rec(FLAGS_rec_model_dir, FLAGS_use_gpu, FLAGS_gpu_id,
FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_enable_mkldnn,
char_list_file, FLAGS_use_tensorrt, FLAGS_precision,
rec_char_dict_path, FLAGS_use_tensorrt, FLAGS_precision,
FLAGS_rec_batch_num);
std::vector<cv::Mat> img_list;
for (int i = 0; i < cv_all_img_names.size(); ++i) {
LOG(INFO) << "The predict img: " << cv_all_img_names[i];
cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);
if (!srcimg.data) {
std::cerr << "[ERROR] image read failed! image path: "
......@@ -152,8 +159,15 @@ int main_rec(std::vector<cv::String> cv_all_img_names) {
}
img_list.push_back(srcimg);
}
std::vector<std::string> rec_texts(img_list.size(), "");
std::vector<float> rec_text_scores(img_list.size(), 0);
std::vector<double> rec_times;
rec.Run(img_list, &rec_times);
rec.Run(img_list, rec_texts, rec_text_scores, &rec_times);
// output rec results
for (int i = 0; i < rec_texts.size(); i++) {
cout << "The predict img: " << cv_all_img_names[i] << "\t" << rec_texts[i]
<< "\t" << rec_text_scores[i] << endl;
}
time_info[0] += rec_times[0];
time_info[1] += rec_times[1];
time_info[2] += rec_times[2];
......@@ -172,11 +186,15 @@ int main_system(std::vector<cv::String> cv_all_img_names) {
std::vector<double> time_info_det = {0, 0, 0};
std::vector<double> time_info_rec = {0, 0, 0};
if (!PathExists(FLAGS_output)) {
mkdir(FLAGS_output.c_str(), 0777);
}
DBDetector det(FLAGS_det_model_dir, FLAGS_use_gpu, FLAGS_gpu_id,
FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_enable_mkldnn,
FLAGS_max_side_len, FLAGS_det_db_thresh,
FLAGS_det_db_box_thresh, FLAGS_det_db_unclip_ratio,
FLAGS_use_polygon_score, FLAGS_use_dilation, FLAGS_visualize,
FLAGS_use_polygon_score, FLAGS_use_dilation,
FLAGS_use_tensorrt, FLAGS_precision);
Classifier *cls = nullptr;
......@@ -186,18 +204,18 @@ int main_system(std::vector<cv::String> cv_all_img_names) {
FLAGS_cls_thresh, FLAGS_use_tensorrt, FLAGS_precision);
}
std::string char_list_file = FLAGS_char_list_file;
std::string rec_char_dict_path = FLAGS_rec_char_dict_path;
if (FLAGS_benchmark)
char_list_file = FLAGS_char_list_file.substr(6);
cout << "label file: " << char_list_file << endl;
rec_char_dict_path = FLAGS_rec_char_dict_path.substr(6);
cout << "label file: " << rec_char_dict_path << endl;
CRNNRecognizer rec(FLAGS_rec_model_dir, FLAGS_use_gpu, FLAGS_gpu_id,
FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_enable_mkldnn,
char_list_file, FLAGS_use_tensorrt, FLAGS_precision,
rec_char_dict_path, FLAGS_use_tensorrt, FLAGS_precision,
FLAGS_rec_batch_num);
for (int i = 0; i < cv_all_img_names.size(); ++i) {
LOG(INFO) << "The predict img: " << cv_all_img_names[i];
cout << "The predict img: " << cv_all_img_names[i] << endl;
cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);
if (!srcimg.data) {
......@@ -205,15 +223,21 @@ int main_system(std::vector<cv::String> cv_all_img_names) {
<< cv_all_img_names[i] << endl;
exit(1);
}
// det
std::vector<std::vector<std::vector<int>>> boxes;
std::vector<double> det_times;
std::vector<double> rec_times;
det.Run(srcimg, boxes, &det_times);
if (FLAGS_visualize) {
std::string file_name = Utility::basename(cv_all_img_names[i]);
Utility::VisualizeBboxes(srcimg, boxes, FLAGS_output + "/" + file_name);
}
time_info_det[0] += det_times[0];
time_info_det[1] += det_times[1];
time_info_det[2] += det_times[2];
// rec
std::vector<cv::Mat> img_list;
for (int j = 0; j < boxes.size(); j++) {
cv::Mat crop_img;
......@@ -223,8 +247,14 @@ int main_system(std::vector<cv::String> cv_all_img_names) {
}
img_list.push_back(crop_img);
}
rec.Run(img_list, &rec_times);
std::vector<std::string> rec_texts(img_list.size(), "");
std::vector<float> rec_text_scores(img_list.size(), 0);
rec.Run(img_list, rec_texts, rec_text_scores, &rec_times);
// output rec results
for (int i = 0; i < rec_texts.size(); i++) {
std::cout << i << "\t" << rec_texts[i] << "\t" << rec_text_scores[i]
<< std::endl;
}
time_info_rec[0] += rec_times[0];
time_info_rec[1] += rec_times[1];
time_info_rec[2] += rec_times[2];
......
......@@ -175,11 +175,6 @@ void DBDetector::Run(cv::Mat &img,
std::chrono::duration<float> postprocess_diff =
postprocess_end - postprocess_start;
times->push_back(double(postprocess_diff.count() * 1000));
//// visualization
if (this->visualize_) {
Utility::VisualizeBboxes(srcimg, boxes);
}
}
} // namespace PaddleOCR
......@@ -17,6 +17,8 @@
namespace PaddleOCR {
void CRNNRecognizer::Run(std::vector<cv::Mat> img_list,
std::vector<std::string> &rec_texts,
std::vector<float> &rec_text_scores,
std::vector<double> *times) {
std::chrono::duration<float> preprocess_diff =
std::chrono::steady_clock::now() - std::chrono::steady_clock::now();
......@@ -86,7 +88,7 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list,
// ctc decode
auto postprocess_start = std::chrono::steady_clock::now();
for (int m = 0; m < predict_shape[0]; m++) {
std::vector<std::string> str_res;
std::string str_res;
int argmax_idx;
int last_index = 0;
float score = 0.f;
......@@ -104,17 +106,16 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list,
if (argmax_idx > 0 && (!(n > 0 && argmax_idx == last_index))) {
score += max_value;
count += 1;
str_res.push_back(label_list_[argmax_idx]);
str_res += label_list_[argmax_idx];
}
last_index = argmax_idx;
}
score /= count;
if (isnan(score))
if (isnan(score)) {
continue;
for (int i = 0; i < str_res.size(); i++) {
std::cout << str_res[i];
}
std::cout << "\tscore: " << score << std::endl;
rec_texts[indices[beg_img_no + m]] = str_res;
rec_text_scores[indices[beg_img_no + m]] = score;
}
auto postprocess_end = std::chrono::steady_clock::now();
postprocess_diff += postprocess_end - postprocess_start;
......
......@@ -40,7 +40,8 @@ std::vector<std::string> Utility::ReadDict(const std::string &path) {
void Utility::VisualizeBboxes(
const cv::Mat &srcimg,
const std::vector<std::vector<std::vector<int>>> &boxes) {
const std::vector<std::vector<std::vector<int>>> &boxes,
const std::string &save_path) {
cv::Mat img_vis;
srcimg.copyTo(img_vis);
for (int n = 0; n < boxes.size(); n++) {
......@@ -54,8 +55,8 @@ void Utility::VisualizeBboxes(
cv::polylines(img_vis, ppt, npt, 1, 1, CV_RGB(0, 255, 0), 2, 8, 0);
}
cv::imwrite("./ocr_vis.png", img_vis);
std::cout << "The detection visualized image saved in ./ocr_vis.png"
cv::imwrite(save_path, img_vis);
std::cout << "The detection visualized image saved in " + save_path
<< std::endl;
}
......@@ -67,7 +68,7 @@ void Utility::GetAllFiles(const char *dir_name,
return;
}
struct stat s;
lstat(dir_name, &s);
stat(dir_name, &s);
if (!S_ISDIR(s.st_mode)) {
std::cout << "dir_name is not a valid directory !" << std::endl;
all_inputs.push_back(dir_name);
......@@ -93,7 +94,7 @@ void Utility::GetAllFiles(const char *dir_name,
}
cv::Mat Utility::GetRotateCropImage(const cv::Mat &srcimage,
std::vector<std::vector<int>> box) {
std::vector<std::vector<int>> box) {
cv::Mat image;
srcimage.copyTo(image);
std::vector<std::vector<int>> points = box;
......@@ -147,17 +148,52 @@ cv::Mat Utility::GetRotateCropImage(const cv::Mat &srcimage,
}
}
std::vector<int> Utility::argsort(const std::vector<float>& array)
{
const int array_len(array.size());
std::vector<int> array_index(array_len, 0);
for (int i = 0; i < array_len; ++i)
array_index[i] = i;
std::vector<int> Utility::argsort(const std::vector<float> &array) {
const int array_len(array.size());
std::vector<int> array_index(array_len, 0);
for (int i = 0; i < array_len; ++i)
array_index[i] = i;
std::sort(array_index.begin(), array_index.end(),
[&array](int pos1, int pos2) {return (array[pos1] < array[pos2]); });
std::sort(
array_index.begin(), array_index.end(),
[&array](int pos1, int pos2) { return (array[pos1] < array[pos2]); });
return array_index;
return array_index;
}
std::string Utility::basename(const std::string &filename) {
if (filename.empty()) {
return "";
}
auto len = filename.length();
auto index = filename.find_last_of("/\\");
if (index == std::string::npos) {
return filename;
}
if (index + 1 >= len) {
len--;
index = filename.substr(0, len).find_last_of("/\\");
if (len == 0) {
return filename;
}
if (index == 0) {
return filename.substr(1, len - 1);
}
if (index == std::string::npos) {
return filename.substr(0, len);
}
return filename.substr(index + 1, len - index - 1);
}
return filename.substr(index + 1, len - index);
}
} // namespace PaddleOCR
\ No newline at end of file
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
\ No newline at end of file
# -*- coding:utf-8 -*-
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
......@@ -7,7 +20,7 @@ import os
import sys
sys.path.insert(0, ".")
import copy
import paddlehub
from paddlehub.common.logger import logger
from paddlehub.module.module import moduleinfo, runnable, serving
import cv2
......@@ -22,10 +35,10 @@ from deploy.hubserving.ocr_cls.params import read_params
@moduleinfo(
name="ocr_cls",
version="1.0.0",
summary="ocr recognition service",
summary="ocr angle cls service",
author="paddle-dev",
author_email="paddle-dev@baidu.com",
type="cv/text_recognition")
type="cv/text_angle_cls")
class OCRCls(hub.Module):
def _initialize(self, use_gpu=False, enable_mkldnn=False):
"""
......@@ -128,6 +141,7 @@ class OCRCls(hub.Module):
if __name__ == '__main__':
ocr = OCRCls()
ocr._initialize()
image_path = [
'./doc/imgs_words/ch/word_1.jpg',
'./doc/imgs_words/ch/word_2.jpg',
......
# -*- coding:utf-8 -*-
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
......
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
\ No newline at end of file
# -*- coding:utf-8 -*-
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
......@@ -8,7 +21,7 @@ import sys
sys.path.insert(0, ".")
import copy
import paddlehub
from paddlehub.common.logger import logger
from paddlehub.module.module import moduleinfo, runnable, serving
import cv2
......@@ -27,7 +40,7 @@ from deploy.hubserving.ocr_system.params import read_params
summary="ocr detection service",
author="paddle-dev",
author_email="paddle-dev@baidu.com",
type="cv/text_recognition")
type="cv/text_detection")
class OCRDet(hub.Module):
def _initialize(self, use_gpu=False, enable_mkldnn=False):
"""
......@@ -126,6 +139,7 @@ class OCRDet(hub.Module):
if __name__ == '__main__':
ocr = OCRDet()
ocr._initialize()
image_path = [
'./doc/imgs/11.jpg',
'./doc/imgs/12.jpg',
......
# -*- coding:utf-8 -*-
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
......
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
\ No newline at end of file
# -*- coding:utf-8 -*-
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
......@@ -7,7 +20,7 @@ import os
import sys
sys.path.insert(0, ".")
import copy
import paddlehub
from paddlehub.common.logger import logger
from paddlehub.module.module import moduleinfo, runnable, serving
import cv2
......@@ -128,6 +141,7 @@ class OCRRec(hub.Module):
if __name__ == '__main__':
ocr = OCRRec()
ocr._initialize()
image_path = [
'./doc/imgs_words/ch/word_1.jpg',
'./doc/imgs_words/ch/word_2.jpg',
......
# -*- coding:utf-8 -*-
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
......
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
\ No newline at end of file
# -*- coding:utf-8 -*-
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
......@@ -9,7 +22,7 @@ sys.path.insert(0, ".")
import copy
import time
import paddlehub
from paddlehub.common.logger import logger
from paddlehub.module.module import moduleinfo, runnable, serving
import cv2
......@@ -28,7 +41,7 @@ from deploy.hubserving.ocr_system.params import read_params
summary="ocr system service",
author="paddle-dev",
author_email="paddle-dev@baidu.com",
type="cv/text_recognition")
type="cv/PP-OCR_system")
class OCRSystem(hub.Module):
def _initialize(self, use_gpu=False, enable_mkldnn=False):
"""
......@@ -134,6 +147,7 @@ class OCRSystem(hub.Module):
if __name__ == '__main__':
ocr = OCRSystem()
ocr._initialize()
image_path = [
'./doc/imgs/11.jpg',
'./doc/imgs/12.jpg',
......
# -*- coding:utf-8 -*-
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
......
[English](readme_en.md) | 简体中文
- [Service Deployment Based on PaddleHub Serving](#基于paddlehub-serving的服务部署)
  - [1. Recent Updates](#1-近期更新)
  - [2. Quick Start](#2-快速启动服务)
    - [2.1 Prepare the Environment](#21-准备环境)
    - [2.2 Download the Inference Model](#22-下载推理模型)
    - [2.3 Install the Service Module](#23-安装服务模块)
    - [2.4 Start the Service](#24-启动服务)
      - [2.4.1 Start from the command line (CPU only)](#241-命令行命令启动仅支持cpu)
      - [2.4.2 Start with a configuration file (CPU and GPU)](#242-配置文件启动支持cpugpu)
  - [3. Send Prediction Requests](#3-发送预测请求)
  - [4. Returned Result Format](#4-返回结果格式说明)
  - [5. Customize the Service Module](#5-自定义修改服务模块)
PaddleOCR provides two service deployment methods:
- Deployment based on PaddleHub Serving: the code is in `./deploy/hubserving`; follow this tutorial;
- Deployment based on PaddleServing: the code is in `./deploy/pdserving`; see the [documentation](../../deploy/pdserving/README_CN.md) for usage.
# Service Deployment Based on PaddleHub Serving
The hubserving deployment directory includes three service packages: detection, recognition, and the two-stage pipeline. Install and start the service package you need. The directory structure is as follows:
The hubserving deployment directory includes six service packages: text detection, text angle classification, text recognition, the three-stage pipeline of text detection + text angle classification + text recognition, table recognition, and PP-Structure. Install and start the service package you need. The directory structure is as follows:
```
deploy/hubserving/
└─ ocr_cls classification module service package
└─ ocr_det detection module service package
└─ ocr_rec recognition module service package
└─ ocr_system detection + recognition pipeline service package
└─ ocr_cls text angle classification module service package
└─ ocr_det text detection module service package
└─ ocr_rec text recognition module service package
└─ ocr_system text detection + text angle classification + text recognition pipeline service package
└─ structure_table table recognition service package
└─ structure_system PP-Structure service package
```
Each service package contains three files. Taking the two-stage pipeline service package as an example, the directory is as follows:
......@@ -23,28 +39,32 @@ deploy/hubserving/ocr_system/
└─ module.py main module, required, contains the complete service logic
└─ params.py parameter file, required, contains parameters such as model paths and pre/post-processing parameters
```
## 1. Recent Updates
## Quick start
* 2022.03.30 Added the PP-Structure and table recognition services.
## 2. Quick Start
The following steps take the detection + recognition two-stage pipeline service as an example. If you only need the detection or recognition service, just replace the corresponding file paths.
### 1. Prepare the environment
### 2.1 Prepare the Environment
```shell
# Install paddlehub
# paddlehub requires python>3.6.2
pip3 install paddlehub==2.1.0 --upgrade -i https://pypi.tuna.tsinghua.edu.cn/simple
pip3 install paddlehub==2.1.0 --upgrade -i https://mirror.baidu.com/pypi/simple
```
### 2. Download the inference model
### 2.2 Download the Inference Model
Before installing the service module, you need to prepare the inference models and put them in the correct paths. The PP-OCRv2 models are used by default, and the default model paths are:
```
detection model: ./inference/ch_PP-OCRv2_det_infer/
recognition model: ./inference/ch_PP-OCRv2_rec_infer/
angle classifier: ./inference/ch_ppocr_mobile_v2.0_cls_infer/
table structure recognition model: ./inference/en_ppocr_mobile_v2.0_table_structure_infer/
```
**The model paths can be viewed and modified in `params.py`.** More models can be downloaded from the [model zoo](../../doc/doc_ch/models_list.md) provided by PaddleOCR, or replaced with your own trained and converted models.
**The model paths can be viewed and modified in `params.py`.** More models can be downloaded from the PaddleOCR model zoos [PP-OCR](../../doc/doc_ch/models_list.md) and [PP-Structure](../../ppstructure/docs/models_list.md), or replaced with your own trained and converted models.
### 3. Install the service module
PaddleOCR provides three service modules; install the ones you need.
### 2.3 Install the Service Module
PaddleOCR provides six service modules; install the ones you need.
* On Linux, the installation examples are as follows:
```shell
......@@ -59,6 +79,12 @@ hub install deploy/hubserving/ocr_rec/
# Or, install the detection + recognition pipeline service module:
hub install deploy/hubserving/ocr_system/
# Or, install the table recognition service module:
hub install deploy/hubserving/structure_table/
# Or, install the PP-Structure service module:
hub install deploy/hubserving/structure_system/
```
* On Windows (the folder separator is `\`), the installation examples are as follows:
......@@ -74,10 +100,16 @@ hub install deploy\hubserving\ocr_rec\
# Or, install the detection + recognition pipeline service module:
hub install deploy\hubserving\ocr_system\
# Or, install the table recognition service module:
hub install deploy\hubserving\structure_table\
# Or, install the PP-Structure service module:
hub install deploy\hubserving\structure_system\
```
### 4. 启动服务
#### 方式1. 命令行命令启动(仅支持CPU)
### 2.4 启动服务
#### 2.4.1. 命令行命令启动(仅支持CPU)
**启动命令:**
```shell
$ hub serving start --modules [Module1==Version1, Module2==Version2, ...] \
......@@ -89,7 +121,7 @@ $ hub serving start --modules [Module1==Version1, Module2==Version2, ...] \
**参数:**
|参数|用途|
|-|-|
|---|---|
|--modules/-m|PaddleHub Serving预安装模型,以多个Module==Version键值对的形式列出<br>*`当不指定Version时,默认选择最新版本`*|
|--port/-p|服务端口,默认为8866|
|--use_multiprocess|是否启用并发方式,默认为单进程方式,推荐多核CPU机器使用此方式<br>*`Windows操作系统只支持单进程方式`*|
......@@ -99,7 +131,7 @@ $ hub serving start --modules [Module1==Version1, Module2==Version2, ...] \
这样就完成了一个服务化API的部署,使用默认端口号8866。
#### 方式2. 配置文件启动(支持CPU、GPU)
#### 2.4.2 配置文件启动(支持CPU、GPU)
**启动命令:**
```hub serving start -c config.json```
......@@ -136,7 +168,7 @@ export CUDA_VISIBLE_DEVICES=3
hub serving start -c deploy/hubserving/ocr_system/config.json
```
## 发送预测请求
## Send prediction requests
## 3. Send Prediction Requests
After the server is configured, you can use the following command to send a prediction request and obtain the result:
......@@ -144,38 +176,46 @@ hub serving start -c deploy/hubserving/ocr_system/config.json
需要给脚本传递2个参数:
- **server_url**:服务地址,格式为
`http://[ip_address]:[port]/predict/[module_name]`
例如,如果使用配置文件启动分类,检测、识别,检测+分类+识别3阶段服务,那么发送请求的url将分别是:
例如,如果使用配置文件启动分类,检测、识别,检测+分类+识别3阶段,表格识别和PP-Structure服务,那么发送请求的url将分别是:
`http://127.0.0.1:8865/predict/ocr_det`
`http://127.0.0.1:8866/predict/ocr_cls`
`http://127.0.0.1:8867/predict/ocr_rec`
`http://127.0.0.1:8868/predict/ocr_system`
- **image_path**: test image path, either a single image path or an image directory path
`http://127.0.0.1:8869/predict/structure_table`
`http://127.0.0.1:8870/predict/structure_system`
- **image_dir**: test image path, either a single image path or an image directory path
- **visualize**: whether to visualize the results, default is False
- **output**: path to save the visualization results, default is `./hubserving_result`
Example:
```python tools/test_hubserving.py http://127.0.0.1:8868/predict/ocr_system ./doc/imgs/```
```python tools/test_hubserving.py --server_url=http://127.0.0.1:8868/predict/ocr_system --image_dir=./doc/imgs/ --visualize=false```
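Besides `tools/test_hubserving.py`, a request can also be sent to the service directly over HTTP. The sketch below follows the usual PaddleHub Serving JSON convention (an `images` list of base64-encoded files and a `results` field in the response); the URL, image path, and field names are assumptions to check against the deployed module:
```python
# Hedged sketch of calling the deployed ocr_system service directly; the
# request/response field names follow the common PaddleHub Serving convention.
import base64
import json
import requests

def image_to_base64(path):
    with open(path, "rb") as f:
        return base64.b64encode(f.read()).decode("utf8")

url = "http://127.0.0.1:8868/predict/ocr_system"
data = {"images": [image_to_base64("./doc/imgs/11.jpg")]}
headers = {"Content-type": "application/json"}
response = requests.post(url=url, headers=headers, data=json.dumps(data))
print(response.json()["results"])
```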
## Returned result format
## 4. Returned Result Format
The returned result is a list; each item in the list is a dict, which may contain the following fields:
|Field|Data type|Meaning|
|----|----|----|
|---|---|---|
|angle|str|text angle|
|text|str|text content|
|confidence|float|confidence of text recognition or text angle classification|
|text_region|list|text location coordinates|
|html|str|HTML string of the table|
|regions|list|result of layout analysis + table recognition + OCR; each item is a list containing `bbox` (region coordinates), `type` (region type), and `res` (region result)|
Different modules return different fields. For example, the result returned by the text recognition service module does not contain `text_region`. Details:
| Field/Module | ocr_det | ocr_cls | ocr_rec | ocr_system |
| ---- | ---- | ---- | ---- | ---- |
|angle| | ✔ | | ✔ |
|text| | |✔|✔|
|confidence| |✔ |✔|✔|
|text_region| ✔| | |✔ |
| Field/Module | ocr_det | ocr_cls | ocr_rec | ocr_system | structure_table | structure_system |
| --- | --- | --- | --- | --- | --- | --- |
|angle| |✔| |✔| | |
|text| | |✔|✔| |✔|
|confidence| |✔|✔|✔| |✔|
|text_region| ✔| | |✔ | | ✔|
|html| | | | |✔ |✔|
|regions| | | | |✔ |✔ |
**Note:** If you need to add, delete, or modify the returned fields, edit the `module.py` file of the corresponding module. See the next section on customizing the service module for the complete process.
## Customize the service module
## 5. Customize the Service Module
If you need to modify the service logic, the following steps are generally required (taking `ocr_system` as an example):
- 1. Stop the service
......
English | [简体中文](readme.md)
- [Service deployment based on PaddleHub Serving](#service-deployment-based-on-paddlehub-serving)
- [1. Update](#1-update)
- [2. Quick start service](#2-quick-start-service)
- [2.1 Prepare the environment](#21-prepare-the-environment)
- [2.2 Download inference model](#22-download-inference-model)
- [2.3 Install Service Module](#23-install-service-module)
- [2.4 Start service](#24-start-service)
- [2.4.1 Start with command line parameters (CPU only)](#241-start-with-command-line-parameters-cpu-only)
- [2.4.2 Start with configuration file(CPU、GPU)](#242-start-with-configuration-filecpugpu)
- [3. Send prediction requests](#3-send-prediction-requests)
- [4. Returned result format](#4-returned-result-format)
- [5. User defined service module modification](#5-user-defined-service-module-modification)
PaddleOCR provides 2 service deployment methods:
- Based on **PaddleHub Serving**: Code path is "`./deploy/hubserving`". Please follow this tutorial.
- Based on **PaddleServing**: Code path is "`./deploy/pdserving`". Please refer to the [tutorial](../../deploy/pdserving/README.md) for usage.
# Service deployment based on PaddleHub Serving
The hubserving service deployment directory includes three service packages: detection, recognition, and two-stage series connection. Please select the corresponding service package to install and start service according to your needs. The directory is as follows:
The hubserving service deployment directory includes six service packages: text detection, text angle classification, text recognition, the three-stage pipeline of text detection + text angle classification + text recognition, table recognition, and PP-Structure. Please select the corresponding service package to install and start the service according to your needs. The directory is as follows:
```
deploy/hubserving/
└─ ocr_det detection module service package
└─ ocr_cls angle class module service package
└─ ocr_rec recognition module service package
└─ ocr_system two-stage series connection service package
└─ ocr_det text detection module service package
└─ ocr_cls text angle class module service package
└─ ocr_rec text recognition module service package
└─ ocr_system text detection+text angle class+text recognition three-stage series connection service package
└─ structure_table table recognition service package
└─ structure_system PP-Structure service package
```
Each service pack contains 3 files. Take the 2-stage series connection service package as an example, the directory is as follows:
......@@ -23,43 +39,54 @@ deploy/hubserving/ocr_system/
└─ module.py Main module file, required, contains the complete logic of the service
└─ params.py Parameter file, required, including parameters such as model path, pre- and post-processing parameters
```
## 1. Update
* 2022.03.30 Added the PP-Structure and table recognition services.
## Quick start service
## 2. Quick start service
The following steps take the 2-stage series service as an example. If only the detection service or recognition service is needed, replace the corresponding file path.
### 1. Prepare the environment
### 2.1 Prepare the environment
```shell
# Install paddlehub
# python>3.6.2 is required by paddlehub
pip3 install paddlehub==2.1.0 --upgrade -i https://pypi.tuna.tsinghua.edu.cn/simple
```
### 2. Download inference model
### 2.2 Download inference model
Before installing the service module, you need to prepare the inference model and put it in the correct path. By default, the PP-OCRv2 models are used, and the default model path is:
```
detection model: ./inference/ch_PP-OCRv2_det_infer/
recognition model: ./inference/ch_PP-OCRv2_rec_infer/
text direction classifier: ./inference/ch_ppocr_mobile_v2.0_cls_infer/
text detection model: ./inference/ch_PP-OCRv2_det_infer/
text recognition model: ./inference/ch_PP-OCRv2_rec_infer/
text angle classifier: ./inference/ch_ppocr_mobile_v2.0_cls_infer/
table structure recognition model: ./inference/en_ppocr_mobile_v2.0_table_structure_infer/
```
**The model path can be found and modified in `params.py`.** More models provided by PaddleOCR can be obtained from the [model library](../../doc/doc_en/models_list_en.md). You can also use models trained by yourself.
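For orientation, `params.py` in each service package exposes these paths through the `read_params()` helper that the modules import; a hedged sketch of what overriding the model paths looks like, with attribute names mirroring the default paths listed above (they may differ between versions):
```python
# Hypothetical sketch of the kind of configuration read_params() returns;
# check the params.py shipped with your service package for the exact fields.
class Config(object):
    pass

def read_params():
    cfg = Config()
    cfg.det_model_dir = "./inference/ch_PP-OCRv2_det_infer/"
    cfg.rec_model_dir = "./inference/ch_PP-OCRv2_rec_infer/"
    cfg.cls_model_dir = "./inference/ch_ppocr_mobile_v2.0_cls_infer/"
    cfg.use_angle_cls = True
    return cfg
```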
### 3. Install Service Module
PaddleOCR provides 3 kinds of service modules, install the required modules according to your needs.
### 2.3 Install Service Module
PaddleOCR provides six kinds of service modules, install the required modules according to your needs.
* On Linux platform, the examples are as follows.
```shell
# Install the detection service module:
# Install the text detection service module:
hub install deploy/hubserving/ocr_det/
# Or, install the angle class service module:
# Or, install the text angle class service module:
hub install deploy/hubserving/ocr_cls/
# Or, install the recognition service module:
# Or, install the text recognition service module:
hub install deploy/hubserving/ocr_rec/
# Or, install the 2-stage series service module:
hub install deploy/hubserving/ocr_system/
# Or install table recognition service module
hub install deploy/hubserving/structure_table/
# Or install PP-Structure service module
hub install deploy/hubserving/structure_system/
```
* On Windows platform, the examples are as follows.
......@@ -75,10 +102,16 @@ hub install deploy\hubserving\ocr_rec\
# Or, install the 2-stage series service module:
hub install deploy\hubserving\ocr_system\
# Or install table recognition service module
hub install deploy\hubserving\structure_table\
# Or install PP-Structure service module
hub install deploy\hubserving\structure_system\
```
### 4. Start service
#### Way 1. Start with command line parameters (CPU only)
### 2.4 Start service
#### 2.4.1 Start with command line parameters (CPU only)
**start command:**
```shell
......@@ -90,7 +123,7 @@ $ hub serving start --modules [Module1==Version1, Module2==Version2, ...] \
**parameters:**
|parameters|usage|
|-|-|
|---|---|
|--modules/-m|PaddleHub Serving pre-installed model, listed in the form of multiple Module==Version key-value pairs<br>*`When Version is not specified, the latest version is selected by default`*|
|--port/-p|Service port, default is 8866|
|--use_multiprocess|Enable concurrent mode, the default is single-process mode, this mode is recommended for multi-core CPU machines<br>*`Windows operating system only supports single-process mode`*|
......@@ -103,7 +136,7 @@ hub serving start -m ocr_system
This completes the deployment of a service API, using the default port number 8866.
#### Way 2. Start with configuration file(CPU、GPU)
#### 2.4.2 Start with configuration file(CPU、GPU)
**start command:**
```shell
hub serving start --config/-c config.json
......@@ -140,7 +173,7 @@ export CUDA_VISIBLE_DEVICES=3
hub serving start -c deploy/hubserving/ocr_system/config.json
```
## Send prediction requests
## 3. Send prediction requests
After the service starts, you can use the following command to send a prediction request to obtain the prediction result:
```shell
python tools/test_hubserving.py server_url image_path
......@@ -149,19 +182,24 @@ python tools/test_hubserving.py server_url image_path
The following parameters need to be passed to the script:
- **server_url**: service address, in the format
`http://[ip_address]:[port]/predict/[module_name]`
For example, if the detection, recognition and 2-stage serial services are started with provided configuration files, the respective `server_url` would be:
For example, if the text detection, text angle classification, text recognition, 3-stage detection+classification+recognition, table recognition, and PP-Structure services are started with the provided configuration files, the respective `server_url` would be:
`http://127.0.0.1:8865/predict/ocr_det`
`http://127.0.0.1:8866/predict/ocr_cls`
`http://127.0.0.1:8867/predict/ocr_rec`
`http://127.0.0.1:8868/predict/ocr_system`
- **image_path**:Test image path, can be a single image path or an image directory path
`http://127.0.0.1:8869/predict/structure_table`
`http://127.0.0.1:8870/predict/structure_system`
- **image_dir**: Test image path, can be a single image path or an image directory path
- **visualize**: Whether to visualize the results, the default value is False
- **output**: The folder to save visualization results, the default value is `./hubserving_result`
**Eg.**
```shell
python tools/test_hubserving.py http://127.0.0.1:8868/predict/ocr_system ./doc/imgs/
python tools/test_hubserving.py --server_url=http://127.0.0.1:8868/predict/ocr_system --image_dir=./doc/imgs/ --visualize=false
```
## Returned result format
## 4. Returned result format
The returned result is a list. Each item in the list is a dict, which may contain the following fields:
|field name|data type|description|
......@@ -170,19 +208,23 @@ The returned result is a list. Each item in the list is a dict. The dict may con
|text|str|text content|
|confidence|float|text recognition confidence|
|text_region|list|text location coordinates|
|html|str|HTML string of the table|
|regions|list|The result of layout analysis + table recognition + OCR, each item is a list, including `bbox` indicating area coordinates, `type` of area type and `res` of area results|
The fields returned by different modules are different. For example, the results returned by the text recognition service module do not contain `text_region`. The details are as follows:
| field name/module name | ocr_det | ocr_cls | ocr_rec | ocr_system |
| ---- | ---- | ---- | ---- | ---- |
|angle| | ✔ | | ✔ |
|text| | |✔|✔|
|confidence| |✔ |✔|✔|
|text_region| ✔| | |✔ |
| field name/module name | ocr_det | ocr_cls | ocr_rec | ocr_system | structure_table | structure_system |
| --- | --- | --- | --- | --- | --- | --- |
|angle| | ✔ | | ✔ | ||
|text| | |✔|✔| | ✔ |
|confidence| |✔ |✔|✔| | ✔|
|text_region| ✔| | |✔ | | ✔|
|html| | | | |✔ |✔|
|regions| | | | |✔ |✔ |
**Note:** If you need to add, delete or modify the returned fields, you can modify the file `module.py` of the corresponding module. For the complete process, refer to the user-defined modification service module in the next section.
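To illustrate the field layout above, a short sketch that walks an `ocr_system` result; it assumes `res` is the list of dicts returned for one input image, with the field names taken from the table:
```python
# Sketch of consuming one image's ocr_system result shaped like the table above.
def print_ocr_result(res):
    for item in res:
        text = item.get("text")
        confidence = item.get("confidence")
        text_region = item.get("text_region")  # corner points of the text box
        print(text, confidence, text_region)
```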
## User defined service module modification
## 5. User defined service module modification
If you need to modify the service logic, the following steps are generally required (take the modification of `ocr_system` for example):
- 1. Stop service
......
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
{
"modules_info": {
"structure_system": {
"init_args": {
"version": "1.0.0",
"use_gpu": true
},
"predict_args": {
}
}
},
"port": 8870,
"use_multiprocess": false,
"workers": 2
}
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
sys.path.insert(0, ".")
import copy
import time
import paddlehub
from paddlehub.common.logger import logger
from paddlehub.module.module import moduleinfo, runnable, serving
import cv2
import numpy as np
import paddlehub as hub
from tools.infer.utility import base64_to_cv2
from ppstructure.predict_system import StructureSystem as PPStructureSystem
from ppstructure.predict_system import save_structure_res
from ppstructure.utility import parse_args
from deploy.hubserving.structure_system.params import read_params
@moduleinfo(
name="structure_system",
version="1.0.0",
summary="PP-Structure system service",
author="paddle-dev",
author_email="paddle-dev@baidu.com",
type="cv/structure_system")
class StructureSystem(hub.Module):
def _initialize(self, use_gpu=False, enable_mkldnn=False):
"""
initialize with the necessary elements
"""
cfg = self.merge_configs()
cfg.use_gpu = use_gpu
if use_gpu:
try:
_places = os.environ["CUDA_VISIBLE_DEVICES"]
int(_places[0])
print("use gpu: ", use_gpu)
print("CUDA_VISIBLE_DEVICES: ", _places)
cfg.gpu_mem = 8000
except:
raise RuntimeError(
"Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES via export CUDA_VISIBLE_DEVICES=cuda_device_id."
)
cfg.ir_optim = True
cfg.enable_mkldnn = enable_mkldnn
self.table_sys = PPStructureSystem(cfg)
def merge_configs(self):
# default cfg
backup_argv = copy.deepcopy(sys.argv)
sys.argv = sys.argv[:1]
cfg = parse_args()
update_cfg_map = vars(read_params())
for key in update_cfg_map:
cfg.__setattr__(key, update_cfg_map[key])
sys.argv = copy.deepcopy(backup_argv)
return cfg
def read_images(self, paths=[]):
images = []
for img_path in paths:
assert os.path.isfile(
img_path), "The {} isn't a valid file.".format(img_path)
img = cv2.imread(img_path)
if img is None:
logger.info("error in loading image:{}".format(img_path))
continue
images.append(img)
return images
def predict(self, images=[], paths=[]):
"""
Analyze the layout and text/table content of the input images.
Args:
    images (list(numpy.ndarray)): images data, shape of each is [H, W, C]. Provide either images or paths.
    paths (list[str]): the paths of images. Provide either paths or images.
Returns:
    res (list): the structure analysis result of each image.
"""
if images != [] and isinstance(images, list) and paths == []:
predicted_data = images
elif images == [] and isinstance(paths, list) and paths != []:
predicted_data = self.read_images(paths)
else:
raise TypeError("The input data is inconsistent with expectations.")
assert predicted_data != [], "There is not any image to be predicted. Please check the input data."
all_results = []
for img in predicted_data:
if img is None:
logger.info("error in loading image")
all_results.append([])
continue
starttime = time.time()
res = self.table_sys(img)
elapse = time.time() - starttime
logger.info("Predict time: {}".format(elapse))
# parse result
res_final = []
for region in res:
region.pop('img')
res_final.append(region)
all_results.append({'regions': res_final})
return all_results
@serving
def serving_method(self, images, **kwargs):
"""
Run as a service.
"""
images_decode = [base64_to_cv2(image) for image in images]
results = self.predict(images_decode, **kwargs)
return results
if __name__ == '__main__':
structure_system = StructureSystem()
structure_system._initialize()
image_path = ['./doc/table/1.png']
res = structure_system.predict(paths=image_path)
print(res)
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from deploy.hubserving.structure_table.params import read_params as table_read_params
def read_params():
cfg = table_read_params()
# params for layout parser model
cfg.layout_path_model = 'lp://PubLayNet/ppyolov2_r50vd_dcn_365e_publaynet/config'
cfg.layout_label_map = None
cfg.mode = 'structure'
cfg.output = './output'
return cfg
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
{
"modules_info": {
"structure_table": {
"init_args": {
"version": "1.0.0",
"use_gpu": true
},
"predict_args": {
}
}
},
"port": 8869,
"use_multiprocess": false,
"workers": 2
}
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
sys.path.insert(0, ".")
import copy
import time
import paddlehub
from paddlehub.common.logger import logger
from paddlehub.module.module import moduleinfo, runnable, serving
import cv2
import numpy as np
import paddlehub as hub
from tools.infer.utility import base64_to_cv2
from ppstructure.table.predict_table import TableSystem as _TableSystem
from ppstructure.predict_system import save_structure_res
from ppstructure.utility import parse_args
from deploy.hubserving.structure_table.params import read_params
@moduleinfo(
name="structure_table",
version="1.0.0",
summary="PP-Structure table service",
author="paddle-dev",
author_email="paddle-dev@baidu.com",
type="cv/structure_table")
class TableSystem(hub.Module):
def _initialize(self, use_gpu=False, enable_mkldnn=False):
"""
initialize with the necessary elements
"""
cfg = self.merge_configs()
cfg.use_gpu = use_gpu
if use_gpu:
try:
_places = os.environ["CUDA_VISIBLE_DEVICES"]
int(_places[0])
print("use gpu: ", use_gpu)
print("CUDA_VISIBLE_DEVICES: ", _places)
cfg.gpu_mem = 8000
except:
raise RuntimeError(
"Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES via export CUDA_VISIBLE_DEVICES=cuda_device_id."
)
cfg.ir_optim = True
cfg.enable_mkldnn = enable_mkldnn
self.table_sys = _TableSystem(cfg)
def merge_configs(self):
# default cfg
backup_argv = copy.deepcopy(sys.argv)
sys.argv = sys.argv[:1]
cfg = parse_args()
update_cfg_map = vars(read_params())
for key in update_cfg_map:
cfg.__setattr__(key, update_cfg_map[key])
sys.argv = copy.deepcopy(backup_argv)
return cfg
def read_images(self, paths=[]):
images = []
for img_path in paths:
assert os.path.isfile(
img_path), "The {} isn't a valid file.".format(img_path)
img = cv2.imread(img_path)
if img is None:
logger.info("error in loading image:{}".format(img_path))
continue
images.append(img)
return images
def predict(self, images=[], paths=[]):
"""
Recognize the table structure and content of the input images.
Args:
    images (list(numpy.ndarray)): images data, shape of each is [H, W, C]. Provide either images or paths.
    paths (list[str]): the paths of images. Provide either paths or images.
Returns:
    res (list): the predicted html string of each table image.
"""
if images != [] and isinstance(images, list) and paths == []:
predicted_data = images
elif images == [] and isinstance(paths, list) and paths != []:
predicted_data = self.read_images(paths)
else:
raise TypeError("The input data is inconsistent with expectations.")
assert predicted_data != [], "There is not any image to be predicted. Please check the input data."
all_results = []
for img in predicted_data:
if img is None:
logger.info("error in loading image")
all_results.append([])
continue
starttime = time.time()
pred_html = self.table_sys(img)
elapse = time.time() - starttime
logger.info("Predict time: {}".format(elapse))
all_results.append({'html': pred_html})
return all_results
@serving
def serving_method(self, images, **kwargs):
"""
Run as a service.
"""
images_decode = [base64_to_cv2(image) for image in images]
results = self.predict(images_decode, **kwargs)
return results
if __name__ == '__main__':
table_system = TableSystem()
table_system._initialize()
image_path = ['./doc/table/table.jpg']
res = table_system.predict(paths=image_path)
print(res)
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from deploy.hubserving.ocr_system.params import read_params as pp_ocr_read_params
def read_params():
cfg = pp_ocr_read_params()
# params for table structure model
cfg.table_max_len = 488
cfg.table_model_dir = './inference/en_ppocr_mobile_v2.0_table_structure_infer/'
cfg.table_char_type = 'en'
cfg.table_char_dict_path = './ppocr/utils/dict/table_structure_dict.txt'
cfg.show_log = False
return cfg
# iOS Demo
See https://github.com/PaddlePaddle/Paddle-Lite-Demo/blob/develop/ocr/ios/ppocr_demo/ppocr_demo/README.md
......@@ -118,6 +118,11 @@ def main(config, device, logger, vdl_writer):
config['Architecture']["Head"]['out_channels'] = char_num
model = build_model(config['Architecture'])
pre_best_model_dict = dict()
# load fp32 model to begin quantization
if config["Global"]["pretrained_model"] is not None:
pre_best_model_dict = load_model(config, model)
quanter = QAT(config=quant_config, act_preprocess=PACT)
quanter.quantize(model)
......@@ -134,10 +139,12 @@ def main(config, device, logger, vdl_writer):
step_each_epoch=len(train_dataloader),
parameters=model.parameters())
# resume PACT training process
if config["Global"]["checkpoints"] is not None:
pre_best_model_dict = load_model(config, model, optimizer)
# build metric
eval_class = build_metric(config['Metric'])
# load pretrain model
pre_best_model_dict = load_model(config, model, optimizer)
logger.info('train dataloader has {} iters, valid dataloader has {} iters'.
format(len(train_dataloader), len(valid_dataloader)))
......
# Model Fine-tuning
## 1. Background and motivation of model fine-tuning
The PP-OCR series models provided by PaddleOCR perform well in general scenarios and solve detection and recognition problems in the vast majority of cases. If better results are needed in a vertical scenario, the accuracy of the PP-OCR detection and recognition models can be further improved by fine-tuning.
This document introduces the main points to pay attention to when fine-tuning the text detection and recognition models, so that you can obtain higher-accuracy models for your own scenario.
The key points are as follows.
1. The pretrained models provided by PP-OCR generalize well.
2. Adding a small amount of real data (>=500 images for detection, >=5000 images for recognition) greatly improves detection and recognition in vertical scenarios.
3. Adding real general-scenario data during fine-tuning can further improve accuracy and generalization.
4. For text detection, increasing the prediction scale of the image further improves the detection of small text regions.
5. When fine-tuning, the hyperparameters (learning rate and batch size are the most important) need to be adjusted appropriately to obtain better results.
For more details, please refer to Chapter 2 and Chapter 3.
## 2. Fine-tuning the text detection model
### 2.1 Data selection
* Data volume: it is recommended to prepare at least 500 text detection images for fine-tuning.
* Data annotation: single-line text annotation format; the annotated detection boxes should be consistent with the actual semantic content. For example, on train tickets the surname and given name may be far apart, but they semantically belong to the same detection field, so the whole name should be annotated as one detection box.
### 2.2 Model selection
It is recommended to fine-tune the PP-OCRv2 model (config file: [ch_PP-OCRv2_det_student.yml](../../configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_student.yml), pretrained model: [ch_PP-OCRv2_det_distill_train.tar](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_distill_train.tar)); its accuracy and generalization are the best among the pretrained models currently provided.
For more PP-OCR series models, please refer to the [PaddleOCR README](../../README_ch.md).
Note: the saved pretrained model above also contains the teacher model, so the student model needs to be extracted from it first; fine-tuning then simply loads the student model.
```python
import paddle
# load the full detection pretrained model
a = paddle.load("ch_PP-OCRv2_det_distill_train/best_accuracy.pdparams")
# extract the parameters of the student model
b = {k[len("student_model."):]: a[k] for k in a if "student_model." in k}
# save the model for subsequent fine-tuning
paddle.save(b, "ch_PP-OCRv2_det_student.pdparams")
```
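As a quick sanity check (a sketch, using the file saved above), the extracted state dict should only contain plain parameter names, without the `student_model.` prefix:
```python
import paddle

state = paddle.load("ch_PP-OCRv2_det_student.pdparams")
print(len(state))
print(list(state.keys())[:3])  # plain backbone/neck/head parameter names
```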
### 2.3 Choice of training hyperparameters
When fine-tuning, the most important hyperparameters are the pretrained model path `pretrained_model`, the learning rate `learning_rate` and `batch_size`. Part of the config file is shown below.
```yaml
Global:
pretrained_model: ./pretrain_models/student.pdparams # path of the pretrained model
Optimizer:
lr:
name: Cosine
learning_rate: 0.001 # learning rate
warmup_epoch: 2
regularizer:
name: 'L2'
factor: 0
Train:
loader:
shuffle: True
drop_last: False
batch_size_per_card: 8 # batch size per card
num_workers: 4
```
In the config above, first set the `pretrained_model` field to the path of the `ch_PP-OCRv2_det_student.pdparams` file extracted in Section 2.2.
The config file released by PaddleOCR assumes 8-GPU training (i.e. a total batch size of `8*8=64`) without loading a pretrained model, so in your scenario the learning rate should be adjusted linearly with the total batch size, for example:
* For single-card training with batch_size=8, the total batch_size is 8, and the learning rate should be adjusted to around `1e-4`.
* For single-card training where memory limits force batch_size=4, the total batch_size is 4, and the learning rate should be adjusted to around `5e-5`; the scaling rule is written out below.
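A small sketch of this linear scaling rule, with the reference values taken from the released config:
```python
# released config: 8 GPUs x batch_size_per_card 8 = total batch size 64, lr 1e-3
ref_total_bs, ref_lr = 64, 1e-3

def scaled_lr(total_bs):
    return ref_lr * total_bs / ref_total_bs

print(scaled_lr(8))   # 1.25e-04, close to the recommended ~1e-4
print(scaled_lr(4))   # 6.25e-05, close to the recommended ~5e-5
```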
### 2.4 Choice of prediction hyperparameters
When exporting the trained model and running inference, the detection of small text regions can be improved by further increasing the prediction image scale. The DBNet inference hyperparameters below can also be tuned to improve results.
| parameter | type | default | description |
| :--: | :--: | :--: | :--: |
| det_db_thresh | float | 0.3 | In the probability map output by DB, only pixels with a score greater than this threshold are treated as text pixels |
| det_db_box_thresh | float | 0.6 | A detection box is kept as a text region only if the average score of all pixels inside it is greater than this threshold |
| det_db_unclip_ratio | float | 1.5 | Expansion ratio of the `Vatti clipping` algorithm, used to expand the text region |
| max_batch_size | int | 10 | Batch size for prediction |
| use_dilation | bool | False | Whether to dilate the segmentation result for better detection |
| det_db_score_mode | str | "fast" | Scoring method for DB detection results, `fast` or `slow`: `fast` averages the scores of all pixels inside the bounding rectangle of the polygon, while `slow` averages the scores of all pixels inside the original polygon, which is slower but more accurate |
For more about the inference options, please refer to the [Paddle Inference tutorial](./inference.md).
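If prediction is done with the `paddleocr` whl package instead, these hyperparameters can be passed as keyword arguments; a sketch (argument names follow `tools/infer/utility.py`, and the image path is a placeholder):
```python
from paddleocr import PaddleOCR

ocr = PaddleOCR(
    det_db_thresh=0.3,
    det_db_box_thresh=0.6,
    det_db_unclip_ratio=1.5,
    det_db_score_mode="slow",  # slower but more accurate box scoring
    use_dilation=True,
)
result = ocr.ocr("./doc/imgs/12.jpg")
```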
## 3. Fine-tuning the text recognition model
### 3.1 Data selection
* Data volume: without changing the character dictionary, it is recommended to prepare at least 5000 text recognition images for fine-tuning; if the dictionary is changed (not recommended), even more data is needed.
* Data distribution: the distribution should match the real scenario as closely as possible. If the real scenario contains a lot of short text, the training data should also contain many short texts; if the real scenario puts high demands on recognizing spaces, the training data should also contain plenty of text with spaces.
* General Chinese and English data: general real data can be added to the training set (for fine-tuning without changing the dictionary, real datasets such as LSVT, RCTW and MTWI are recommended) to further improve the generalization of the model.
### 3.2 Model selection
It is recommended to fine-tune the PP-OCRv2 model (config file: [ch_PP-OCRv2_rec_distillation.yml](../../configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_distillation.yml), pretrained model: [ch_PP-OCRv2_rec_train.tar](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_train.tar)); its accuracy and generalization are the best among the pretrained models currently provided.
For more PP-OCR series models, please refer to the [PaddleOCR README](../../README_ch.md).
### 3.3 Choice of training hyperparameters
As with detection fine-tuning, the most important hyperparameters when fine-tuning the recognition model are the pretrained model path `pretrained_model`, the learning rate `learning_rate` and `batch_size`. Part of the default config file is shown below.
```yaml
Global:
pretrained_model: # path of the pretrained model
Optimizer:
lr:
name: Piecewise
decay_epochs : [700, 800]
values : [0.001, 0.0001] # learning rate
warmup_epoch: 5
regularizer:
name: 'L2'
factor: 0
Train:
dataset:
name: SimpleDataSet
data_dir: ./train_data/
label_file_list:
- ./train_data/train_list.txt
ratio_list: [1.0] # sampling ratio, the default value is [1.0]
loader:
shuffle: True
drop_last: False
batch_size_per_card: 128 # batch size per card
num_workers: 8
```
In the config above, first set the `pretrained_model` field to the path of the `ch_PP-OCRv2_rec_train/best_accuracy.pdparams` file obtained by unpacking the pretrained model from Section 3.2.
The config file released by PaddleOCR assumes 8-GPU training (i.e. a total batch size of `8*128=1024`) without loading a pretrained model, so in your scenario the learning rate should be adjusted linearly with the total batch size, for example:
* For single-card training with batch_size=128, the total batch_size is 128; when loading the pretrained model, the learning rate should be adjusted to around `[1e-4, 2e-5]` (the piecewise learning rate strategy needs two values, same below).
* For single-card training where memory limits force batch_size=64, the total batch_size is 64; when loading the pretrained model, the learning rate should be adjusted to around `[5e-5, 1e-5]`.
If general real-scenario data is added, it is recommended to keep the amount of vertical-scenario data and general data around 1:1 in each epoch.
For example: your own vertical-scenario recognition data contains 10k samples with label file `vertical.txt`, and the collected general-scenario recognition data contains 100k samples with label file `general.txt`.
Then `label_file_list` and `ratio_list` can be set as shown below. In each epoch, `vertical.txt` is fully sampled (sampling ratio 1.0), contributing 10k samples, while `general.txt` is sampled with a ratio of 0.1, contributing `100k*0.1=10k` samples, so the two end up at a `1:1` ratio.
```yaml
Train:
dataset:
name: SimpleDataSet
data_dir: ./train_data/
label_file_list:
- vertical.txt
- general.txt
ratio_list: [1.0, 0.1]
```
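The sampling ratio for the general data can be derived directly from the two dataset sizes; a small sketch of the 1:1 balancing described above:
```python
vertical_num, general_num = 10_000, 100_000

# fully sample the vertical data and down-sample the general data to match it
ratio_list = [1.0, vertical_num / general_num]
print(ratio_list)                             # [1.0, 0.1]
print(vertical_num * 1.0, general_num * 0.1)  # 10000.0 samples from each per epoch
```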
......@@ -36,6 +36,8 @@ inference 模型(`paddle.jit.save`保存的模型)
- [6. Parameter description](#参数解释)
- [7. FAQ](#FAQ)
<a name="训练模型转inference模型"></a>
## 1. Converting the training model to an inference model
......@@ -520,3 +522,9 @@ PSE算法相关参数如下
| label_list | list | ['0', '180'] | Angle values corresponding to the class ids |
| cls_batch_num | int | 6 | Batch size for direction classifier prediction |
| cls_thresh | float | 0.9 | Prediction threshold: when the model predicts 180 degrees with a score greater than this threshold, the final prediction is taken as 180 degrees and the image is flipped |
# 7. FAQ
* If the inference model was exported with code older than paddle 2.0, its files are named `model` and `params`, corresponding to the `inference.pdmodel` and `inference.pdiparams` exported by paddle 2.0 and later. The current PaddleOCR release branch no longer supports inference models exported by versions older than paddle 2.0; to use such a model, please use the code and documentation of the develop branch (the static-graph branch).
......@@ -75,9 +75,9 @@ train_data/rec/train/word_002.jpg 用科技让复杂的世界更简单
In the example label file above, "11.jpg" and "12.jpg" share the same label `简单可依赖`; during training, one of the two images in that line is selected at random.
- Validation set
Similar to the training set, the validation set also needs a folder containing all the images (test) and a rec_gt_test.txt label file; the structure of the validation set is as follows:
```
|-train_data
......@@ -247,7 +247,10 @@ PaddleOCR支持训练和评估交替进行, 可以在 `configs/rec/rec_icdar15_t
| rec_r31_sar.yml | SAR | ResNet31 | None | LSTM encoder | LSTM decoder |
| rec_resnet_stn_bilstm_att.yml | SEED | Aster_Resnet | STN | BiLSTM | att |
*The SEED model additionally needs the FastText-trained [language model](https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.en.300.bin.gz) and requires the fasttext dependency to be installed:
```
python3.7 -m pip install fasttext==0.9.1
```
For training on Chinese data, [rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml) is recommended. If you want to try other algorithms on a Chinese dataset, please modify the config file as described below:
......
......@@ -418,7 +418,7 @@ im_show.save('result.jpg')
| det | Enable detection in the forward pass | TRUE |
| rec | Enable recognition in the forward pass | TRUE |
| cls | Enable classification in the forward pass (in command-line mode, use use_angle_cls to control whether classification is enabled) | FALSE |
| show_log | Whether to print logger messages | FALSE |
| type | Run ocr or table structuring; the value can be one of ['ocr','structure'] | ocr |
| ocr_version | OCR model version, PP-OCRv2 or PP-OCR. PP-OCRv2 currently only supports Chinese detection and recognition models; PP-OCR supports Chinese detection, recognition and multilingual recognition, as well as the direction classifier | PP-OCRv2 |
| structure_version | Table structuring model version, currently STRUCTURE. STRUCTURE supports the table structuring model | STRUCTURE |
......@@ -365,7 +365,7 @@ im_show.save('result.jpg')
| det | Enable detection when `ppocr.ocr` func exec | TRUE |
| rec | Enable recognition when `ppocr.ocr` func exec | TRUE |
| cls | Enable classification when `ppocr.ocr` func exec (use use_angle_cls in command line mode to control whether to start classification in the forward direction) | FALSE |
| show_log | Whether to print log | FALSE |
| type | Perform ocr or table structuring, the value is selected in ['ocr','structure'] | ocr |
| ocr_version | OCR Model version number, the current model support list is as follows: PP-OCRv2 support Chinese detection and recognition model, PP-OCR support Chinese detection, recognition and direction classifier, multilingual recognition model | PP-OCRv2 |
| structure_version | table structure Model version number, the current model support list is as follows: STRUCTURE support english table structure model | STRUCTURE |
......@@ -14,6 +14,7 @@
import os
import sys
import importlib
__dir__ = os.path.dirname(__file__)
......@@ -26,6 +27,10 @@ import logging
import numpy as np
from pathlib import Path
tools = importlib.import_module('.', 'tools')
ppocr = importlib.import_module('.', 'ppocr')
ppstructure = importlib.import_module('.', 'ppstructure')
from tools.infer import predict_system
from ppocr.utils.logging import get_logger
......@@ -34,7 +39,7 @@ from ppocr.utils.utility import check_and_read_gif, get_image_file_list
from ppocr.utils.network import maybe_download, download_with_progressbar, is_link, confirm_model_dir_url
from tools.infer.utility import draw_ocr, str2bool, check_gpu
from ppstructure.utility import init_args, draw_structure_result
from ppstructure.predict_system import OCRSystem, save_structure_res
from ppstructure.predict_system import StructureSystem, save_structure_res
__all__ = [
'PaddleOCR', 'PPStructure', 'draw_ocr', 'draw_structure_result',
......@@ -42,7 +47,7 @@ __all__ = [
]
SUPPORT_DET_MODEL = ['DB']
VERSION = '2.4'
VERSION = '2.4.0.4'
SUPPORT_REC_MODEL = ['CRNN']
BASE_DIR = os.path.expanduser("~/.paddleocr/")
......@@ -308,20 +313,18 @@ class PaddleOCR(predict_system.TextSystem):
det_lang)
params.det_model_dir, det_url = confirm_model_dir_url(
params.det_model_dir,
os.path.join(BASE_DIR, VERSION, 'ocr', 'det', det_lang),
os.path.join(BASE_DIR, 'whl', 'det', det_lang),
det_model_config['url'])
rec_model_config = get_model_config('OCR', params.ocr_version, 'rec',
lang)
params.rec_model_dir, rec_url = confirm_model_dir_url(
params.rec_model_dir,
os.path.join(BASE_DIR, VERSION, 'ocr', 'rec', lang),
rec_model_config['url'])
os.path.join(BASE_DIR, 'whl', 'rec', lang), rec_model_config['url'])
cls_model_config = get_model_config('OCR', params.ocr_version, 'cls',
'ch')
params.cls_model_dir, cls_url = confirm_model_dir_url(
params.cls_model_dir,
os.path.join(BASE_DIR, VERSION, 'ocr', 'cls'),
cls_model_config['url'])
os.path.join(BASE_DIR, 'whl', 'cls'), cls_model_config['url'])
# download model
maybe_download(params.det_model_dir, det_url)
maybe_download(params.rec_model_dir, rec_url)
......@@ -338,7 +341,7 @@ class PaddleOCR(predict_system.TextSystem):
params.rec_char_dict_path = str(
Path(__file__).parent / rec_model_config['dict_path'])
print(params)
logger.debug(params)
# init det_model and rec_model
super().__init__(params)
......@@ -395,7 +398,7 @@ class PaddleOCR(predict_system.TextSystem):
return rec_res
class PPStructure(OCRSystem):
class PPStructure(StructureSystem):
def __init__(self, **kwargs):
params = parse_args(mMain=False)
params.__dict__.update(**kwargs)
......@@ -412,20 +415,18 @@ class PPStructure(OCRSystem):
det_lang)
params.det_model_dir, det_url = confirm_model_dir_url(
params.det_model_dir,
os.path.join(BASE_DIR, VERSION, 'ocr', 'det', det_lang),
os.path.join(BASE_DIR, 'whl', 'det', det_lang),
det_model_config['url'])
rec_model_config = get_model_config('OCR', params.ocr_version, 'rec',
lang)
params.rec_model_dir, rec_url = confirm_model_dir_url(
params.rec_model_dir,
os.path.join(BASE_DIR, VERSION, 'ocr', 'rec', lang),
rec_model_config['url'])
os.path.join(BASE_DIR, 'whl', 'rec', lang), rec_model_config['url'])
table_model_config = get_model_config(
'STRUCTURE', params.structure_version, 'table', 'en')
params.table_model_dir, table_url = confirm_model_dir_url(
params.table_model_dir,
os.path.join(BASE_DIR, VERSION, 'ocr', 'table'),
table_model_config['url'])
os.path.join(BASE_DIR, 'whl', 'table'), table_model_config['url'])
# download model
maybe_download(params.det_model_dir, det_url)
maybe_download(params.rec_model_dir, rec_url)
......@@ -438,7 +439,7 @@ class PPStructure(OCRSystem):
params.table_char_dict_path = str(
Path(__file__).parent / table_model_config['dict_path'])
print(params)
logger.debug(params)
super().__init__(params)
def __call__(self, img):
......
......@@ -121,9 +121,9 @@ class PSELoss(nn.Layer):
if neg_num == 0:
selected_mask = training_mask
selected_mask = selected_mask.view(
1, selected_mask.shape[0],
selected_mask.shape[1]).astype('float32')
selected_mask = selected_mask.reshape(
[1, selected_mask.shape[0], selected_mask.shape[1]]).astype(
'float32')
return selected_mask
neg_score = paddle.masked_select(score, gt_text <= 0.5)
......
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# reference from : https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/kie/losses/sdmgr_loss.py
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
......
......@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# The code is refer from: https://github.com/open-mmlab/mmocr/blob/main/mmocr/core/evaluation/kie_metric.py
from __future__ import absolute_import
from __future__ import division
......
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# reference from : https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/kie/heads/sdmgr_head.py
from __future__ import absolute_import
from __future__ import division
......
......@@ -216,7 +216,7 @@ class ParallelSARDecoder(BaseDecoder):
self.pred_dropout = nn.Dropout(pred_dropout)
pred_num_classes = self.num_classes - 1
if pred_concat:
fc_in_channel = decoder_rnn_out_size + d_model + d_enc
fc_in_channel = decoder_rnn_out_size + d_model + encoder_rnn_out_size
else:
fc_in_channel = d_model
self.prediction = nn.Linear(fc_in_channel, pred_num_classes)
......
......@@ -54,22 +54,24 @@ class BaseRecLabelDecode(object):
ignored_tokens = self.get_ignored_tokens()
batch_size = len(text_index)
for batch_idx in range(batch_size):
char_list = []
conf_list = []
for idx in range(len(text_index[batch_idx])):
if text_index[batch_idx][idx] in ignored_tokens:
continue
if is_remove_duplicate:
# only for predict
if idx > 0 and text_index[batch_idx][idx - 1] == text_index[
batch_idx][idx]:
continue
char_list.append(self.character[int(text_index[batch_idx][
idx])])
if text_prob is not None:
conf_list.append(text_prob[batch_idx][idx])
else:
conf_list.append(1)
selection = np.ones(len(text_index[batch_idx]), dtype=bool)
if is_remove_duplicate:
selection[1:] = text_index[batch_idx][1:] != text_index[
batch_idx][:-1]
for ignored_token in ignored_tokens:
selection &= text_index[batch_idx] != ignored_token
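# e.g. for text_index[batch_idx] = [3, 3, 0, 5, 5] with ignored token 0 and
# is_remove_duplicate=True, `selection` keeps indices [0, 3]: repeated
# characters and ignored tokens are dropped in a single vectorized pass.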
char_list = [
self.character[text_id]
for text_id in text_index[batch_idx][selection]
]
if text_prob is not None:
conf_list = text_prob[batch_idx][selection]
else:
conf_list = [1] * len(selection)
if len(conf_list) == 0:
conf_list = [0]
text = ''.join(char_list)
result_list.append((text, np.mean(conf_list)))
return result_list
......
......@@ -26,7 +26,7 @@ logger_initialized = {}
@functools.lru_cache()
def get_logger(name='root', log_file=None, log_level=logging.DEBUG):
def get_logger(name='ppocr', log_file=None, log_level=logging.DEBUG):
"""Initialize and get a logger by name.
If the logger has not been initialized, this method will initialize the
logger by adding one or two handlers, otherwise the initialized logger will
......@@ -67,4 +67,5 @@ def get_logger(name='root', log_file=None, log_level=logging.DEBUG):
else:
logger.setLevel(logging.ERROR)
logger_initialized[name] = True
logger.propagate = False
return logger
......@@ -22,6 +22,7 @@ sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
import cv2
import json
import numpy as np
import time
import logging
......@@ -35,7 +36,7 @@ from ppstructure.utility import parse_args, draw_structure_result
logger = get_logger()
class OCRSystem(object):
class StructureSystem(object):
def __init__(self, args):
self.mode = args.mode
if self.mode == 'structure':
......@@ -66,8 +67,7 @@ class OCRSystem(object):
self.use_angle_cls = args.use_angle_cls
self.drop_score = args.drop_score
elif self.mode == 'vqa':
from ppstructure.vqa.infer_ser_e2e import SerPredictor, draw_ser_results
self.vqa_engine = SerPredictor(args)
raise NotImplementedError
def __call__(self, img):
if self.mode == 'structure':
......@@ -82,24 +82,24 @@ class OCRSystem(object):
res = self.table_system(roi_img)
else:
filter_boxes, filter_rec_res = self.text_system(roi_img)
filter_boxes = [x + [x1, y1] for x in filter_boxes]
filter_boxes = [
x.reshape(-1).tolist() for x in filter_boxes
]
# remove style char
style_token = [
'<strike>', '<strike>', '<sup>', '</sub>', '<b>',
'</b>', '<sub>', '</sup>', '<overline>', '</overline>',
'<underline>', '</underline>', '<i>', '</i>'
]
filter_rec_res_tmp = []
for rec_res in filter_rec_res:
res = []
for box, rec_res in zip(filter_boxes, filter_rec_res):
rec_str, rec_conf = rec_res
for token in style_token:
if token in rec_str:
rec_str = rec_str.replace(token, '')
filter_rec_res_tmp.append((rec_str, rec_conf))
res = (filter_boxes, filter_rec_res_tmp)
box += [x1, y1]
res.append({
'text': rec_str,
'confidence': float(rec_conf),
'text_region': box.tolist()
})
res_list.append({
'type': region.type,
'bbox': [x1, y1, x2, y2],
......@@ -107,7 +107,7 @@ class OCRSystem(object):
'res': res
})
elif self.mode == 'vqa':
res_list, _ = self.vqa_engine(img)
raise NotImplementedError
return res_list
......@@ -123,15 +123,14 @@ def save_structure_res(res, save_folder, img_name):
excel_path = os.path.join(excel_save_folder,
'{}.xlsx'.format(region['bbox']))
to_excel(region['res'], excel_path)
if region['type'] == 'Figure':
elif region['type'] == 'Figure':
roi_img = region['img']
img_path = os.path.join(excel_save_folder,
'{}.jpg'.format(region['bbox']))
cv2.imwrite(img_path, roi_img)
else:
for box, rec_res in zip(region['res'][0], region['res'][1]):
f.write('{}\t{}\n'.format(
np.array(box).reshape(-1).tolist(), rec_res))
for text_result in region['res']:
f.write('{}\n'.format(json.dumps(text_result)))
def main(args):
......@@ -139,7 +138,7 @@ def main(args):
image_file_list = image_file_list
image_file_list = image_file_list[args.process_id::args.total_process_num]
structure_sys = OCRSystem(args)
structure_sys = StructureSystem(args)
img_num = len(image_file_list)
save_folder = os.path.join(args.output, structure_sys.mode)
os.makedirs(save_folder, exist_ok=True)
......@@ -162,8 +161,9 @@ def main(args):
draw_img = draw_structure_result(img, res, args.vis_font_path)
img_save_path = os.path.join(save_folder, img_name, 'show.jpg')
elif structure_sys.mode == 'vqa':
draw_img = draw_ser_results(img, res, args.vis_font_path)
img_save_path = os.path.join(save_folder, img_name + '.jpg')
raise NotImplementedError
# draw_img = draw_ser_results(img, res, args.vis_font_path)
# img_save_path = os.path.join(save_folder, img_name + '.jpg')
cv2.imwrite(img_save_path, draw_img)
logger.info('result save to {}'.format(img_save_path))
elapse = time.time() - starttime
......
......@@ -40,12 +40,6 @@ def init_args():
type=ast.literal_eval,
default=None,
help='label map according to ppstructure/layout/README_ch.md')
# params for ser
parser.add_argument("--model_name_or_path", type=str)
parser.add_argument("--max_seq_length", type=int, default=512)
parser.add_argument(
"--label_map_path", type=str, default='./vqa/labels/labels_ser.txt')
parser.add_argument(
"--mode",
type=str,
......@@ -67,10 +61,10 @@ def draw_structure_result(image, result, font_path):
if region['type'] == 'Table':
pass
else:
for box, rec_res in zip(region['res'][0], region['res'][1]):
boxes.append(np.array(box).reshape(-1, 2))
txts.append(rec_res[0])
scores.append(rec_res[1])
for text_result in region['res']:
boxes.append(np.array(text_result['text_region']))
txts.append(text_result['text'])
scores.append(text_result['confidence'])
im_show = draw_ocr_box_txt(
image, boxes, txts, scores, font_path=font_path, drop_score=0)
return im_show
......@@ -242,3 +242,7 @@ python3 tools/infer_vqa_token_ser_re.py -c configs/vqa/re/layoutxlm.yml -o Archi
- LayoutXLM: Multimodal Pre-training for Multilingual Visually-rich Document Understanding, https://arxiv.org/pdf/2104.08836.pdf
- microsoft/unilm/layoutxlm, https://github.com/microsoft/unilm/tree/master/layoutxlm
- XFUND dataset, https://github.com/doc-analysis/XFUND
## License
The content of this project itself is licensed under the [Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA 4.0)](https://creativecommons.org/licenses/by-nc-sa/4.0/)
......@@ -56,7 +56,7 @@ PostProcess:
thresh: 0
box_thresh: 0.85
min_area: 16
box_type: box # 'box' or 'poly'
box_type: quad # 'quad' or 'poly'
scale: 1
Metric:
......@@ -132,4 +132,4 @@ Eval:
shuffle: False
drop_last: False
batch_size_per_card: 1 # must be 1
num_workers: 8
......@@ -55,7 +55,7 @@ PostProcess:
thresh: 0
box_thresh: 0.85
min_area: 16
box_type: box # 'box' or 'poly'
box_type: quad # 'quad' or 'poly'
scale: 1
Metric:
......@@ -131,4 +131,4 @@ Eval:
shuffle: False
drop_last: False
batch_size_per_card: 1 # must be 1
num_workers: 8
......@@ -60,6 +60,13 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
ln -s ./icdar2015_lite ./icdar2015
cd ../
cd ./inference && tar xf rec_inference.tar && cd ../
if [ ${model_name} == "ch_PPOCRv2_det" ] || [ ${model_name} == "ch_PPOCRv2_det_PACT" ]; then
wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_train.tar --no-check-certificate
cd ./pretrain_models/ && tar xf ch_ppocr_server_v2.0_det_train.tar && cd ../
fi
if [ ${model_name} == "det_r18_db_v2_0" ]; then
wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/pretrained/ResNet18_vd_pretrained.pdparams --no-check-certificate
fi
if [ ${model_name} == "en_server_pgnetA" ]; then
wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/total_text_lite.tar --no-check-certificate
wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/pgnet/en_server_pgnetA.tar --no-check-certificate
......
......@@ -122,7 +122,7 @@ def preprocess(is_train=False):
log_file = '{}/train.log'.format(save_model_dir)
else:
log_file = None
logger = get_logger(name='root', log_file=log_file)
logger = get_logger(log_file=log_file)
# check if set use_gpu=True in paddlepaddle cpu version
use_gpu = config['use_gpu']
......
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import json
import os
def poly_to_string(poly):
if len(poly.shape) > 1:
poly = np.array(poly).flatten()
string = "\t".join(str(i) for i in poly)
return string
def convert_label(label_dir, mode="gt", save_dir="./save_results/"):
if not os.path.exists(label_dir):
raise ValueError(f"The file {label_dir} does not exist!")
assert label_dir != save_dir, "The save_dir must be different from the label_dir."
label_file = open(label_dir, 'r')
data = label_file.readlines()
gt_dict = {}
for line in data:
try:
tmp = line.split('\t')
assert len(tmp) == 2, ""
except:
tmp = line.strip().split(' ')
gt_lists = []
if tmp[0].split('/')[0] is not None:
img_path = tmp[0]
anno = json.loads(tmp[1])
gt_collect = []
for dic in anno:
#txt = dic['transcription'].replace(' ', '') # ignore blank
txt = dic['transcription']
if 'score' in dic and float(dic['score']) < 0.5:
continue
if u'\u3000' in txt: txt = txt.replace(u'\u3000', u' ')
#while ' ' in txt:
# txt = txt.replace(' ', '')
poly = np.array(dic['points']).flatten()
if txt == "###":
txt_tag = 1 ## ignore 1
else:
txt_tag = 0
if mode == "gt":
gt_label = poly_to_string(poly) + "\t" + str(
txt_tag) + "\t" + txt + "\n"
else:
gt_label = poly_to_string(poly) + "\t" + txt + "\n"
gt_lists.append(gt_label)
gt_dict[img_path] = gt_lists
else:
continue
if not os.path.exists(save_dir):
os.makedirs(save_dir)
for img_name in gt_dict.keys():
save_name = img_name.split("/")[-1]
save_file = os.path.join(save_dir, save_name + ".txt")
with open(save_file, "w") as f:
f.writelines(gt_dict[img_name])
print("The convert label saved in {}".format(save_dir))
if __name__ == "__main__":
ppocr_label_gt = "/paddle/Datasets/chinese/test_set/Label_refine_310_V2.txt"
convert_label(ppocr_label_gt, "gt", "./save_gt_310_V2/")
ppocr_label_pred = "./infer_results/ch_PPOCRV2_infer.txt"
convert_label(ppocr_label_pred, "pred", "./save_PPOCRV2_infer/")
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import argparse
def str2bool(v):
return v.lower() in ("true", "t", "1")
def init_args():
parser = argparse.ArgumentParser()
parser.add_argument("--image_dir", type=str, default="")
parser.add_argument("--save_html_path", type=str, default="./default.html")
parser.add_argument("--width", type=int, default=640)
return parser
def parse_args():
parser = init_args()
return parser.parse_args()
def draw_debug_img(args):
html_path = args.save_html_path
err_cnt = 0
with open(html_path, 'w') as html:
html.write('<html>\n<body>\n')
html.write('<table border="1">\n')
html.write(
"<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />"
)
image_list = []
path = args.image_dir
for i, filename in enumerate(sorted(os.listdir(path))):
if filename.endswith("txt"): continue
# The image path
base = "{}/{}".format(path, filename)
html.write("<tr>\n")
html.write(f'<td> {filename}\n GT')
html.write(f'<td>GT\n<img src="{base}" width={args.width}></td>')
html.write("</tr>\n")
html.write('<style>\n')
html.write('span {\n')
html.write(' color: red;\n')
html.write('}\n')
html.write('</style>\n')
html.write('</table>\n')
html.write('</body>\n</html>\n')
print(f"The html file saved in {html_path}")
return
if __name__ == "__main__":
args = parse_args()
draw_debug_img(args)
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import re
import sys
import shapely
from shapely.geometry import Polygon
import numpy as np
from collections import defaultdict
import operator
import editdistance
def strQ2B(ustring):
rstring = ""
for uchar in ustring:
inside_code = ord(uchar)
if inside_code == 12288:
inside_code = 32
elif (inside_code >= 65281 and inside_code <= 65374):
inside_code -= 65248
rstring += chr(inside_code)
return rstring
def polygon_from_str(polygon_points):
"""
Create a shapely polygon object from gt or dt line.
"""
polygon_points = np.array(polygon_points).reshape(4, 2)
polygon = Polygon(polygon_points).convex_hull
return polygon
def polygon_iou(poly1, poly2):
"""
Intersection over union between two shapely polygons.
"""
if not poly1.intersects(
poly2): # this test is fast and can accelerate calculation
iou = 0
else:
try:
inter_area = poly1.intersection(poly2).area
union_area = poly1.area + poly2.area - inter_area
iou = float(inter_area) / union_area
except shapely.geos.TopologicalError:
# except Exception as e:
# print(e)
print('shapely.geos.TopologicalError occured, iou set to 0')
iou = 0
return iou
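# e.g. two unit squares offset by (0.5, 0.5) intersect with area 0.25 and
# have union 1.75, so polygon_iou(polygon_from_str([0, 0, 1, 0, 1, 1, 0, 1]),
# polygon_from_str([0.5, 0.5, 1.5, 0.5, 1.5, 1.5, 0.5, 1.5])) is about 0.143.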
def ed(str1, str2):
return editdistance.eval(str1, str2)
def e2e_eval(gt_dir, res_dir, ignore_blank=False):
print('start testing...')
iou_thresh = 0.5
val_names = os.listdir(gt_dir)
num_gt_chars = 0
gt_count = 0
dt_count = 0
hit = 0
ed_sum = 0
for i, val_name in enumerate(val_names):
with open(os.path.join(gt_dir, val_name), encoding='utf-8') as f:
gt_lines = [o.strip() for o in f.readlines()]
gts = []
ignore_masks = []
for line in gt_lines:
parts = line.strip().split('\t')
# ignore illegal data
if len(parts) < 9:
continue
assert (len(parts) < 11)
if len(parts) == 9:
gts.append(parts[:8] + [''])
else:
gts.append(parts[:8] + [parts[-1]])
ignore_masks.append(parts[8])
val_path = os.path.join(res_dir, val_name)
if not os.path.exists(val_path):
dt_lines = []
else:
with open(val_path, encoding='utf-8') as f:
dt_lines = [o.strip() for o in f.readlines()]
dts = []
for line in dt_lines:
# print(line)
parts = line.strip().split("\t")
assert (len(parts) < 10), "line error: {}".format(line)
if len(parts) == 8:
dts.append(parts + [''])
else:
dts.append(parts)
dt_match = [False] * len(dts)
gt_match = [False] * len(gts)
all_ious = defaultdict(tuple)
for index_gt, gt in enumerate(gts):
gt_coors = [float(gt_coor) for gt_coor in gt[0:8]]
gt_poly = polygon_from_str(gt_coors)
for index_dt, dt in enumerate(dts):
dt_coors = [float(dt_coor) for dt_coor in dt[0:8]]
dt_poly = polygon_from_str(dt_coors)
iou = polygon_iou(dt_poly, gt_poly)
if iou >= iou_thresh:
all_ious[(index_gt, index_dt)] = iou
sorted_ious = sorted(
all_ious.items(), key=operator.itemgetter(1), reverse=True)
sorted_gt_dt_pairs = [item[0] for item in sorted_ious]
# matched gt and dt
for gt_dt_pair in sorted_gt_dt_pairs:
index_gt, index_dt = gt_dt_pair
if gt_match[index_gt] == False and dt_match[index_dt] == False:
gt_match[index_gt] = True
dt_match[index_dt] = True
if ignore_blank:
gt_str = strQ2B(gts[index_gt][8]).replace(" ", "")
dt_str = strQ2B(dts[index_dt][8]).replace(" ", "")
else:
gt_str = strQ2B(gts[index_gt][8])
dt_str = strQ2B(dts[index_dt][8])
if ignore_masks[index_gt] == '0':
ed_sum += ed(gt_str, dt_str)
num_gt_chars += len(gt_str)
if gt_str == dt_str:
hit += 1
gt_count += 1
dt_count += 1
# unmatched dt
for tindex, dt_match_flag in enumerate(dt_match):
if dt_match_flag == False:
dt_str = dts[tindex][8]
gt_str = ''
ed_sum += ed(dt_str, gt_str)
dt_count += 1
# unmatched gt
for tindex, gt_match_flag in enumerate(gt_match):
if gt_match_flag == False and ignore_masks[tindex] == '0':
dt_str = ''
gt_str = gts[tindex][8]
ed_sum += ed(gt_str, dt_str)
num_gt_chars += len(gt_str)
gt_count += 1
eps = 1e-9
print('hit, dt_count, gt_count', hit, dt_count, gt_count)
precision = hit / (dt_count + eps)
recall = hit / (gt_count + eps)
fmeasure = 2.0 * precision * recall / (precision + recall + eps)
avg_edit_dist_img = ed_sum / len(val_names)
avg_edit_dist_field = ed_sum / (gt_count + eps)
character_acc = 1 - ed_sum / (num_gt_chars + eps)
print('character_acc: %.2f' % (character_acc * 100) + "%")
print('avg_edit_dist_field: %.2f' % (avg_edit_dist_field))
print('avg_edit_dist_img: %.2f' % (avg_edit_dist_img))
print('precision: %.2f' % (precision * 100) + "%")
print('recall: %.2f' % (recall * 100) + "%")
print('fmeasure: %.2f' % (fmeasure * 100) + "%")
if __name__ == '__main__':
# if len(sys.argv) != 3:
# print("python3 ocr_e2e_eval.py gt_dir res_dir")
# exit(-1)
# gt_folder = sys.argv[1]
# pred_folder = sys.argv[2]
gt_folder = sys.argv[1]
pred_folder = sys.argv[2]
e2e_eval(gt_folder, pred_folder)
# Introduction
The `tools/end2end` directory contains the metric evaluation code and visualization tools for the cascaded text detection + text recognition pipeline. This section describes how to evaluate the end-to-end metrics of text detection + text recognition.
## End-to-end evaluation steps
**Step 1:**
Run `tools/infer/predict_system.py` to obtain the saved results:
```
python3 tools/infer/predict_system.py --det_model_dir=./ch_PP-OCRv2_det_infer/ --rec_model_dir=./ch_PP-OCRv2_rec_infer/ --image_dir=./datasets/img_dir/ --draw_img_save_dir=./ch_PP-OCRv2_results/ --is_visualize=True
```
The detection and recognition visualizations are saved in `./ch_PP-OCRv2_results/` by default, and the prediction results are saved in `./ch_PP-OCRv2_results/system_results.txt` in the following format:
```
all-sum-510/00224225.jpg [{"transcription": "超赞", "points": [[8.0, 48.0], [157.0, 44.0], [159.0, 115.0], [10.0, 119.0]], "score": "0.99396634"}, {"transcription": "中", "points": [[202.0, 152.0], [230.0, 152.0], [230.0, 163.0], [202.0, 163.0]], "score": "0.09310734"}, {"transcription": "58.0m", "points": [[196.0, 192.0], [444.0, 192.0], [444.0, 240.0], [196.0, 240.0]], "score": "0.44041982"}, {"transcription": "汽配", "points": [[55.0, 263.0], [95.0, 263.0], [95.0, 281.0], [55.0, 281.0]], "score": "0.9986651"}, {"transcription": "成总店", "points": [[120.0, 262.0], [176.0, 262.0], [176.0, 283.0], [120.0, 283.0]], "score": "0.9929402"}, {"transcription": "K", "points": [[237.0, 286.0], [311.0, 286.0], [311.0, 345.0], [237.0, 345.0]], "score": "0.6074794"}, {"transcription": "88:-8", "points": [[203.0, 405.0], [477.0, 414.0], [475.0, 459.0], [201.0, 450.0]], "score": "0.7106863"}]
```
**Step 2:**
Convert the data saved in Step 1 into the format required by the end-to-end evaluation:
Modify the code in `tools/convert_ppocr_label.py`: set the input label path, mode and output label path for the convert_label calls, converting both the GT labels and the predicted labels.
```
ppocr_label_gt = "gt_label.txt"
convert_label(ppocr_label_gt, "gt", "./save_gt_label/")
ppocr_label_pred = "./ch_PP-OCRv2_results/system_results.txt"
convert_label(ppocr_label_pred, "pred", "./save_PPOCRV2_infer/")
```
Run `convert_ppocr_label.py`:
```
python3 tools/convert_ppocr_label.py
```
This produces the following directories:
```
├── ./save_gt_label/
├── ./save_PPOCRV2_infer/
```
**Step 3:**
Run the end-to-end evaluation with `tools/eval_end2end.py` to compute the end-to-end metrics, as follows:
```
python3 tools/eval_end2end.py "gt_label_dir" "predict_label_dir"
```
For example:
```
python3 tools/eval_end2end.py ./save_gt_label/ ./save_PPOCRV2_infer/
```
This produces the following results, where fmeasure is the main metric of interest:
```
hit, dt_count, gt_count 1557 2693 3283
character_acc: 61.77%
avg_edit_dist_field: 3.08
avg_edit_dist_img: 51.82
precision: 57.82%
recall: 47.43%
fmeasure: 52.11%
```
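The precision, recall and fmeasure in the printout follow the usual definitions; a small sketch reproducing them from the counts above:
```python
hit, dt_count, gt_count = 1557, 2693, 3283

precision = hit / dt_count                                # 0.5782 -> 57.82%
recall = hit / gt_count                                   # 0.4743 -> 47.43%
fmeasure = 2 * precision * recall / (precision + recall)  # 0.5211 -> 52.11%
print(precision, recall, fmeasure)
```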
......@@ -150,29 +150,15 @@ class TextDetector(object):
logger=logger)
def order_points_clockwise(self, pts):
"""
reference from: https://github.com/jrosebr1/imutils/blob/master/imutils/perspective.py
# sort the points based on their x-coordinates
"""
xSorted = pts[np.argsort(pts[:, 0]), :]
# grab the left-most and right-most points from the sorted
# x-roodinate points
leftMost = xSorted[:2, :]
rightMost = xSorted[2:, :]
# now, sort the left-most coordinates according to their
# y-coordinates so we can grab the top-left and bottom-left
# points, respectively
leftMost = leftMost[np.argsort(leftMost[:, 1]), :]
(tl, bl) = leftMost
rightMost = rightMost[np.argsort(rightMost[:, 1]), :]
(tr, br) = rightMost
rect = np.array([tl, tr, br, bl], dtype="float32")
rect = np.zeros((4, 2), dtype="float32")
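# order the four points clockwise starting from the top-left: the top-left
# point has the smallest x+y sum and the bottom-right the largest; np.diff
# gives y-x per point, which is smallest at the top-right and largest at the
# bottom-left.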
s = pts.sum(axis=1)
rect[0] = pts[np.argmin(s)]
rect[2] = pts[np.argmax(s)]
diff = np.diff(pts, axis=1)
rect[1] = pts[np.argmin(diff)]
rect[3] = pts[np.argmax(diff)]
return rect
def clip_det_res(self, points, img_height, img_width):
for pno in range(points.shape[0]):
points[pno, 0] = int(min(max(points[pno, 0], 0), img_width - 1))
......
......@@ -622,7 +622,6 @@ def get_rotate_crop_image(img, points):
def check_gpu(use_gpu):
if use_gpu and not paddle.is_compiled_with_cuda():
use_gpu = False
return use_gpu
......
......@@ -151,7 +151,7 @@ def preprocess():
ser_config = load_config(FLAGS.config_ser)
ser_config = merge_config(ser_config, FLAGS.opt_ser)
logger = get_logger(name='root')
logger = get_logger()
# check if set use_gpu=True in paddlepaddle cpu version
use_gpu = config['Global']['use_gpu']
......
......@@ -525,7 +525,7 @@ def preprocess(is_train=False):
log_file = '{}/train.log'.format(save_model_dir)
else:
log_file = None
logger = get_logger(name='root', log_file=log_file)
logger = get_logger(log_file=log_file)
# check if set use_gpu=True in paddlepaddle cpu version
use_gpu = config['Global']['use_gpu']
......
......@@ -25,7 +25,9 @@ import numpy as np
import time
from PIL import Image
from ppocr.utils.utility import get_image_file_list
from tools.infer.utility import draw_ocr, draw_boxes
from tools.infer.utility import draw_ocr, draw_boxes, str2bool
from ppstructure.utility import draw_structure_result
from ppstructure.predict_system import to_excel
import requests
import json
......@@ -69,8 +71,33 @@ def draw_server_result(image_file, res):
return draw_img
def main(url, image_path):
image_file_list = get_image_file_list(image_path)
def save_structure_res(res, save_folder, image_file):
img = cv2.imread(image_file)
excel_save_folder = os.path.join(save_folder, os.path.basename(image_file))
os.makedirs(excel_save_folder, exist_ok=True)
# save res
with open(
os.path.join(excel_save_folder, 'res.txt'), 'w',
encoding='utf8') as f:
for region in res:
if region['type'] == 'Table':
excel_path = os.path.join(excel_save_folder,
'{}.xlsx'.format(region['bbox']))
to_excel(region['res'], excel_path)
elif region['type'] == 'Figure':
x1, y1, x2, y2 = region['bbox']
print(region['bbox'])
roi_img = img[y1:y2, x1:x2, :]
img_path = os.path.join(excel_save_folder,
'{}.jpg'.format(region['bbox']))
cv2.imwrite(img_path, roi_img)
else:
for text_result in region['res']:
f.write('{}\n'.format(json.dumps(text_result)))
def main(args):
image_file_list = get_image_file_list(args.image_dir)
is_visualize = False
headers = {"Content-type": "application/json"}
cnt = 0
......@@ -80,38 +107,51 @@ def main(url, image_path):
if img is None:
logger.info("error in loading image:{}".format(image_file))
continue
img_name = os.path.basename(image_file)
# send http request
starttime = time.time()
data = {'images': [cv2_to_base64(img)]}
r = requests.post(url=url, headers=headers, data=json.dumps(data))
r = requests.post(
url=args.server_url, headers=headers, data=json.dumps(data))
elapse = time.time() - starttime
total_time += elapse
logger.info("Predict time of %s: %.3fs" % (image_file, elapse))
res = r.json()["results"][0]
logger.info(res)
if is_visualize:
draw_img = draw_server_result(image_file, res)
if args.visualize:
draw_img = None
if 'structure_table' in args.server_url:
to_excel(res['html'], './{}.xlsx'.format(img_name))
elif 'structure_system' in args.server_url:
save_structure_res(res['regions'], args.output, image_file)
else:
draw_img = draw_server_result(image_file, res)
if draw_img is not None:
draw_img_save = "./server_results/"
if not os.path.exists(draw_img_save):
os.makedirs(draw_img_save)
if not os.path.exists(args.output):
os.makedirs(args.output)
cv2.imwrite(
os.path.join(draw_img_save, os.path.basename(image_file)),
os.path.join(args.output, os.path.basename(image_file)),
draw_img[:, :, ::-1])
logger.info("The visualized image saved in {}".format(
os.path.join(draw_img_save, os.path.basename(image_file))))
os.path.join(args.output, os.path.basename(image_file))))
cnt += 1
if cnt % 100 == 0:
logger.info("{} processed".format(cnt))
logger.info("avg time cost: {}".format(float(total_time) / cnt))
def parse_args():
import argparse
parser = argparse.ArgumentParser(description="args for hub serving")
parser.add_argument("--server_url", type=str, required=True)
parser.add_argument("--image_dir", type=str, required=True)
parser.add_argument("--visualize", type=str2bool, default=False)
parser.add_argument("--output", type=str, default='./hubserving_result')
args = parser.parse_args()
return args
if __name__ == '__main__':
if len(sys.argv) != 3:
logger.info("Usage: %s server_url image_path" % sys.argv[0])
else:
server_url = sys.argv[1]
image_path = sys.argv[2]
main(server_url, image_path)
args = parse_args()
main(args)