diff --git a/MANIFEST.in b/MANIFEST.in index 1fcf184dacee9dcaf3d5b2e62d12c7b156e068c7..f821618ab5e14b755539ce64c14c1862cecc5552 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,7 +1,7 @@ include LICENSE include README.md -recursive-include ppocr/utils *.txt utility.py logging.py network.py +recursive-include ppocr/utils *.* recursive-include ppocr/data *.py recursive-include ppocr/postprocess *.py recursive-include tools/infer *.py diff --git a/__init__.py b/__init__.py index e22e466a8426c437407c491bbae47c3b66defa2e..15a9aca4da19a981b9e678e7cc93e33cf40fc81c 100644 --- a/__init__.py +++ b/__init__.py @@ -11,8 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import paddleocr from .paddleocr import * __version__ = paddleocr.VERSION -__all__ = ['PaddleOCR', 'PPStructure', 'draw_ocr', 'draw_structure_result', 'save_structure_res','download_with_progressbar'] +__all__ = [ + 'PaddleOCR', 'PPStructure', 'draw_ocr', 'draw_structure_result', + 'save_structure_res', 'download_with_progressbar' +] diff --git a/configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml b/configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml index b75ca6fdc7e7543460ba2e561af27e4d4fbf3391..3833cb0bad9915a2169b116e7406e01cadd0ef62 100644 --- a/configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml +++ b/configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml @@ -8,7 +8,7 @@ Global: # evaluation is run every 5000 iterations after the 4000th iteration eval_batch_step: [3000, 2000] cal_metric_during_train: False - pretrained_model: ./pretrain_models/ch_PP-OCRv2_det_distill_train/best_accuracy + pretrained_model: checkpoints: save_inference_dir: use_visualdl: False diff --git a/deploy/cpp_infer/docs/imgs/cmake_step1.png b/deploy/cpp_infer/docs/imgs/cmake_step1.png new file mode 100644 index 0000000000000000000000000000000000000000..3b9a762b843e2063b93d9e90ab254fd96a62f4a2 Binary files /dev/null and b/deploy/cpp_infer/docs/imgs/cmake_step1.png differ diff --git a/deploy/cpp_infer/docs/imgs/cmake_step2.png b/deploy/cpp_infer/docs/imgs/cmake_step2.png new file mode 100644 index 0000000000000000000000000000000000000000..27435f4a03ef1cf62c1d6cb7b7f53cc99ddd8a2f Binary files /dev/null and b/deploy/cpp_infer/docs/imgs/cmake_step2.png differ diff --git a/deploy/cpp_infer/docs/imgs/cmake_step3.png b/deploy/cpp_infer/docs/imgs/cmake_step3.png new file mode 100644 index 0000000000000000000000000000000000000000..1d02b2c675960037f91e2c2cdd0988ea48a5e2c5 Binary files /dev/null and b/deploy/cpp_infer/docs/imgs/cmake_step3.png differ diff --git a/deploy/cpp_infer/docs/imgs/cmake_step4.png b/deploy/cpp_infer/docs/imgs/cmake_step4.png new file mode 100644 index 0000000000000000000000000000000000000000..5f100f7b9fdcd7b45193ea1a2b6fb9cc86a2c9c0 Binary files /dev/null and b/deploy/cpp_infer/docs/imgs/cmake_step4.png differ diff --git a/deploy/cpp_infer/docs/imgs/result.png b/deploy/cpp_infer/docs/imgs/result.png new file mode 100644 index 0000000000000000000000000000000000000000..50610eab279396d9adce68a58261e15b18fa2e46 Binary files /dev/null and b/deploy/cpp_infer/docs/imgs/result.png differ diff --git a/deploy/cpp_infer/docs/imgs/vs_step1.png b/deploy/cpp_infer/docs/imgs/vs_step1.png new file mode 100644 index 0000000000000000000000000000000000000000..a9ee0690071d55096dfb9624a25087ec84397895 Binary files /dev/null and b/deploy/cpp_infer/docs/imgs/vs_step1.png differ diff --git a/deploy/cpp_infer/docs/vs2019_build_withgpu_config.png 
b/deploy/cpp_infer/docs/vs2019_build_withgpu_config.png deleted file mode 100644 index beff2884480790d97ef3577c77c0336fc04557ed..0000000000000000000000000000000000000000 Binary files a/deploy/cpp_infer/docs/vs2019_build_withgpu_config.png and /dev/null differ diff --git a/deploy/cpp_infer/docs/windows_vs2019_build.md b/deploy/cpp_infer/docs/windows_vs2019_build.md index 24a1e55cd7e5728e9cd56da8a35a72892380d28b..e84aa54542d7e06b90adde972c599bf8a1fd17e0 100644 --- a/deploy/cpp_infer/docs/windows_vs2019_build.md +++ b/deploy/cpp_infer/docs/windows_vs2019_build.md @@ -1,18 +1,35 @@ +- [Visual Studio 2019 Community CMake 编译指南](#visual-studio-2019-community-cmake-编译指南) + - [1. 环境准备](#1-环境准备) + - [1.1 安装必须环境](#11-安装必须环境) + - [1.2 下载 PaddlePaddle C++ 预测库和 Opencv](#12-下载-paddlepaddle-c-预测库和-opencv) + - [1.2.1 下载 PaddlePaddle C++ 预测库](#121-下载-paddlepaddle-c-预测库) + - [1.2.2 安装配置OpenCV](#122-安装配置opencv) + - [1.2.3 下载PaddleOCR代码](#123-下载paddleocr代码) + - [2. 开始运行](#2-开始运行) + - [Step1: 构建Visual Studio项目](#step1-构建visual-studio项目) + - [Step2: 执行cmake配置](#step2-执行cmake配置) + - [Step3: 生成Visual Studio 项目](#step3-生成visual-studio-项目) + - [Step4: 预测](#step4-预测) + - [FAQ](#faq) + # Visual Studio 2019 Community CMake 编译指南 PaddleOCR在Windows 平台下基于`Visual Studio 2019 Community` 进行了测试。微软从`Visual Studio 2017`开始即支持直接管理`CMake`跨平台编译项目,但是直到`2019`才提供了稳定和完全的支持,所以如果你想使用CMake管理项目编译构建,我们推荐你使用`Visual Studio 2019`环境下构建。 +**下面所有示例以工作目录为 `D:\projects\cpp`演示**。 + +## 1. 环境准备 +### 1.1 安装必须环境 -## 前置条件 * Visual Studio 2019 * CUDA 10.2,cudnn 7+ (仅在使用GPU版本的预测库时需要) -* CMake 3.0+ +* CMake 3.22+ 请确保系统已经安装好上述基本软件,我们使用的是`VS2019`的社区版。 -**下面所有示例以工作目录为 `D:\projects`演示**。 +### 1.2 下载 PaddlePaddle C++ 预测库和 Opencv -### Step1: 下载PaddlePaddle C++ 预测库 paddle_inference +#### 1.2.1 下载 PaddlePaddle C++ 预测库 PaddlePaddle C++ 预测库针对不同的`CPU`和`CUDA`版本提供了不同的预编译版本,请根据实际情况下载: [C++预测库下载列表](https://paddleinference.paddlepaddle.org.cn/user_guides/download_lib.html#windows) @@ -26,87 +43,94 @@ paddle_inference └── version.txt # 版本和编译信息 ``` -### Step2: 安装配置OpenCV +#### 1.2.2 安装配置OpenCV -1. 在OpenCV官网下载适用于Windows平台的3.4.6版本, [下载地址](https://sourceforge.net/projects/opencvlibrary/files/3.4.6/opencv-3.4.6-vc14_vc15.exe/download) -2. 运行下载的可执行文件,将OpenCV解压至指定目录,如`D:\projects\opencv` -3. 配置环境变量,如下流程所示 - - 我的电脑->属性->高级系统设置->环境变量 - - 在系统变量中找到Path(如没有,自行创建),并双击编辑 - - 新建,将opencv路径填入并保存,如`D:\projects\opencv\build\x64\vc14\bin` +1. 在OpenCV官网下载适用于Windows平台的Opencv, [下载地址](https://github.com/opencv/opencv/releases) +2. 运行下载的可执行文件,将OpenCV解压至指定目录,如`D:\projects\cpp\opencv` -### Step3: 使用Visual Studio 2019直接编译CMake +#### 1.2.3 下载PaddleOCR代码 +```bash +git clone -b dygraph https://github.com/PaddlePaddle/PaddleOCR +``` -1. 打开Visual Studio 2019 Community,点击`继续但无需代码` -![step2](https://paddleseg.bj.bcebos.com/inference/vs2019_step1.png) -2. 点击: `文件`->`打开`->`CMake` -![step2.1](https://paddleseg.bj.bcebos.com/inference/vs2019_step2.png) +## 2. 开始运行 -选择项目代码所在路径,并打开`CMakeList.txt`: +### Step1: 构建Visual Studio项目 +cmake安装完后系统里会有一个cmake-gui程序,打开cmake-gui,在第一个输入框处填写源代码路径,第二个输入框处填写编译输出路径 -![step2.2](https://paddleseg.bj.bcebos.com/inference/vs2019_step3.png) +![step1](imgs/cmake_step1.png) -3. 点击:`项目`->`CMake设置` +### Step2: 执行cmake配置 +点击界面下方的`Configure`按钮,第一次点击会弹出提示框进行Visual Studio配置,如下图,选择你的Visual Studio版本即可,目标平台选择x64。然后点击`finish`按钮即开始自动执行配置。 -![step3](https://paddleseg.bj.bcebos.com/inference/vs2019_step4.png) +![step2](imgs/cmake_step2.png) -4. 
分别设置编译选项指定`CUDA`、`CUDNN_LIB`、`OpenCV`、`Paddle预测库`的路径 +第一次执行会报错,这是正常现象,接下来进行Opencv和预测库的配置 -三个编译参数的含义说明如下(带`*`表示仅在使用**GPU版本**预测库时指定, 其中CUDA库版本尽量对齐): +* cpu版本,仅需考虑OPENCV_DIR、OpenCV_DIR、PADDLE_LIB三个参数 -| 参数名 | 含义 | -| ---- | ---- | -| *CUDA_LIB | CUDA的库路径 | -| *CUDNN_LIB | CUDNN的库路径 | -| OPENCV_DIR | OpenCV的安装路径 | -| PADDLE_LIB | Paddle预测库的路径 | + - OPENCV_DIR:填写opencv lib文件夹所在位置 + - OpenCV_DIR:同样填写opencv lib文件夹所在位置 + - PADDLE_LIB:paddle_inference文件夹所在位置 -**注意:** - 1. 使用`CPU`版预测库,请把`WITH_GPU`的勾去掉 - 2. 如果使用的是`openblas`版本,请把`WITH_MKL`勾去掉 +* GPU版本,在cpu版本的基础上,还需填写以下变量 CUDA_LIB、CUDNN_LIB、TENSORRT_DIR、WITH_GPU、WITH_TENSORRT -![step4](https://paddleseg.bj.bcebos.com/inference/vs2019_step5.png) +- CUDA_LIB: CUDA地址,如 `C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2\lib\x64` +- CUDNN_LIB: 和CUDA_LIB一致 +- TENSORRT_DIR:TRT下载后解压缩的位置 +- WITH_GPU: 打勾 +- WITH_TENSORRT:打勾 + +配置好的截图如下 + +![step3](imgs/cmake_step3.png) + +配置完成后,再次点击`Configure`按钮。 -下面给出with GPU的配置示例: -![step5](./vs2019_build_withgpu_config.png) **注意:** - CMAKE_BACKWARDS的版本要根据平台安装cmake的版本进行设置。 + 1. 如果使用的是`openblas`版本,请把`WITH_MKL`勾去掉 + 2. 遇到报错 `unable to access 'https://github.com/LDOUBLEV/AutoLog.git/': gnutls_handshake() failed: The TLS connection was non-properly terminated.`, 将 `deploy/cpp_infer/external-cmake/auto-log.cmake` 中的github地址改为 https://gitee.com/Double_V/AutoLog 地址即可。 + +### Step3: 生成Visual Studio 项目 -**设置完成后**, 点击上图中`保存并生成CMake缓存以加载变量`。 +点击`Generate`按钮即可生成Visual Studio 项目的sln文件。 +![step4](imgs/cmake_step4.png) -5. 点击`生成`->`全部生成` +点击`Open Project`按钮即可在Visual Studio 中打开项目。打开后截图如下 -![step6](https://paddleseg.bj.bcebos.com/inference/vs2019_step6.png) +![step5](imgs/vs_step1.png) +在开始生成解决方案之前,执行下面步骤: +1. 将`Debug`改为`Release` +2. 下载[dirent.h](https://paddleocr.bj.bcebos.com/deploy/cpp_infer/cpp_files/dirent.h),并拷贝到 Visual Studio 的 include 文件夹下,如`C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\VS\include`。 + +点击`生成->生成解决方案`,即可在`build/Release/`文件夹下看见`ppocr.exe`文件。 + +运行之前,将下面文件拷贝到`build/Release/`文件夹下 +1. `paddle_inference/paddle/lib/paddle_inference.dll` +2. 
`opencv/build/x64/vc15/bin/opencv_world455.dll` ### Step4: 预测 -上述`Visual Studio 2019`编译产出的可执行文件在`out\build\x64-Release\Release`目录下,打开`cmd`,并切换到`D:\projects\PaddleOCR\deploy\cpp_infer\`: +上述`Visual Studio 2019`编译产出的可执行文件在`build\Release`目录下,打开`cmd`,并切换到`D:\projects\cpp\PaddleOCR\deploy\cpp_infer\`: ``` -cd D:\projects\PaddleOCR\deploy\cpp_infer +cd /d D:\projects\cpp\PaddleOCR\deploy\cpp_infer ``` 可执行文件`ppocr.exe`即为样例的预测程序,其主要使用方法如下,更多使用方法可以参考[说明文档](../readme.md)`运行demo`部分。 ```shell -#识别中文图片 `D:\projects\PaddleOCR\doc\imgs_words\ch\` -.\out\build\x64-Release\Release\ppocr.exe rec --rec_model_dir=D:\projects\PaddleOCR\ch_ppocr_mobile_v2.0_rec_infer --image_dir=D:\projects\PaddleOCR\doc\imgs_words\ch\ - -#识别英文图片 'D:\projects\PaddleOCR\doc\imgs_words\en\' -.\out\build\x64-Release\Release\ppocr.exe rec --rec_model_dir=D:\projects\PaddleOCR\inference\rec_mv3crnn --image_dir=D:\projects\PaddleOCR\doc\imgs_words\en\ --char_list_file=D:\projects\PaddleOCR\ppocr\utils\dict\en_dict.txt +# 切换终端编码为utf8 +CHCP 65001 +# 执行预测 +.\build\Release\ppocr.exe system --det_model_dir=D:\projects\cpp\ch_PP-OCRv2_det_slim_quant_infer --rec_model_dir=D:\projects\cpp\ch_PP-OCRv2_rec_slim_quant_infer --image_dir=D:\projects\cpp\PaddleOCR\doc\imgs\11.jpg ``` 识别结果如下 +![result](imgs/result.png) -第一个参数为配置文件路径,第二个参数为需要预测的图片路径,第三个参数为配置文本识别的字典。 - - -### FQA -* 在Windows下的终端中执行文件exe时,可能会发生乱码的现象,此时需要在终端中输入`CHCP 65001`,将终端的编码方式由GBK编码(默认)改为UTF-8编码,更加具体的解释可以参考这篇博客:[https://blog.csdn.net/qq_35038153/article/details/78430359](https://blog.csdn.net/qq_35038153/article/details/78430359)。 -
* 编译时,如果报错`错误:C1083 无法打开包括文件:"dirent.h":No such file or directory`,可以参考该[文档](https://blog.csdn.net/Dora_blank/article/details/117740837#41_C1083_direnthNo_such_file_or_directory_54),新建`dirent.h`文件,并添加到`utility.cpp`的头文件引用中。同时修改`utility.cpp`70行:`lstat`改成`stat`。 -
* 编译时,如果报错`Autolog未定义`,新建`autolog.h`文件,内容为:[autolog.h](https://github.com/LDOUBLEV/AutoLog/blob/main/auto_log/autolog.h),并添加到`main.cpp`的头文件引用中,再次编译。 -* 运行时,如果弹窗报错找不到`paddle_inference.dll`或者`openblas.dll`,在`D:\projects\paddle_inference`预测库内找到这两个文件,复制到`D:\projects\PaddleOCR\deploy\cpp_infer\out\build\x64-Release\Release`目录下。不用重新编译,再次运行即可。 +## FAQ * 运行时,弹窗报错提示`应用程序无法正常启动(0xc0000142)`,并且`cmd`窗口内提示`You are using Paddle compiled with TensorRT, but TensorRT dynamic library is not found.`,把tensorrt目录下的lib里面的所有dll文件复制到release目录下,再次运行即可。 diff --git a/deploy/cpp_infer/external-cmake/auto-log.cmake b/deploy/cpp_infer/external-cmake/auto-log.cmake index becbff0f45df51e5db541889ae1ffdacf2c4fc78..c998b3b14570aa77b9a307b0477f4caa7160e2a5 100644 --- a/deploy/cpp_infer/external-cmake/auto-log.cmake +++ b/deploy/cpp_infer/external-cmake/auto-log.cmake @@ -6,6 +6,7 @@ set(FETCHCONTENT_BASE_DIR "${CMAKE_CURRENT_BINARY_DIR}/third-party") FetchContent_Declare( extern_Autolog PREFIX autolog + # If you don't have access to github, replace it with https://gitee.com/Double_V/AutoLog GIT_REPOSITORY https://github.com/LDOUBLEV/AutoLog.git GIT_TAG main ) diff --git a/deploy/cpp_infer/include/ocr_det.h b/deploy/cpp_infer/include/ocr_det.h index 657ab25d8854ec54c27d71485fe9eeddc65013c3..6e4086fbaa6945b9f685e6844b7e701283de2dae 100644 --- a/deploy/cpp_infer/include/ocr_det.h +++ b/deploy/cpp_infer/include/ocr_det.h @@ -46,8 +46,7 @@ public: const double &det_db_box_thresh, const double &det_db_unclip_ratio, const bool &use_polygon_score, const bool &use_dilation, - const bool &visualize, const bool &use_tensorrt, - const std::string &precision) { 
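// Note: the `visualize` flag is removed from the DBDetector constructor in this change; // visualization is now driven from main.cpp, which checks FLAGS_visualize and calls // Utility::VisualizeBboxes(srcimg, boxes, FLAGS_output + "/" + file_name) instead of writing a fixed ./ocr_vis.png.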
this->use_gpu_ = use_gpu; this->gpu_id_ = gpu_id; this->gpu_mem_ = gpu_mem; @@ -62,7 +61,6 @@ public: this->use_polygon_score_ = use_polygon_score; this->use_dilation_ = use_dilation; - this->visualize_ = visualize; this->use_tensorrt_ = use_tensorrt; this->precision_ = precision; diff --git a/deploy/cpp_infer/include/ocr_rec.h b/deploy/cpp_infer/include/ocr_rec.h index ff80ba5299014885fc4c900fb87b5dcc6042744a..4052553d967fb365c3fb895c9d5b8145935fd45d 100644 --- a/deploy/cpp_infer/include/ocr_rec.h +++ b/deploy/cpp_infer/include/ocr_rec.h @@ -44,7 +44,8 @@ public: const int &gpu_id, const int &gpu_mem, const int &cpu_math_library_num_threads, const bool &use_mkldnn, const string &label_path, - const bool &use_tensorrt, const std::string &precision, + const bool &use_tensorrt, + const std::string &precision, const int &rec_batch_num) { this->use_gpu_ = use_gpu; this->gpu_id_ = gpu_id; @@ -66,7 +67,8 @@ public: // Load Paddle inference model void LoadModel(const std::string &model_dir); - void Run(std::vector<cv::Mat> img_list, std::vector<double> *times); + void Run(std::vector<cv::Mat> img_list, std::vector<std::string> &rec_texts, + std::vector<float> &rec_text_scores, std::vector<double> *times); private: std::shared_ptr<Predictor> predictor_; @@ -85,7 +87,7 @@ private: bool use_tensorrt_ = false; std::string precision_ = "fp32"; int rec_batch_num_ = 6; - + // pre-process CrnnResizeImg resize_op_; Normalize normalize_op_; diff --git a/deploy/cpp_infer/include/utility.h b/deploy/cpp_infer/include/utility.h index 5797559f7550da6bb38b014c46c1492124a9e065..f0dddacdac31e979a96648433662c76ccf972ad2 100644 --- a/deploy/cpp_infer/include/utility.h +++ b/deploy/cpp_infer/include/utility.h @@ -38,7 +38,8 @@ public: static void VisualizeBboxes(const cv::Mat &srcimg, - const std::vector<std::vector<std::vector<int>>> &boxes); + const std::vector<std::vector<std::vector<int>>> &boxes, + const std::string &save_path); template <class ForwardIterator> inline static size_t argmax(ForwardIterator first, ForwardIterator last) { @@ -47,12 +48,13 @@ public: static void GetAllFiles(const char *dir_name, std::vector<std::string> &all_inputs); - + static cv::Mat GetRotateCropImage(const cv::Mat &srcimage, - std::vector<std::vector<int>> box); - - static std::vector<int> argsort(const std::vector<float>& array); + std::vector<std::vector<int>> box); + + static std::vector<int> argsort(const std::vector<float> &array); + static std::string basename(const std::string &filename); }; } // namespace PaddleOCR \ No newline at end of file diff --git a/deploy/cpp_infer/readme.md b/deploy/cpp_infer/readme.md index 725197ad5cf9c7bf54be445f2bb3698096e7f9fb..e7104881027b111de6821af8244ea2a6092fc14b 100644 --- a/deploy/cpp_infer/readme.md +++ b/deploy/cpp_infer/readme.md @@ -1,9 +1,3 @@ -# 服务器端C++预测 - -本章节介绍PaddleOCR 模型的的C++部署方法,与之对应的python预测部署方式参考[文档](../../doc/doc_ch/inference.md)。 -C++在性能计算上优于python,因此,在大多数CPU、GPU部署场景,多采用C++的部署方式,本节将介绍如何在Linux\Windows (CPU\GPU)环境下配置C++环境并完成 -PaddleOCR模型部署。 - - [服务器端C++预测](#服务器端c预测) - [1. 准备环境](#1-准备环境) - [1.0 运行准备](#10-运行准备) - [1.1 编译opencv库](#11-编译opencv库) - [1.2 下载或者编译Paddle预测库](#12-下载或者编译paddle预测库) - [1.2.1 直接下载安装](#121-直接下载安装) - [1.2.2 预测库源码编译](#122-预测库源码编译) - [2. 开始运行](#2-开始运行) - [2.1 将模型导出为inference model](#21-将模型导出为inference-model) - [2.2 编译PaddleOCR C++预测demo](#22-编译paddleocr-c预测demo) - [2.3 运行demo](#23-运行demo) - [1. 只调用检测:](#1-只调用检测) - [2. 只调用识别:](#2-只调用识别) - [3. 调用串联:](#3-调用串联) + - [3. 
FAQ](#3-faq) + +# 服务器端C++预测 + +本章节介绍PaddleOCR 模型的C++部署方法,与之对应的python预测部署方式参考[文档](../../doc/doc_ch/inference.md)。 +C++在性能计算上优于python,因此,在大多数CPU、GPU部署场景,多采用C++的部署方式,本节将介绍如何在Linux\Windows (CPU\GPU)环境下配置C++环境并完成 +PaddleOCR模型部署。 + @@ -28,7 +30,7 @@ PaddleOCR模型部署。 ### 1.0 运行准备 - Linux环境,推荐使用docker。 -- Windows环境,目前支持基于`Visual Studio 2019 Community`进行编译。 +- Windows环境。 * 该文档主要介绍基于Linux环境的PaddleOCR C++预测流程,如果需要在Windows下基于预测库进行C++预测,具体编译方法请参考[Windows下编译教程](./docs/windows_vs2019_build.md) @@ -254,6 +256,7 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir |gpu_mem|int|4000|申请的GPU内存| |cpu_math_library_num_threads|int|10|CPU预测时的线程数,在机器核数充足的情况下,该值越大,预测速度越快| |enable_mkldnn|bool|true|是否使用mkldnn库| +|output|str|./output|可视化结果保存的路径| - 检测模型相关 @@ -265,7 +268,7 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir |det_db_box_thresh|float|0.5|DB后处理过滤box的阈值,如果检测存在漏框情况,可酌情减小| |det_db_unclip_ratio|float|1.6|表示文本框的紧致程度,越小则文本框更靠近文本| |use_polygon_score|bool|false|是否使用多边形框计算bbox score,false表示使用矩形框计算。矩形框计算速度更快,多边形框对弯曲文本区域计算更准确。| -|visualize|bool|true|是否对结果进行可视化,为1时,会在当前文件夹下保存文件名为`ocr_vis.png`的预测结果。| +|visualize|bool|true|是否对结果进行可视化,为1时,预测结果会保存在`output`字段指定的文件夹下和输入图像同名的图像上。| - 方向分类器相关 @@ -280,10 +283,10 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir |参数名称|类型|默认参数|意义| | :---: | :---: | :---: | :---: | |rec_model_dir|string|-|识别模型inference model地址| -|char_list_file|string|../../ppocr/utils/ppocr_keys_v1.txt|字典文件| +|rec_char_dict_path|string|../../ppocr/utils/ppocr_keys_v1.txt|字典文件| -* PaddleOCR也支持多语言的预测,更多支持的语言和模型可以参考[识别文档](../../doc/doc_ch/recognition.md)中的多语言字典与模型部分,如果希望进行多语言预测,只需将修改`char_list_file`(字典文件路径)以及`rec_model_dir`(inference模型路径)字段即可。 +* PaddleOCR也支持多语言的预测,更多支持的语言和模型可以参考[识别文档](../../doc/doc_ch/recognition.md)中的多语言字典与模型部分,如果希望进行多语言预测,只需修改`rec_char_dict_path`(字典文件路径)以及`rec_model_dir`(inference模型路径)字段即可。 最终屏幕上会输出检测结果如下。 @@ -291,5 +294,6 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir +## 3. FAQ -**注意:在使用Paddle预测库时,推荐使用2.0.0版本的预测库。** + 1. 遇到报错 `unable to access 'https://github.com/LDOUBLEV/AutoLog.git/': gnutls_handshake() failed: The TLS connection was non-properly terminated.`, 将 `deploy/cpp_infer/external-cmake/auto-log.cmake` 中的github地址改为 https://gitee.com/Double_V/AutoLog 地址即可。 diff --git a/deploy/cpp_infer/readme_en.md b/deploy/cpp_infer/readme_en.md index f4cfab24350c1a6be3d8ebebf6b47b0baaa4f26e..61d65095394a9f5b7323bf8eb7324cd1e91b1346 100644 --- a/deploy/cpp_infer/readme_en.md +++ b/deploy/cpp_infer/readme_en.md @@ -1,3 +1,19 @@ +- [Server-side C++ Inference](#server-side-c-inference) + - [1. Prepare the Environment](#1-prepare-the-environment) + - [Environment](#environment) + - [1.1 Compile OpenCV](#11-compile-opencv) + - [1.2 Compile or Download or the Paddle Inference Library](#12-compile-or-download-or-the-paddle-inference-library) + - [1.2.1 Direct download and installation](#121-direct-download-and-installation) + - [1.2.2 Compile the inference source code](#122-compile-the-inference-source-code) + - [2. Compile and Run the Demo](#2-compile-and-run-the-demo) + - [2.1 Export the inference model](#21-export-the-inference-model) + - [2.2 Compile PaddleOCR C++ inference demo](#22-compile-paddleocr-c-inference-demo) + - [Run the demo](#run-the-demo) + - [1. run det demo:](#1-run-det-demo) + - [2. run rec demo:](#2-run-rec-demo) + - [3. run system demo:](#3-run-system-demo) + - [3. FAQ](#3-faq) + # Server-side C++ Inference This chapter introduces the C++ deployment steps of the PaddleOCR model. The corresponding Python predictive deployment method refers to [document](../../doc/doc_ch/inference.md). 
@@ -10,6 +26,7 @@ This section will introduce how to configure the C++ environment and deploy Padd ### Environment - Linux, docker is recommended. +- Windows. ### 1.1 Compile OpenCV @@ -232,6 +249,7 @@ More parameters are as follows, |gpu_mem|int|4000|GPU memory requested| |cpu_math_library_num_threads|int|10|Number of threads when using CPU inference. When machine cores are enough, the larger the value, the faster the inference speed| |enable_mkldnn|bool|true|Whether to use mkldnn library| +|output|str|./output|Path where visualization results are saved| - Detection related parameters @@ -243,7 +261,7 @@ More parameters are as follows, |det_db_box_thresh|float|0.5|DB post-processing filter box threshold, if there is a missing box detected, it can be reduced as appropriate| |det_db_unclip_ratio|float|1.6|Indicates the compactness of the text box, the smaller the value, the closer the text box to the text| |use_polygon_score|bool|false|Whether to use polygon box to calculate bbox score, false means to use rectangle box to calculate. Use rectangular box to calculate faster, and polygonal box more accurate for curved text area.| -|visualize|bool|true|Whether to visualize the results,when it is set as true, The prediction result will be save in the image file `./ocr_vis.png`.| +|visualize|bool|true|Whether to visualize the results, when it is set as true, the prediction results will be saved in the folder specified by the `output` field on an image with the same name as the input image.| - Classifier related parameters @@ -258,9 +276,9 @@ More parameters are as follows, |parameter|data type|default|meaning| | --- | --- | --- | --- | |rec_model_dir|string|-|Address of recognition inference model| -|char_list_file|string|../../ppocr/utils/ppocr_keys_v1.txt|dictionary file| +|rec_char_dict_path|string|../../ppocr/utils/ppocr_keys_v1.txt|dictionary file| -* Multi-language inference is also supported in PaddleOCR, you can refer to [recognition tutorial](../../doc/doc_en/recognition_en.md) for more supported languages and models in PaddleOCR. Specifically, if you want to infer using multi-language models, you just need to modify values of `char_list_file` and `rec_model_dir`. +* Multi-language inference is also supported in PaddleOCR, you can refer to [recognition tutorial](../../doc/doc_en/recognition_en.md) for more supported languages and models in PaddleOCR. Specifically, if you want to infer using multi-language models, you just need to modify values of `rec_char_dict_path` and `rec_model_dir`. The detection results will be shown on the screen, which is as follows. @@ -270,6 +288,6 @@ The detection results will be shown on the screen, which is as follows. -### 2.3 Notes +## 3. FAQ -* Paddle 2.0.0 inference model library is recommended for this tutorial. + 1. Encountered the error `unable to access 'https://github.com/LDOUBLEV/AutoLog.git/': gnutls_handshake() failed: The TLS connection was non-properly terminated.`, change the github address in `deploy/cpp_infer/external-cmake/auto-log.cmake` to the https://gitee.com/Double_V/AutoLog address. diff --git a/deploy/cpp_infer/src/main.cpp b/deploy/cpp_infer/src/main.cpp index 664b10b2f579fd8681c65dcf1ded5ebe53d0424c..efc1e50ce929b4f68dff3437faa05b9ac46c2aa0 100644 --- a/deploy/cpp_infer/src/main.cpp +++ b/deploy/cpp_infer/src/main.cpp @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "glog/logging.h" #include "omp.h" #include "opencv2/core.hpp" #include "opencv2/imgcodecs.hpp" @@ -21,13 +20,13 @@ #include #include #include +#include #include #include #include #include -#include #include #include #include @@ -45,7 +44,7 @@ DEFINE_bool(enable_mkldnn, false, "Whether use mkldnn with CPU."); DEFINE_bool(use_tensorrt, false, "Whether use tensorrt."); DEFINE_string(precision, "fp32", "Precision be one of fp32/fp16/int8"); DEFINE_bool(benchmark, false, "Whether use benchmark."); -DEFINE_string(save_log_path, "./log_output/", "Save benchmark log path."); +DEFINE_string(output, "./output/", "Path to save visualization results and benchmark logs."); // detection related DEFINE_string(image_dir, "", "Dir of input image."); DEFINE_string(det_model_dir, "", "Path of det inference model."); @@ -63,7 +62,7 @@ DEFINE_double(cls_thresh, 0.9, "Threshold of cls_thresh."); // recognition related DEFINE_string(rec_model_dir, "", "Path of rec inference model."); DEFINE_int32(rec_batch_num, 6, "rec_batch_num."); -DEFINE_string(char_list_file, "../../ppocr/utils/ppocr_keys_v1.txt", +DEFINE_string(rec_char_dict_path, "../../ppocr/utils/ppocr_keys_v1.txt", "Path of dictionary."); using namespace std; @@ -86,11 +85,17 @@ int main_det(std::vector<cv::String> cv_all_img_names) { FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_enable_mkldnn, FLAGS_max_side_len, FLAGS_det_db_thresh, FLAGS_det_db_box_thresh, FLAGS_det_db_unclip_ratio, - FLAGS_use_polygon_score, FLAGS_use_dilation, FLAGS_visualize, + FLAGS_use_polygon_score, FLAGS_use_dilation, FLAGS_use_tensorrt, FLAGS_precision); + if (!PathExists(FLAGS_output)) { + mkdir(FLAGS_output.c_str(), 0777); + } + for (int i = 0; i < cv_all_img_names.size(); ++i) { - // LOG(INFO) << "The predict img: " << cv_all_img_names[i]; + if (!FLAGS_benchmark) { + cout << "The predict img: " << cv_all_img_names[i] << endl; + } cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR); if (!srcimg.data) { @@ -102,7 +107,11 @@ int main_det(std::vector<cv::String> cv_all_img_names) { std::vector<double> det_times; det.Run(srcimg, boxes, &det_times); - + // visualization + if (FLAGS_visualize) { + std::string file_name = Utility::basename(cv_all_img_names[i]); + Utility::VisualizeBboxes(srcimg, boxes, FLAGS_output + "/" + file_name); + } time_info[0] += det_times[0]; time_info[1] += det_times[1]; time_info[2] += det_times[2]; @@ -130,20 +139,18 @@ int main_det(std::vector<cv::String> cv_all_img_names) { int main_rec(std::vector<cv::String> cv_all_img_names) { std::vector<double> time_info = {0, 0, 0}; - std::string char_list_file = FLAGS_char_list_file; + std::string rec_char_dict_path = FLAGS_rec_char_dict_path; if (FLAGS_benchmark) - char_list_file = FLAGS_char_list_file.substr(6); - cout << "label file: " << char_list_file << endl; + rec_char_dict_path = FLAGS_rec_char_dict_path.substr(6); + cout << "label file: " << rec_char_dict_path << endl; CRNNRecognizer rec(FLAGS_rec_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_enable_mkldnn, - char_list_file, FLAGS_use_tensorrt, FLAGS_precision, + rec_char_dict_path, FLAGS_use_tensorrt, FLAGS_precision, FLAGS_rec_batch_num); std::vector<cv::Mat> img_list; for (int i = 0; i < cv_all_img_names.size(); ++i) { - LOG(INFO) << "The predict img: " << cv_all_img_names[i]; - cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR); if (!srcimg.data) { std::cerr << "[ERROR] image read failed! 
image path: " @@ -152,8 +159,15 @@ int main_rec(std::vector<cv::String> cv_all_img_names) { } img_list.push_back(srcimg); } + std::vector<std::string> rec_texts(img_list.size(), ""); + std::vector<float> rec_text_scores(img_list.size(), 0); std::vector<double> rec_times; - rec.Run(img_list, &rec_times); + rec.Run(img_list, rec_texts, rec_text_scores, &rec_times); + // output rec results + for (int i = 0; i < rec_texts.size(); i++) { + cout << "The predict img: " << cv_all_img_names[i] << "\t" << rec_texts[i] + << "\t" << rec_text_scores[i] << endl; + } time_info[0] += rec_times[0]; time_info[1] += rec_times[1]; time_info[2] += rec_times[2]; @@ -172,11 +186,15 @@ int main_system(std::vector<cv::String> cv_all_img_names) { std::vector<double> time_info_det = {0, 0, 0}; std::vector<double> time_info_rec = {0, 0, 0}; + if (!PathExists(FLAGS_output)) { + mkdir(FLAGS_output.c_str(), 0777); + } + DBDetector det(FLAGS_det_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_enable_mkldnn, FLAGS_max_side_len, FLAGS_det_db_thresh, FLAGS_det_db_box_thresh, FLAGS_det_db_unclip_ratio, - FLAGS_use_polygon_score, FLAGS_use_dilation, FLAGS_visualize, + FLAGS_use_polygon_score, FLAGS_use_dilation, FLAGS_use_tensorrt, FLAGS_precision); Classifier *cls = nullptr; @@ -186,18 +204,18 @@ int main_system(std::vector<cv::String> cv_all_img_names) { FLAGS_cls_thresh, FLAGS_use_tensorrt, FLAGS_precision); } - std::string char_list_file = FLAGS_char_list_file; + std::string rec_char_dict_path = FLAGS_rec_char_dict_path; if (FLAGS_benchmark) - char_list_file = FLAGS_char_list_file.substr(6); - cout << "label file: " << char_list_file << endl; + rec_char_dict_path = FLAGS_rec_char_dict_path.substr(6); + cout << "label file: " << rec_char_dict_path << endl; CRNNRecognizer rec(FLAGS_rec_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_enable_mkldnn, - char_list_file, FLAGS_use_tensorrt, FLAGS_precision, + rec_char_dict_path, FLAGS_use_tensorrt, FLAGS_precision, FLAGS_rec_batch_num); for (int i = 0; i < cv_all_img_names.size(); ++i) { - LOG(INFO) << "The predict img: " << cv_all_img_names[i]; + cout << "The predict img: " << cv_all_img_names[i] << endl; cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR); if (!srcimg.data) { @@ -205,15 +223,21 @@ << cv_all_img_names[i] << endl; exit(1); } + // det std::vector<std::vector<std::vector<int>>> boxes; std::vector<double> det_times; std::vector<double> rec_times; det.Run(srcimg, boxes, &det_times); + if (FLAGS_visualize) { + std::string file_name = Utility::basename(cv_all_img_names[i]); + Utility::VisualizeBboxes(srcimg, boxes, FLAGS_output + "/" + file_name); + } time_info_det[0] += det_times[0]; time_info_det[1] += det_times[1]; time_info_det[2] += det_times[2]; + // rec std::vector<cv::Mat> img_list; for (int j = 0; j < boxes.size(); j++) { cv::Mat crop_img; @@ -223,8 +247,14 @@ } img_list.push_back(crop_img); } - - rec.Run(img_list, &rec_times); + std::vector<std::string> rec_texts(img_list.size(), ""); + std::vector<float> rec_text_scores(img_list.size(), 0); + rec.Run(img_list, rec_texts, rec_text_scores, &rec_times); + // output rec results + for (int i = 0; i < rec_texts.size(); i++) { + std::cout << i << "\t" << rec_texts[i] << "\t" << rec_text_scores[i] + << std::endl; + } time_info_rec[0] += rec_times[0]; time_info_rec[1] += rec_times[1]; time_info_rec[2] += rec_times[2]; diff --git a/deploy/cpp_infer/src/ocr_det.cpp b/deploy/cpp_infer/src/ocr_det.cpp index ad78999449d94dcaf2e336087de5c6837f3b233c..d72dc40cddb0845c370f5ad4bb9b6e2f6fe0bf2f 100644 --- 
a/deploy/cpp_infer/src/ocr_det.cpp +++ b/deploy/cpp_infer/src/ocr_det.cpp @@ -175,11 +175,6 @@ void DBDetector::Run(cv::Mat &img, std::chrono::duration<float> postprocess_diff = postprocess_end - postprocess_start; times->push_back(double(postprocess_diff.count() * 1000)); - - //// visualization - if (this->visualize_) { - Utility::VisualizeBboxes(srcimg, boxes); - } } } // namespace PaddleOCR diff --git a/deploy/cpp_infer/src/ocr_rec.cpp b/deploy/cpp_infer/src/ocr_rec.cpp index 25224f88acecd33f5efaa34a9dfc71639663d53f..4c94e8f3fc966d2a4de8c7aad0e5ef4d4b69c804 100644 --- a/deploy/cpp_infer/src/ocr_rec.cpp +++ b/deploy/cpp_infer/src/ocr_rec.cpp @@ -17,6 +17,8 @@ namespace PaddleOCR { void CRNNRecognizer::Run(std::vector<cv::Mat> img_list, + std::vector<std::string> &rec_texts, + std::vector<float> &rec_text_scores, std::vector<double> *times) { std::chrono::duration<float> preprocess_diff = std::chrono::steady_clock::now() - std::chrono::steady_clock::now(); @@ -86,7 +88,7 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list, // ctc decode auto postprocess_start = std::chrono::steady_clock::now(); for (int m = 0; m < predict_shape[0]; m++) { - std::vector<std::string> str_res; + std::string str_res; int argmax_idx; int last_index = 0; float score = 0.f; @@ -104,17 +106,16 @@ if (argmax_idx > 0 && (!(n > 0 && argmax_idx == last_index))) { score += max_value; count += 1; - str_res.push_back(label_list_[argmax_idx]); + str_res += label_list_[argmax_idx]; } last_index = argmax_idx; } score /= count; - if (isnan(score)) + if (isnan(score)) { continue; - for (int i = 0; i < str_res.size(); i++) { - std::cout << str_res[i]; } - std::cout << "\tscore: " << score << std::endl; + rec_texts[indices[beg_img_no + m]] = str_res; + rec_text_scores[indices[beg_img_no + m]] = score; } auto postprocess_end = std::chrono::steady_clock::now(); postprocess_diff += postprocess_end - postprocess_start; diff --git a/deploy/cpp_infer/src/utility.cpp b/deploy/cpp_infer/src/utility.cpp index c3c7b8485520579e8e2a23ae03543e3a9fc821bf..034df07804745178368a621936cd1ddabfd3a050 100644 --- a/deploy/cpp_infer/src/utility.cpp +++ b/deploy/cpp_infer/src/utility.cpp @@ -40,7 +40,8 @@ std::vector<std::string> Utility::ReadDict(const std::string &path) { void Utility::VisualizeBboxes( const cv::Mat &srcimg, - const std::vector<std::vector<std::vector<int>>> &boxes) { + const std::vector<std::vector<std::vector<int>>> &boxes, + const std::string &save_path) { cv::Mat img_vis; srcimg.copyTo(img_vis); for (int n = 0; n < boxes.size(); n++) { @@ -54,8 +55,8 @@ void Utility::VisualizeBboxes( cv::polylines(img_vis, ppt, npt, 1, 1, CV_RGB(0, 255, 0), 2, 8, 0); } - cv::imwrite("./ocr_vis.png", img_vis); - std::cout << "The detection visualized image saved in ./ocr_vis.png" + cv::imwrite(save_path, img_vis); + std::cout << "The detection visualized image saved in " + save_path << std::endl; } @@ -67,7 +68,7 @@ void Utility::GetAllFiles(const char *dir_name, return; } struct stat s; - lstat(dir_name, &s); + stat(dir_name, &s); if (!S_ISDIR(s.st_mode)) { std::cout << "dir_name is not a valid directory !" 
<< std::endl; all_inputs.push_back(dir_name); @@ -93,7 +94,7 @@ } cv::Mat Utility::GetRotateCropImage(const cv::Mat &srcimage, - std::vector<std::vector<int>> box) { + std::vector<std::vector<int>> box) { cv::Mat image; srcimage.copyTo(image); std::vector<std::vector<int>> points = box; @@ -147,17 +148,52 @@ cv::Mat Utility::GetRotateCropImage(const cv::Mat &srcimage, } } -std::vector<int> Utility::argsort(const std::vector<float>& array) -{ - const int array_len(array.size()); - std::vector<int> array_index(array_len, 0); - for (int i = 0; i < array_len; ++i) - array_index[i] = i; +std::vector<int> Utility::argsort(const std::vector<float> &array) { + const int array_len(array.size()); + std::vector<int> array_index(array_len, 0); + for (int i = 0; i < array_len; ++i) + array_index[i] = i; - std::sort(array_index.begin(), array_index.end(), - [&array](int pos1, int pos2) {return (array[pos1] < array[pos2]); }); + std::sort( + array_index.begin(), array_index.end(), + [&array](int pos1, int pos2) { return (array[pos1] < array[pos2]); }); - return array_index; + return array_index; +} + +std::string Utility::basename(const std::string &filename) { + if (filename.empty()) { + return ""; + } + + auto len = filename.length(); + auto index = filename.find_last_of("/\\"); + + if (index == std::string::npos) { + return filename; + } + + if (index + 1 >= len) { + + len--; + index = filename.substr(0, len).find_last_of("/\\"); + + if (len == 0) { + return filename; + } + + if (index == 0) { + return filename.substr(1, len - 1); + } + + if (index == std::string::npos) { + return filename.substr(0, len); + } + + return filename.substr(index + 1, len - index - 1); + } + + return filename.substr(index + 1, len - index); } } // namespace PaddleOCR \ No newline at end of file diff --git a/deploy/hubserving/ocr_cls/__init__.py b/deploy/hubserving/ocr_cls/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..c747d3e7aeca842933e083dffc01ef1fba3f4e85 100644 --- a/deploy/hubserving/ocr_cls/__init__.py +++ b/deploy/hubserving/ocr_cls/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -7,7 +20,7 @@ import os import sys sys.path.insert(0, ".") import copy - +import paddlehub from paddlehub.common.logger import logger from paddlehub.module.module import moduleinfo, runnable, serving import cv2 @@ -22,10 +35,10 @@ from deploy.hubserving.ocr_cls.params import read_params @moduleinfo( name="ocr_cls", version="1.0.0", - summary="ocr recognition service", + summary="ocr angle cls service", author="paddle-dev", author_email="paddle-dev@baidu.com", - type="cv/text_recognition") + type="cv/text_angle_cls") class OCRCls(hub.Module): def _initialize(self, use_gpu=False, enable_mkldnn=False): """ @@ -128,6 +141,7 @@ class OCRCls(hub.Module): if __name__ == '__main__': ocr = OCRCls() + ocr._initialize() image_path = [ './doc/imgs_words/ch/word_1.jpg', './doc/imgs_words/ch/word_2.jpg', diff --git a/deploy/hubserving/ocr_cls/params.py b/deploy/hubserving/ocr_cls/params.py index 982f013647b69cdc47c13e6206177fe74849da41..fe4e84843a434e2ca712cdb68d026520de8bf635 100755 --- a/deploy/hubserving/ocr_cls/params.py +++ b/deploy/hubserving/ocr_cls/params.py @@ -1,4 +1,17 @@ -# -*- coding:utf-8 -*- +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/deploy/hubserving/ocr_det/__init__.py b/deploy/hubserving/ocr_det/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..c747d3e7aeca842933e083dffc01ef1fba3f4e85 100644 --- a/deploy/hubserving/ocr_det/__init__.py +++ b/deploy/hubserving/ocr_det/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. \ No newline at end of file diff --git a/deploy/hubserving/ocr_det/module.py b/deploy/hubserving/ocr_det/module.py index c7d253f5ec8d626279c9eb493e15d1c4c83cfbfd..8fef3be017eef1c6a52395348624f5bfcb6260e7 100644 --- a/deploy/hubserving/ocr_det/module.py +++ b/deploy/hubserving/ocr_det/module.py @@ -1,4 +1,17 @@ -# -*- coding:utf-8 -*- +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -8,7 +21,7 @@ import sys sys.path.insert(0, ".") import copy - +import paddlehub from paddlehub.common.logger import logger from paddlehub.module.module import moduleinfo, runnable, serving import cv2 @@ -27,7 +40,7 @@ from deploy.hubserving.ocr_system.params import read_params summary="ocr detection service", author="paddle-dev", author_email="paddle-dev@baidu.com", - type="cv/text_recognition") + type="cv/text_detection") class OCRDet(hub.Module): def _initialize(self, use_gpu=False, enable_mkldnn=False): """ @@ -126,6 +139,7 @@ class OCRDet(hub.Module): if __name__ == '__main__': ocr = OCRDet() + ocr._initialize() image_path = [ './doc/imgs/11.jpg', './doc/imgs/12.jpg', diff --git a/deploy/hubserving/ocr_det/params.py b/deploy/hubserving/ocr_det/params.py index 2587a297662cb34d22dbdfe191439e61066cda78..ba41dd07f135402b5878add415c482edf12e2695 100755 --- a/deploy/hubserving/ocr_det/params.py +++ b/deploy/hubserving/ocr_det/params.py @@ -1,4 +1,17 @@ -# -*- coding:utf-8 -*- +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/deploy/hubserving/ocr_rec/__init__.py b/deploy/hubserving/ocr_rec/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..c747d3e7aeca842933e083dffc01ef1fba3f4e85 100644 --- a/deploy/hubserving/ocr_rec/__init__.py +++ b/deploy/hubserving/ocr_rec/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. \ No newline at end of file diff --git a/deploy/hubserving/ocr_rec/module.py b/deploy/hubserving/ocr_rec/module.py index 2bec3fcdbe00e3693557815b8ed81dd38f0c3b28..9fae54e2a317a7020543675d49fe0b7e07b4f7cd 100644 --- a/deploy/hubserving/ocr_rec/module.py +++ b/deploy/hubserving/ocr_rec/module.py @@ -1,4 +1,17 @@ -# -*- coding:utf-8 -*- +# Copyright (c) 2022 PaddlePaddle Authors. 
All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -7,7 +20,7 @@ import os import sys sys.path.insert(0, ".") import copy - +import paddlehub from paddlehub.common.logger import logger from paddlehub.module.module import moduleinfo, runnable, serving import cv2 @@ -128,6 +141,7 @@ class OCRRec(hub.Module): if __name__ == '__main__': ocr = OCRRec() + ocr._initialize() image_path = [ './doc/imgs_words/ch/word_1.jpg', './doc/imgs_words/ch/word_2.jpg', diff --git a/deploy/hubserving/ocr_rec/params.py b/deploy/hubserving/ocr_rec/params.py index 5e11c3cfee0c9387fce7f465f15f9424b7b04e9d..70b50dd4d680f744dca5cf1cbe0ebe8f0984d93a 100644 --- a/deploy/hubserving/ocr_rec/params.py +++ b/deploy/hubserving/ocr_rec/params.py @@ -1,4 +1,17 @@ -# -*- coding:utf-8 -*- +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/deploy/hubserving/ocr_system/__init__.py b/deploy/hubserving/ocr_system/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..c747d3e7aeca842933e083dffc01ef1fba3f4e85 100644 --- a/deploy/hubserving/ocr_system/__init__.py +++ b/deploy/hubserving/ocr_system/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. \ No newline at end of file diff --git a/deploy/hubserving/ocr_system/module.py b/deploy/hubserving/ocr_system/module.py index cbef8086ecff930e272bee16c16e52e2c934b0ad..71a19c6b7049ec1d779377e7c84cbfe7d2820991 100644 --- a/deploy/hubserving/ocr_system/module.py +++ b/deploy/hubserving/ocr_system/module.py @@ -1,4 +1,17 @@ -# -*- coding:utf-8 -*- +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -9,7 +22,7 @@ sys.path.insert(0, ".") import copy import time - +import paddlehub from paddlehub.common.logger import logger from paddlehub.module.module import moduleinfo, runnable, serving import cv2 @@ -28,7 +41,7 @@ from deploy.hubserving.ocr_system.params import read_params summary="ocr system service", author="paddle-dev", author_email="paddle-dev@baidu.com", - type="cv/text_recognition") + type="cv/PP-OCR_system") class OCRSystem(hub.Module): def _initialize(self, use_gpu=False, enable_mkldnn=False): """ @@ -134,6 +147,7 @@ class OCRSystem(hub.Module): if __name__ == '__main__': ocr = OCRSystem() + ocr._initialize() image_path = [ './doc/imgs/11.jpg', './doc/imgs/12.jpg', diff --git a/deploy/hubserving/ocr_system/params.py b/deploy/hubserving/ocr_system/params.py index 4698e8ce5d8f8c826fe04a85906189e729104ddb..6d74294438cfbc83a8445f994585e7d82ada5f7f 100755 --- a/deploy/hubserving/ocr_system/params.py +++ b/deploy/hubserving/ocr_system/params.py @@ -1,4 +1,17 @@ -# -*- coding:utf-8 -*- +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/deploy/hubserving/readme.md b/deploy/hubserving/readme.md index b52e3584c36173e4c607dbbd9679605c98de8a67..22699d7122faaab2cdeacad40dff3bbc9f981b03 100755 --- a/deploy/hubserving/readme.md +++ b/deploy/hubserving/readme.md @@ -1,18 +1,34 @@ [English](readme_en.md) | 简体中文 +- [基于PaddleHub Serving的服务部署](#基于paddlehub-serving的服务部署) + - [1. 近期更新](#1-近期更新) + - [2. 快速启动服务](#2-快速启动服务) + - [2.1 准备环境](#21-准备环境) + - [2.2 下载推理模型](#22-下载推理模型) + - [2.3 安装服务模块](#23-安装服务模块) + - [2.4 启动服务](#24-启动服务) + - [2.4.1. 命令行命令启动(仅支持CPU)](#241-命令行命令启动仅支持cpu) + - [2.4.2 配置文件启动(支持CPU、GPU)](#242-配置文件启动支持cpugpu) + - [3. 发送预测请求](#3-发送预测请求) + - [4. 返回结果格式说明](#4-返回结果格式说明) + - [5. 
自定义修改服务模块](#5-自定义修改服务模块) + + PaddleOCR提供2种服务部署方式: - 基于PaddleHub Serving的部署:代码路径为"`./deploy/hubserving`",按照本教程使用; - 基于PaddleServing的部署:代码路径为"`./deploy/pdserving`",使用方法参考[文档](../../deploy/pdserving/README_CN.md)。 # 基于PaddleHub Serving的服务部署 -hubserving服务部署目录下包括检测、识别、2阶段串联三种服务包,请根据需求选择相应的服务包进行安装和启动。目录结构如下: +hubserving服务部署目录下包括文本检测、文本方向分类、文本识别、文本检测+文本方向分类+文本识别3阶段串联、表格识别和PP-Structure六种服务包,请根据需求选择相应的服务包进行安装和启动。目录结构如下: ``` deploy/hubserving/ - └─ ocr_cls 分类模块服务包 - └─ ocr_det 检测模块服务包 - └─ ocr_rec 识别模块服务包 - └─ ocr_system 检测+识别串联服务包 + └─ ocr_cls 文本方向分类模块服务包 + └─ ocr_det 文本检测模块服务包 + └─ ocr_rec 文本识别模块服务包 + └─ ocr_system 文本检测+文本方向分类+文本识别串联服务包 + └─ structure_table 表格识别服务包 + └─ structure_system PP-Structure服务包 ``` 每个服务包下包含3个文件。以2阶段串联服务包为例,目录如下: @@ -23,28 +39,32 @@ deploy/hubserving/ocr_system/ └─ module.py 主模块,必选,包含服务的完整逻辑 └─ params.py 参数文件,必选,包含模型路径、前后处理参数等参数 ``` +## 1. 近期更新 -## 快速启动服务 + +* 2022.03.30 新增PP-Structure和表格识别两种服务。 + +## 2. 快速启动服务 以下步骤以检测+识别2阶段串联服务为例,如果只需要检测服务或识别服务,替换相应文件路径即可。 -### 1. 准备环境 +### 2.1 准备环境 ```shell # 安装paddlehub # paddlehub 需要 python>3.6.2 -pip3 install paddlehub==2.1.0 --upgrade -i https://pypi.tuna.tsinghua.edu.cn/simple +pip3 install paddlehub==2.1.0 --upgrade -i https://mirror.baidu.com/pypi/simple ``` -### 2. 下载推理模型 +### 2.2 下载推理模型 安装服务模块前,需要准备推理模型并放到正确路径。默认使用的是PP-OCRv2模型,默认模型路径为: ``` 检测模型:./inference/ch_PP-OCRv2_det_infer/ 识别模型:./inference/ch_PP-OCRv2_rec_infer/ 方向分类器:./inference/ch_ppocr_mobile_v2.0_cls_infer/ +表格结构识别模型:./inference/en_ppocr_mobile_v2.0_table_structure_infer/ ``` -**模型路径可在`params.py`中查看和修改。** 更多模型可以从PaddleOCR提供的[模型库](../../doc/doc_ch/models_list.md)下载,也可以替换成自己训练转换好的模型。 +**模型路径可在`params.py`中查看和修改。** 更多模型可以从PaddleOCR提供的模型库[PP-OCR](../../doc/doc_ch/models_list.md)和[PP-Structure](../../ppstructure/docs/models_list.md)下载,也可以替换成自己训练转换好的模型。 -### 3. 安装服务模块 -PaddleOCR提供3种服务模块,根据需要安装所需模块。 +### 2.3 安装服务模块 +PaddleOCR提供6种服务模块,根据需要安装所需模块。 * 在Linux环境下,安装示例如下: ```shell @@ -59,6 +79,12 @@ hub install deploy/hubserving/ocr_rec/ # 或,安装检测+识别串联服务模块: hub install deploy/hubserving/ocr_system/ + +# 或,安装表格识别服务模块: +hub install deploy/hubserving/structure_table/ + +# 或,安装PP-Structure服务模块: +hub install deploy/hubserving/structure_system/ ``` * 在Windows环境下(文件夹的分隔符为`\`),安装示例如下: ```shell @@ -74,10 +100,16 @@ hub install deploy\hubserving\ocr_rec\ # 或,安装检测+识别串联服务模块: hub install deploy\hubserving\ocr_system\ + +# 或,安装表格识别服务模块: +hub install deploy\hubserving\structure_table\ + +# 或,安装PP-Structure服务模块: +hub install deploy\hubserving\structure_system\ ``` -### 4. 启动服务 -#### 方式1. 命令行命令启动(仅支持CPU) +### 2.4 启动服务 +#### 2.4.1. 命令行命令启动(仅支持CPU) **启动命令:** ```shell $ hub serving start --modules [Module1==Version1, Module2==Version2, ...] \ @@ -89,7 +121,7 @@ $ hub serving start --modules [Module1==Version1, Module2==Version2, ...] \ **参数:** |参数|用途| -|-|-| +|---|---| |--modules/-m|PaddleHub Serving预安装模型,以多个Module==Version键值对的形式列出
*`当不指定Version时,默认选择最新版本`*| |--port/-p|服务端口,默认为8866| |--use_multiprocess|是否启用并发方式,默认为单进程方式,推荐多核CPU机器使用此方式
*`Windows操作系统只支持单进程方式`*| |--port/-p|服务端口,默认为8866| @@ -99,7 +131,7 @@ $ hub serving start --modules [Module1==Version1, Module2==Version2, ...] \ 这样就完成了一个服务化API的部署,使用默认端口号8866。 -#### 方式2. 配置文件启动(支持CPU、GPU) +#### 2.4.2 配置文件启动(支持CPU、GPU) **启动命令:** ```hub serving start -c config.json``` @@ -136,7 +168,7 @@ export CUDA_VISIBLE_DEVICES=3 hub serving start -c deploy/hubserving/ocr_system/config.json ``` -## 发送预测请求 +## 3. 发送预测请求 配置好服务端,可使用以下命令发送预测请求,获取预测结果: ```python tools/test_hubserving.py server_url image_path``` @@ -144,38 +176,46 @@ hub serving start -c deploy/hubserving/ocr_system/config.json 需要给脚本传递2个参数: - **server_url**:服务地址,格式为 `http://[ip_address]:[port]/predict/[module_name]` -例如,如果使用配置文件启动分类,检测、识别,检测+分类+识别3阶段服务,那么发送请求的url将分别是: +例如,如果使用配置文件启动分类、检测、识别、检测+分类+识别3阶段、表格识别和PP-Structure服务,那么发送请求的url将分别是: `http://127.0.0.1:8865/predict/ocr_det` `http://127.0.0.1:8866/predict/ocr_cls` `http://127.0.0.1:8867/predict/ocr_rec` `http://127.0.0.1:8868/predict/ocr_system` -- **image_path**:测试图像路径,可以是单张图片路径,也可以是图像集合目录路径 +`http://127.0.0.1:8869/predict/structure_table` +`http://127.0.0.1:8870/predict/structure_system` +- **image_dir**:测试图像路径,可以是单张图片路径,也可以是图像集合目录路径 +- **visualize**:是否可视化结果,默认为False +- **output**:可视化结果保存路径,默认为`./hubserving_result` 访问示例: -```python tools/test_hubserving.py http://127.0.0.1:8868/predict/ocr_system ./doc/imgs/``` +```python tools/test_hubserving.py --server_url=http://127.0.0.1:8868/predict/ocr_system --image_dir=./doc/imgs/ --visualize=false``` -## 返回结果格式说明 +## 4. 返回结果格式说明 返回结果为列表(list),列表中的每一项为词典(dict),词典一共可能包含3种字段,信息如下: |字段名称|数据类型|意义| -|----|----|----| +|---|---|---| |angle|str|文本角度| |text|str|文本内容| |confidence|float| 文本识别置信度或文本角度分类置信度| |text_region|list|文本位置坐标| +|html|str|表格的html字符串| +|regions|list|版面分析+表格识别+OCR的结果,每一项为一个list,包含表示区域坐标的`bbox`,区域类型的`type`和区域结果的`res`三个字段| 不同模块返回的字段不同,如,文本识别服务模块返回结果不含`text_region`字段,具体信息如下: | 字段名/模块名 | ocr_det | ocr_cls | ocr_rec | ocr_system | structure_table | structure_system | | --- | --- | --- | --- | --- | --- |--- | |angle| | ✔ | | ✔ | || |text| | |✔|✔| | ✔ | |confidence| |✔ |✔|✔| | ✔| |text_region| ✔| | |✔ | | ✔| |html| | | | |✔ |✔| |regions| | | | |✔ |✔ | **说明:** 如果需要增加、删除、修改返回字段,可在相应模块的`module.py`文件中进行修改,完整流程参考下一节自定义修改服务模块。 -## 自定义修改服务模块 +## 5. 自定义修改服务模块 如果需要修改服务逻辑,你一般需要操作以下步骤(以修改`ocr_system`为例): - 1、 停止服务 diff --git a/deploy/hubserving/readme_en.md b/deploy/hubserving/readme_en.md index 3bbcf98cd8b78407613e6bdfb5d5ab8b0a25a084..b32e6aa822c55771bbebdf49bb81b9c9202279f5 100755 --- a/deploy/hubserving/readme_en.md +++ b/deploy/hubserving/readme_en.md @@ -1,18 +1,34 @@ English | [简体中文](readme.md) +- [Service deployment based on PaddleHub Serving](#service-deployment-based-on-paddlehub-serving) + - [1. Update](#1-update) + - [2. Quick start service](#2-quick-start-service) + - [2.1 Prepare the environment](#21-prepare-the-environment) + - [2.2 Download inference model](#22-download-inference-model) + - [2.3 Install Service Module](#23-install-service-module) + - [2.4 Start service](#24-start-service) + - [2.4.1 Start with command line parameters (CPU only)](#241-start-with-command-line-parameters-cpu-only) + - [2.4.2 Start with configuration file(CPU、GPU)](#242-start-with-configuration-filecpugpu) + - [3. Send prediction requests](#3-send-prediction-requests) + - [4. Returned result format](#4-returned-result-format) + - [5. 
User defined service module modification](#5-user-defined-service-module-modification) + + PaddleOCR provides 2 service deployment methods: - Based on **PaddleHub Serving**: Code path is "`./deploy/hubserving`". Please follow this tutorial. - Based on **PaddleServing**: Code path is "`./deploy/pdserving`". Please refer to the [tutorial](../../deploy/pdserving/README.md) for usage. # Service deployment based on PaddleHub Serving -The hubserving service deployment directory includes three service packages: detection, recognition, and two-stage series connection. Please select the corresponding service package to install and start service according to your needs. The directory is as follows: +The hubserving service deployment directory includes six service packages: text detection, text angle class, text recognition, text detection+text angle class+text recognition three-stage series connection, table recognition and PP-Structure. Please select the corresponding service package to install and start service according to your needs. The directory is as follows: ``` deploy/hubserving/ - └─ ocr_det detection module service package - └─ ocr_cls angle class module service package - └─ ocr_rec recognition module service package - └─ ocr_system two-stage series connection service package + └─ ocr_det text detection module service package + └─ ocr_cls text angle class module service package + └─ ocr_rec text recognition module service package + └─ ocr_system text detection+text angle class+text recognition three-stage series connection service package + └─ structure_table table recognition service package + └─ structure_system PP-Structure service package ``` Each service pack contains 3 files. Take the 2-stage series connection service package as an example, the directory is as follows: @@ -23,43 +39,54 @@ deploy/hubserving/ocr_system/ └─ module.py Main module file, required, contains the complete logic of the service └─ params.py Parameter file, required, including parameters such as model path, pre- and post-processing parameters ``` +## 1. Update + +* 2022.03.30 Add PP-Structure and table recognition services. -## Quick start service + +## 2. Quick start service The following steps take the 2-stage series service as an example. If only the detection service or recognition service is needed, replace the corresponding file path. -### 1. Prepare the environment +### 2.1 Prepare the environment ```shell # Install paddlehub # python>3.6.2 is required by paddlehub pip3 install paddlehub==2.1.0 --upgrade -i https://pypi.tuna.tsinghua.edu.cn/simple ``` -### 2. Download inference model +### 2.2 Download inference model Before installing the service module, you need to prepare the inference model and put it in the correct path. By default, the PP-OCRv2 models are used, and the default model path is: ``` -detection model: ./inference/ch_PP-OCRv2_det_infer/ -recognition model: ./inference/ch_PP-OCRv2_rec_infer/ -text direction classifier: ./inference/ch_ppocr_mobile_v2.0_cls_infer/ +text detection model: ./inference/ch_PP-OCRv2_det_infer/ +text recognition model: ./inference/ch_PP-OCRv2_rec_infer/ +text angle classifier: ./inference/ch_ppocr_mobile_v2.0_cls_infer/ +table recognition model: ./inference/en_ppocr_mobile_v2.0_table_structure_infer/ ``` **The model path can be found and modified in `params.py`.** More models provided by PaddleOCR can be obtained from the [model library](../../doc/doc_en/models_list_en.md). You can also use models trained by yourself. -### 3. 
-### 3. Install Service Module
-PaddleOCR provides 3 kinds of service modules, install the required modules according to your needs.
+### 2.3 Install Service Module
+PaddleOCR provides 6 kinds of service modules, install the required modules according to your needs.

 * On Linux platform, the examples are as follows.
 ```shell
-# Install the detection service module:
+# Install the text detection service module:
 hub install deploy/hubserving/ocr_det/

-# Or, install the angle class service module:
+# Or, install the text angle class service module:
 hub install deploy/hubserving/ocr_cls/

-# Or, install the recognition service module:
+# Or, install the text recognition service module:
 hub install deploy/hubserving/ocr_rec/

 # Or, install the 2-stage series service module:
 hub install deploy/hubserving/ocr_system/
+
+# Or install table recognition service module
+hub install deploy/hubserving/structure_table/
+
+# Or install PP-Structure service module
+hub install deploy/hubserving/structure_system/
 ```

 * On Windows platform, the examples are as follows.
@@ -75,10 +102,16 @@ hub install deploy\hubserving\ocr_rec\

 # Or, install the 2-stage series service module:
 hub install deploy\hubserving\ocr_system\
+
+# Or install table recognition service module
+hub install deploy\hubserving\structure_table\
+
+# Or install PP-Structure service module
+hub install deploy\hubserving\structure_system\
 ```

-### 4. Start service
-#### Way 1. Start with command line parameters (CPU only)
+### 2.4 Start service
+#### 2.4.1 Start with command line parameters (CPU only)

 **start command:**
 ```shell
 $ hub serving start --modules [Module1==Version1, Module2==Version2, ...] \

 **parameters:**

 |parameters|usage|
-|-|-|
+|---|---|
 |--modules/-m|PaddleHub Serving pre-installed model, listed in the form of multiple Module==Version key-value pairs<br>*`When Version is not specified, the latest version is selected by default`*|
 |--port/-p|Service port, default is 8866|
 |--use_multiprocess|Enable concurrent mode, the default is single-process mode, this mode is recommended for multi-core CPU machines<br>*`Windows operating system only supports single-process mode`*|
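For example, the flags above can be combined to serve the text detection module alone on its conventional port (a sketch; any free port works):

```shell
hub serving start -m ocr_det -p 8865
```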
@@ -103,7 +136,7 @@ hub serving start -m ocr_system
 This completes the deployment of a service API, using the default port number 8866.

-#### Way 2. Start with configuration file(CPU、GPU)
+#### 2.4.2 Start with configuration file(CPU、GPU)
 **start command:**
 ```shell
 hub serving start --config/-c config.json
@@ -140,7 +173,7 @@ export CUDA_VISIBLE_DEVICES=3
 hub serving start -c deploy/hubserving/ocr_system/config.json
 ```

-## Send prediction requests
+## 3. Send prediction requests
 After the service starts, you can use the following command to send a prediction request to obtain the prediction result:
 ```shell
 python tools/test_hubserving.py server_url image_path
@@ -149,19 +182,24 @@ python tools/test_hubserving.py server_url image_path
 Two parameters need to be passed to the script:
 - **server_url**: service address, format of which is `http://[ip_address]:[port]/predict/[module_name]`
-For example, if the detection, recognition and 2-stage serial services are started with provided configuration files, the respective `server_url` would be:
+For example, if the text angle classification, text detection, text recognition, detection+classification+recognition 3-stage, table recognition and PP-Structure services are started with the provided configuration files, the respective `server_url` would be:
+
 `http://127.0.0.1:8865/predict/ocr_det`
 `http://127.0.0.1:8866/predict/ocr_cls`
 `http://127.0.0.1:8867/predict/ocr_rec`
 `http://127.0.0.1:8868/predict/ocr_system`
-- **image_path**: Test image path, can be a single image path or an image directory path
+`http://127.0.0.1:8869/predict/structure_table`
+`http://127.0.0.1:8870/predict/structure_system`
+- **image_dir**: Test image path, can be a single image path or an image directory path
+- **visualize**: Whether to visualize the results, the default value is False
+- **output**: The folder to save the visualization results, the default value is `./hubserving_result`

 **Eg.**
 ```shell
-python tools/test_hubserving.py http://127.0.0.1:8868/predict/ocr_system ./doc/imgs/
+python tools/test_hubserving.py --server_url=http://127.0.0.1:8868/predict/ocr_system --image_dir=./doc/imgs/ --visualize=false
 ```
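Besides `tools/test_hubserving.py`, a running service can be called directly: each `serving_method` in `module.py` expects a JSON body whose `images` field is a list of base64-encoded images, and PaddleHub Serving wraps the module output in a `results` field. A minimal sketch, assuming the `ocr_system` service is on its default port and using a placeholder image path:

```python
import base64
import json

import requests


def img_to_base64(image_path):
    # the server decodes this string back into an image via base64_to_cv2
    with open(image_path, 'rb') as f:
        return base64.b64encode(f.read()).decode('utf8')


url = 'http://127.0.0.1:8868/predict/ocr_system'
data = {'images': [img_to_base64('./doc/imgs/1.jpg')]}
headers = {'Content-type': 'application/json'}
resp = requests.post(url=url, headers=headers, data=json.dumps(data))
print(resp.json()['results'])
```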
-## Returned result format
+## 4. Returned result format
 The returned result is a list. Each item in the list is a dict. The dict may contain the following fields:

 |field name|data type|description|
@@ -170,19 +208,23 @@ The returned result is a list. Each item in the list is a dict. The dict may con
 |text|str|text content|
 |confidence|float|text recognition confidence|
 |text_region|list|text location coordinates|
+|html|str|table html str|
+|regions|list|The result of layout analysis + table recognition + OCR, each item is a list, including `bbox` indicating area coordinates, `type` of area type and `res` of area results|

 The fields returned by different modules are different. For example, the results returned by the text recognition service module do not contain `text_region`. The details are as follows:

-| field name/module name | ocr_det | ocr_cls | ocr_rec | ocr_system |
-| ---- | ---- | ---- | ---- | ---- |
-|angle| | ✔ | | ✔ |
-|text| | |✔|✔|
-|confidence| |✔ |✔|✔|
-|text_region| ✔| | |✔ |
+| field name/module name | ocr_det | ocr_cls | ocr_rec | ocr_system | structure_table | structure_system |
+| --- | --- | --- | --- | --- | --- | --- |
+|angle| |✔| |✔| | |
+|text| | |✔|✔| |✔|
+|confidence| |✔|✔|✔| |✔|
+|text_region|✔| | |✔| |✔|
+|html| | | | |✔|✔|
+|regions| | | | |✔|✔|

 **Note:** If you need to add, delete or modify the returned fields, you can modify the file `module.py` of the corresponding module. For the complete process, refer to the user-defined modification service module in the next section.

-## User defined service module modification
+## 5. User defined service module modification
 If you need to modify the service logic, the following steps are generally required (take the modification of `ocr_system` for example):

 - 1. Stop service
diff --git a/deploy/hubserving/structure_system/__init__.py b/deploy/hubserving/structure_system/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c747d3e7aeca842933e083dffc01ef1fba3f4e85
--- /dev/null
+++ b/deploy/hubserving/structure_system/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
\ No newline at end of file
diff --git a/deploy/hubserving/structure_system/config.json b/deploy/hubserving/structure_system/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..642aa94a2a25759469f74280f6aab9a2495f493f
--- /dev/null
+++ b/deploy/hubserving/structure_system/config.json
@@ -0,0 +1,16 @@
+{
+    "modules_info": {
+        "structure_system": {
+            "init_args": {
+                "version": "1.0.0",
+                "use_gpu": true
+            },
+            "predict_args": {
+            }
+        }
+    },
+    "port": 8870,
+    "use_multiprocess": false,
+    "workers": 2
+}
+
diff --git a/deploy/hubserving/structure_system/module.py b/deploy/hubserving/structure_system/module.py
new file mode 100644
index 0000000000000000000000000000000000000000..92846edc6698d0d75224a2b2a844c572fcb17a56
--- /dev/null
+++ b/deploy/hubserving/structure_system/module.py
@@ -0,0 +1,149 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
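+#
+# Note: this module wraps ppstructure's StructureSystem as a PaddleHub service;
+# the module name "structure_system" registered below must match the key in
+# deploy/hubserving/structure_system/config.json, which serves it on port 8870.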
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import sys +sys.path.insert(0, ".") +import copy + +import time +import paddlehub +from paddlehub.common.logger import logger +from paddlehub.module.module import moduleinfo, runnable, serving +import cv2 +import numpy as np +import paddlehub as hub + +from tools.infer.utility import base64_to_cv2 +from ppstructure.predict_system import StructureSystem as PPStructureSystem +from ppstructure.predict_system import save_structure_res +from ppstructure.utility import parse_args +from deploy.hubserving.structure_system.params import read_params + + +@moduleinfo( + name="structure_system", + version="1.0.0", + summary="PP-Structure system service", + author="paddle-dev", + author_email="paddle-dev@baidu.com", + type="cv/structure_system") +class StructureSystem(hub.Module): + def _initialize(self, use_gpu=False, enable_mkldnn=False): + """ + initialize with the necessary elements + """ + cfg = self.merge_configs() + + cfg.use_gpu = use_gpu + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + print("use gpu: ", use_gpu) + print("CUDA_VISIBLE_DEVICES: ", _places) + cfg.gpu_mem = 8000 + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES via export CUDA_VISIBLE_DEVICES=cuda_device_id." + ) + cfg.ir_optim = True + cfg.enable_mkldnn = enable_mkldnn + + self.table_sys = PPStructureSystem(cfg) + + def merge_configs(self): + # deafult cfg + backup_argv = copy.deepcopy(sys.argv) + sys.argv = sys.argv[:1] + cfg = parse_args() + + update_cfg_map = vars(read_params()) + + for key in update_cfg_map: + cfg.__setattr__(key, update_cfg_map[key]) + + sys.argv = copy.deepcopy(backup_argv) + return cfg + + def read_images(self, paths=[]): + images = [] + for img_path in paths: + assert os.path.isfile( + img_path), "The {} isn't a valid file.".format(img_path) + img = cv2.imread(img_path) + if img is None: + logger.info("error in loading image:{}".format(img_path)) + continue + images.append(img) + return images + + def predict(self, images=[], paths=[]): + """ + Get the chinese texts in the predicted images. + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C]. If images not paths + paths (list[str]): The paths of images. If paths not images + Returns: + res (list): The result of chinese texts and save path of images. + """ + + if images != [] and isinstance(images, list) and paths == []: + predicted_data = images + elif images == [] and isinstance(paths, list) and paths != []: + predicted_data = self.read_images(paths) + else: + raise TypeError("The input data is inconsistent with expectations.") + + assert predicted_data != [], "There is not any image to be predicted. Please check the input data." + + all_results = [] + for img in predicted_data: + if img is None: + logger.info("error in loading image") + all_results.append([]) + continue + starttime = time.time() + res = self.table_sys(img) + elapse = time.time() - starttime + logger.info("Predict time: {}".format(elapse)) + + # parse result + res_final = [] + for region in res: + region.pop('img') + res_final.append(region) + all_results.append({'regions': res_final}) + return all_results + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. 
+ """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.predict(images_decode, **kwargs) + return results + + +if __name__ == '__main__': + structure_system = StructureSystem() + structure_system._initialize() + image_path = ['./doc/table/1.png'] + res = structure_system.predict(paths=image_path) + print(res) diff --git a/deploy/hubserving/structure_system/params.py b/deploy/hubserving/structure_system/params.py new file mode 100755 index 0000000000000000000000000000000000000000..3cc6a2794f80bcd68e254b82e45a05eb17811f65 --- /dev/null +++ b/deploy/hubserving/structure_system/params.py @@ -0,0 +1,31 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from deploy.hubserving.structure_table.params import read_params as table_read_params + + +def read_params(): + cfg = table_read_params() + + # params for layout parser model + cfg.layout_path_model = 'lp://PubLayNet/ppyolov2_r50vd_dcn_365e_publaynet/config' + cfg.layout_label_map = None + + cfg.mode = 'structure' + cfg.output = './output' + return cfg diff --git a/deploy/hubserving/structure_table/__init__.py b/deploy/hubserving/structure_table/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c747d3e7aeca842933e083dffc01ef1fba3f4e85 --- /dev/null +++ b/deploy/hubserving/structure_table/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. \ No newline at end of file diff --git a/deploy/hubserving/structure_table/config.json b/deploy/hubserving/structure_table/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d0e3cb1523daaec85cc0dc6099969fcfb380184b --- /dev/null +++ b/deploy/hubserving/structure_table/config.json @@ -0,0 +1,16 @@ +{ + "modules_info": { + "structure_table": { + "init_args": { + "version": "1.0.0", + "use_gpu": true + }, + "predict_args": { + } + } + }, + "port": 8869, + "use_multiprocess": false, + "workers": 2 +} + diff --git a/deploy/hubserving/structure_table/module.py b/deploy/hubserving/structure_table/module.py new file mode 100644 index 0000000000000000000000000000000000000000..00393daa037368191201a5afed4aa29a3920c268 --- /dev/null +++ b/deploy/hubserving/structure_table/module.py @@ -0,0 +1,143 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import sys +sys.path.insert(0, ".") +import copy + +import time +import paddlehub +from paddlehub.common.logger import logger +from paddlehub.module.module import moduleinfo, runnable, serving +import cv2 +import numpy as np +import paddlehub as hub + +from tools.infer.utility import base64_to_cv2 +from ppstructure.table.predict_table import TableSystem as _TableSystem +from ppstructure.predict_system import save_structure_res +from ppstructure.utility import parse_args +from deploy.hubserving.structure_table.params import read_params + + +@moduleinfo( + name="structure_table", + version="1.0.0", + summary="PP-Structure table service", + author="paddle-dev", + author_email="paddle-dev@baidu.com", + type="cv/structure_table") +class TableSystem(hub.Module): + def _initialize(self, use_gpu=False, enable_mkldnn=False): + """ + initialize with the necessary elements + """ + cfg = self.merge_configs() + cfg.use_gpu = use_gpu + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + print("use gpu: ", use_gpu) + print("CUDA_VISIBLE_DEVICES: ", _places) + cfg.gpu_mem = 8000 + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES via export CUDA_VISIBLE_DEVICES=cuda_device_id." + ) + cfg.ir_optim = True + cfg.enable_mkldnn = enable_mkldnn + + self.table_sys = _TableSystem(cfg) + + def merge_configs(self): + # deafult cfg + backup_argv = copy.deepcopy(sys.argv) + sys.argv = sys.argv[:1] + cfg = parse_args() + + update_cfg_map = vars(read_params()) + + for key in update_cfg_map: + cfg.__setattr__(key, update_cfg_map[key]) + + sys.argv = copy.deepcopy(backup_argv) + return cfg + + def read_images(self, paths=[]): + images = [] + for img_path in paths: + assert os.path.isfile( + img_path), "The {} isn't a valid file.".format(img_path) + img = cv2.imread(img_path) + if img is None: + logger.info("error in loading image:{}".format(img_path)) + continue + images.append(img) + return images + + def predict(self, images=[], paths=[]): + """ + Get the chinese texts in the predicted images. + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C]. If images not paths + paths (list[str]): The paths of images. If paths not images + Returns: + res (list): The result of chinese texts and save path of images. + """ + + if images != [] and isinstance(images, list) and paths == []: + predicted_data = images + elif images == [] and isinstance(paths, list) and paths != []: + predicted_data = self.read_images(paths) + else: + raise TypeError("The input data is inconsistent with expectations.") + + assert predicted_data != [], "There is not any image to be predicted. Please check the input data." 
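+        # run the table engine image-by-image; each TableSystem call returns the
+        # recognized table reconstructed as an HTML string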
+ + all_results = [] + for img in predicted_data: + if img is None: + logger.info("error in loading image") + all_results.append([]) + continue + starttime = time.time() + pred_html = self.table_sys(img) + elapse = time.time() - starttime + logger.info("Predict time: {}".format(elapse)) + + all_results.append({'html': pred_html}) + return all_results + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.predict(images_decode, **kwargs) + return results + + +if __name__ == '__main__': + table_system = TableSystem() + table_system._initialize() + image_path = ['./doc/table/table.jpg'] + res = table_system.predict(paths=image_path) + print(res) diff --git a/deploy/hubserving/structure_table/params.py b/deploy/hubserving/structure_table/params.py new file mode 100755 index 0000000000000000000000000000000000000000..cc1a73687b22e73346addb35e702254ef67ee8db --- /dev/null +++ b/deploy/hubserving/structure_table/params.py @@ -0,0 +1,31 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from deploy.hubserving.ocr_system.params import read_params as pp_ocr_read_params + + +def read_params(): + cfg = pp_ocr_read_params() + + # params for table structure model + cfg.table_max_len = 488 + cfg.table_model_dir = './inference/en_ppocr_mobile_v2.0_table_structure_infer/' + cfg.table_char_type = 'en' + cfg.table_char_dict_path = './ppocr/utils/dict/table_structure_dict.txt' + cfg.show_log = False + return cfg diff --git a/deploy/ios_demo/README.md b/deploy/ios_demo/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3000495e328eb396304001fff686cf9c4cbc8ab5 --- /dev/null +++ b/deploy/ios_demo/README.md @@ -0,0 +1,3 @@ +# ios Demo + +参考 https://github.com/PaddlePaddle/Paddle-Lite-Demo/blob/develop/ocr/ios/ppocr_demo/ppocr_demo/README.md diff --git a/deploy/slim/quantization/quant.py b/deploy/slim/quantization/quant.py index 941cfb36b291dcd1dbedbf51de5edd2cf0017167..1dffaab0eef35ec41c27c9c6e00f25dda048d490 100755 --- a/deploy/slim/quantization/quant.py +++ b/deploy/slim/quantization/quant.py @@ -118,6 +118,11 @@ def main(config, device, logger, vdl_writer): config['Architecture']["Head"]['out_channels'] = char_num model = build_model(config['Architecture']) + pre_best_model_dict = dict() + # load fp32 model to begin quantization + if config["Global"]["pretrained_model"] is not None: + pre_best_model_dict = load_model(config, model) + quanter = QAT(config=quant_config, act_preprocess=PACT) quanter.quantize(model) @@ -134,10 +139,12 @@ def main(config, device, logger, vdl_writer): step_each_epoch=len(train_dataloader), parameters=model.parameters()) + # resume PACT training process + if config["Global"]["checkpoints"] is not None: + pre_best_model_dict = load_model(config, model, optimizer) + # 
build metric eval_class = build_metric(config['Metric']) - # load pretrain model - pre_best_model_dict = load_model(config, model, optimizer) logger.info('train dataloader has {} iters, valid dataloader has {} iters'. format(len(train_dataloader), len(valid_dataloader))) diff --git a/doc/doc_ch/finetune.md b/doc/doc_ch/finetune.md new file mode 100644 index 0000000000000000000000000000000000000000..e8f146aadc079444c37e000d16ada8b6bda8ba18 --- /dev/null +++ b/doc/doc_ch/finetune.md @@ -0,0 +1,170 @@ +# 模型微调 + +## 1. 模型微调背景与意义 + +PaddleOCR提供的PP-OCR系列模型在通用场景中性能优异,能够解决绝大多数情况下的检测与识别问题。在垂类场景中,如果希望获取更优的模型效果,可以通过模型微调的方法,进一步提升PP-OCR系列检测与识别模型的精度。 + +本文主要介绍文本检测与识别模型在模型微调时的一些注意事项,最终希望您在自己的场景中,通过模型微调,可以获取精度更高的文本检测与识别模型。 + +本文核心要点如下所示。 + +1. PP-OCR提供的预训练模型有较好的泛化能力 +2. 加入少量真实数据(检测任务>=500张, 识别任务>=5000张),会大幅提升垂类场景的检测与识别效果 +3. 在模型微调时,加入真实通用场景数据,可以进一步提升模型精度与泛化性能 +4. 在图像检测任务中,增大图像的预测尺度,能够进一步提升较小文字区域的检测效果 +5. 在模型微调时,需要适当调整超参数(学习率,batch size最为重要),以获得更优的微调效果。 + +更多详细内容,请参考第2章与第3章。 + +## 2. 文本检测模型微调 + +### 2.1 数据选择 + +* 数据量:建议至少准备500张的文本检测数据集用于模型微调。 + +* 数据标注:单行文本标注格式,建议标注的检测框与实际语义内容一致。如在火车票场景中,姓氏与名字可能离得较远,但是它们在语义上属于同一个检测字段,这里也需要将整个姓名标注为1个检测框。 + +### 2.2 模型选择 + +建议选择PP-OCRv2模型(配置文件:[ch_PP-OCRv2_det_student.yml](../../configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_student.yml),预训练模型:[ch_PP-OCRv2_det_distill_train.tar](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_distill_train.tar))进行微调,其精度与泛化性能是目前提供的最优预训练模型。 + +更多PP-OCR系列模型,请参考[PaddleOCR 首页说明文档](../../README_ch.md)。 + +注意:在使用上述预训练模型的时候,由于保存的模型中包含教师模型,因此需要将其中的学生模型单独提取出来,再加载学生模型即可进行模型微调。 + +```python +import paddle +# 加载完整的检测预训练模型 +a = paddle.load("ch_PP-OCRv2_det_distill_train/best_accuracy.pdparams") +# 提取学生模型的参数 +b = {k[len("student_model."):]: a[k] for k in a if "student_model." in k} +# 保存模型,用于后续模型微调 +paddle.save(b, "ch_PP-OCRv2_det_student.pdparams") +``` + + +### 2.3 训练超参选择 + +在模型微调的时候,最重要的超参就是预训练模型路径`pretrained_model`, 学习率`learning_rate`与`batch_size`,部分配置文件如下所示。 + +```yaml +Global: + pretrained_model: ./pretrain_models/student.pdparams # 预训练模型路径 +Optimizer: + lr: + name: Cosine + learning_rate: 0.001 # 学习率 + warmup_epoch: 2 + regularizer: + name: 'L2' + factor: 0 + +Train: + loader: + shuffle: True + drop_last: False + batch_size_per_card: 8 # 单卡batch size + num_workers: 4 +``` + +上述配置文件中,首先需要将`pretrained_model`字段指定为2.2章节中提取出来的`ch_PP-OCRv2_det_student.pdparams`文件路径。 + +PaddleOCR提供的配置文件是在8卡训练(相当于总的batch size是`8*8=64`)、且没有加载预训练模型情况下的配置文件,因此您的场景中,学习率与总的batch size需要对应线性调整,例如 + +* 如果您的场景中是单卡训练,单卡batch_size=8,则总的batch_size=8,建议将学习率调整为`1e-4`左右。 +* 如果您的场景中是单卡训练,由于显存限制,只能设置单卡batch_size=4,则总的batch_size=4,建议将学习率调整为`5e-5`左右。 + +### 2.4 预测超参选择 + +对训练好的模型导出并进行推理时,可以通过进一步调整预测的图像尺度,来提升小面积文本的检测效果,下面是DBNet推理时的一些超参数,可以通过适当调整,提升效果。 + +| 参数名称 | 类型 | 默认值 | 含义 | +| :--: | :--: | :--: | :--: | +| det_db_thresh | float | 0.3 | DB输出的概率图中,得分大于该阈值的像素点才会被认为是文字像素点 | +| det_db_box_thresh | float | 0.6 | 检测结果边框内,所有像素点的平均得分大于该阈值时,该结果会被认为是文字区域 | +| det_db_unclip_ratio | float | 1.5 | `Vatti clipping`算法的扩张系数,使用该方法对文字区域进行扩张 | +| max_batch_size | int | 10 | 预测的batch size | +| use_dilation | bool | False | 是否对分割结果进行膨胀以获取更优检测效果 | +| det_db_score_mode | str | "fast" | DB的检测结果得分计算方法,支持`fast`和`slow`,`fast`是根据polygon的外接矩形边框内的所有像素计算平均得分,`slow`是根据原始polygon内的所有像素计算平均得分,计算速度相对较慢一些,但是更加准确一些。 | + + +更多关于推理方法的介绍可以参考[Paddle Inference推理教程](./inference.md)。 + + +## 3. 
文本识别模型微调 + + +### 3.1 数据选择 + +* 数据量:不更换字典的情况下,建议至少准备5000张的文本识别数据集用于模型微调;如果更换了字典(不建议),需要的数量更多。 + +* 数据分布:建议分布与实测场景尽量一致。如果实测场景包含大量短文本,则训练数据中建议也包含较多短文本,如果实测场景对于空格识别效果要求较高,则训练数据中建议也包含较多带空格的文本内容。 + + +* 通用中英文数据:在训练的时候,可以在训练集中添加通用真实数据(如在不更换字典的微调场景中,建议添加LSVT、RCTW、MTWI等真实数据),进一步提升模型的泛化性能。 + +### 3.2 模型选择 + +建议选择PP-OCRv2模型(配置文件:[ch_PP-OCRv2_rec_distillation.yml](../../configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_distillation.yml),预训练模型:[ch_PP-OCRv2_rec_train.tar](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_train.tar))进行微调,其精度与泛化性能是目前提供的最优预训练模型。 + +更多PP-OCR系列,模型请参考[PaddleOCR 首页说明文档](../../README_ch.md)。 + + +### 3.3 训练超参选择 + +与文本检测任务微调相同,在识别模型微调的时候,最重要的超参就是预训练模型路径`pretrained_model`, 学习率`learning_rate`与`batch_size`,部分默认配置文件如下所示。 + +```yaml +Global: + pretrained_model: # 预训练模型路径 +Optimizer: + lr: + name: Piecewise + decay_epochs : [700, 800] + values : [0.001, 0.0001] # 学习率 + warmup_epoch: 5 + regularizer: + name: 'L2' + factor: 0 + +Train: + dataset: + name: SimpleDataSet + data_dir: ./train_data/ + label_file_list: + - ./train_data/train_list.txt + ratio_list: [1.0] # 采样比例,默认值是[1.0] + loader: + shuffle: True + drop_last: False + batch_size_per_card: 128 # 单卡batch size + num_workers: 8 + +``` + + +上述配置文件中,首先需要将`pretrained_model`字段指定为2.2章节中解压得到的`ch_PP-OCRv2_rec_train/best_accuracy.pdparams`文件路径。 + +PaddleOCR提供的配置文件是在8卡训练(相当于总的batch size是`8*128=1024`)、且没有加载预训练模型情况下的配置文件,因此您的场景中,学习率与总的batch size需要对应线性调整,例如: + +* 如果您的场景中是单卡训练,单卡batch_size=128,则总的batch_size=128,在加载预训练模型的情况下,建议将学习率调整为`[1e-4, 2e-5]`左右(piecewise学习率策略,需设置2个值,下同)。 +* 如果您的场景中是单卡训练,因为显存限制,只能设置单卡batch_size=64,则总的batch_size=64,在加载预训练模型的情况下,建议将学习率调整为`[5e-5, 1e-5]`左右。 + + +如果有通用真实场景数据加进来,建议每个epoch中,垂类场景数据与真实场景的数据量保持在1:1左右。 + +比如:您自己的垂类场景识别数据量为1W,数据标签文件为`vertical.txt`,收集到的通用场景识别数据量为10W,数据标签文件为`general.txt`, + + +那么,可以设置`label_file_list`和`ratio_list`参数如下所示。每个epoch中,`vertical.txt`中会进行全采样(采样比例为1.0),包含1W条数据;`general.txt`中会按照0.1的采样比例进行采样,包含`10W*0.1=1W`条数据,最终二者的比例为`1:1`。 + +```yaml +Train: + dataset: + name: SimpleDataSet + data_dir: ./train_data/ + label_file_list: + - vertical.txt + - general.txt + ratio_list: [1.0, 0.1] +``` diff --git a/doc/doc_ch/inference.md b/doc/doc_ch/inference.md index c02da14af495cd807668dca6d7f3823d1de6820d..ade1a2dbdf728ac785efef3e5a82b4c932674b87 100755 --- a/doc/doc_ch/inference.md +++ b/doc/doc_ch/inference.md @@ -36,6 +36,8 @@ inference 模型(`paddle.jit.save`保存的模型) - [六、参数解释](#参数解释) +- [七、FAQ](#FAQ) + ## 一、训练模型转inference模型 @@ -520,3 +522,9 @@ PSE算法相关参数如下 | label_list | list | ['0', '180'] | class id对应的角度值 | | cls_batch_num | int | 6 | 方向分类器预测的batch size | | cls_thresh | float | 0.9 | 预测阈值,模型预测结果为180度,且得分大于该阈值时,认为最终预测结果为180度,需要翻转 | + + + +# 七、FAQ + +* 如果是使用paddle2.0之前版本的代码导出的`inference模型`,则其文件名为`model`与`params`,分别对应paddle2.0或者之后版本导出的`inference.pdmodel`与`inference.pdiparams`;不过目前PaddleOCR的release分支已经不支持paddle2.0之前版本导出的inference 模型,如果希望使用,需要使用develop分支(静态图分支)的代码与文档。 diff --git a/doc/doc_ch/recognition.md b/doc/doc_ch/recognition.md index 26887f41cc73f74f592eea9d04fc9167c30fc68c..cf55af29e7b6a0c92022b35746081776451627a0 100644 --- a/doc/doc_ch/recognition.md +++ b/doc/doc_ch/recognition.md @@ -75,9 +75,9 @@ train_data/rec/train/word_002.jpg 用科技让复杂的世界更简单 上述示例标注文件中,"11.jpg"和"12.jpg"的标签相同,都是`简单可依赖`,在训练的时候,对于该行标注,会随机选择其中的一张图片进行训练。 -- 测试集 +- 验证集 -同训练集类似,测试集也需要提供一个包含所有图片的文件夹(test)和一个rec_gt_test.txt,测试集的结构如下所示: +同训练集类似,验证集也需要提供一个包含所有图片的文件夹(test)和一个rec_gt_test.txt,验证集的结构如下所示: ``` |-train_data @@ -247,7 +247,10 @@ PaddleOCR支持训练和评估交替进行, 可以在 `configs/rec/rec_icdar15_t | rec_r31_sar.yml | SAR | ResNet31 | None | LSTM encoder 
| LSTM decoder | | rec_resnet_stn_bilstm_att.yml | SEED | Aster_Resnet | STN | BiLSTM | att | -*其中SEED模型需要额外加载FastText训练好的[语言模型](https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.en.300.bin.gz) +*其中SEED模型需要额外加载FastText训练好的[语言模型](https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.en.300.bin.gz) ,并且安装 fasttext 依赖: +``` +python3.7 -m pip install fasttext==0.9.1 +``` 训练中文数据,推荐使用[rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml),如您希望尝试其他算法在中文数据集上的效果,请参考下列说明修改配置文件: diff --git a/doc/doc_ch/whl.md b/doc/doc_ch/whl.md index 3b88709a2328409a266d0d482baa072dd7aa3824..2d524b83d73d4951939c7e0f108c494ab79a86c6 100644 --- a/doc/doc_ch/whl.md +++ b/doc/doc_ch/whl.md @@ -418,7 +418,7 @@ im_show.save('result.jpg') | det | 前向时使用启动检测 | TRUE | | rec | 前向时是否启动识别 | TRUE | | cls | 前向时是否启动分类 (命令行模式下使用use_angle_cls控制前向是否启动分类) | FALSE | -| show_log | 是否打印det和rec等信息 | FALSE | +| show_log | 是否打印logger信息 | FALSE | | type | 执行ocr或者表格结构化, 值可选['ocr','structure'] | ocr | | ocr_version | OCR模型版本,可选PP-OCRv2, PP-OCR。PP-OCRv2 目前仅支持中文的检测和识别模型,PP-OCR支持中文的检测,识别,多语种识别,方向分类器等模型 | PP-OCRv2 | | structure_version | 表格结构化模型版本,可选 STRUCTURE。STRUCTURE支持表格结构化模型 | STRUCTURE | diff --git a/doc/doc_en/whl_en.md b/doc/doc_en/whl_en.md index 62aa452dcd36906c6480031375e6ca94f8a36de3..2671fbb9f0b5653cff29908a0c40d14a25b2cc58 100644 --- a/doc/doc_en/whl_en.md +++ b/doc/doc_en/whl_en.md @@ -365,7 +365,7 @@ im_show.save('result.jpg') | det | Enable detction when `ppocr.ocr` func exec | TRUE | | rec | Enable recognition when `ppocr.ocr` func exec | TRUE | | cls | Enable classification when `ppocr.ocr` func exec((Use use_angle_cls in command line mode to control whether to start classification in the forward direction) | FALSE | -| show_log | Whether to print log in det and rec | FALSE | +| show_log | Whether to print log| FALSE | | type | Perform ocr or table structuring, the value is selected in ['ocr','structure'] | ocr | | ocr_version | OCR Model version number, the current model support list is as follows: PP-OCRv2 support Chinese detection and recognition model, PP-OCR support Chinese detection, recognition and direction classifier, multilingual recognition model | PP-OCRv2 | | structure_version | table structure Model version number, the current model support list is as follows: STRUCTURE support english table structure model | STRUCTURE | diff --git a/paddleocr.py b/paddleocr.py index f0938c6740606bdb2a96a6f9836602c0fb670650..d07082f0ddc1133b3e9b3a7a7703d87f7cfeeedb 100644 --- a/paddleocr.py +++ b/paddleocr.py @@ -14,6 +14,7 @@ import os import sys +import importlib __dir__ = os.path.dirname(__file__) @@ -26,6 +27,10 @@ import logging import numpy as np from pathlib import Path +tools = importlib.import_module('.', 'tools') +ppocr = importlib.import_module('.', 'ppocr') +ppstructure = importlib.import_module('.', 'ppstructure') + from tools.infer import predict_system from ppocr.utils.logging import get_logger @@ -34,7 +39,7 @@ from ppocr.utils.utility import check_and_read_gif, get_image_file_list from ppocr.utils.network import maybe_download, download_with_progressbar, is_link, confirm_model_dir_url from tools.infer.utility import draw_ocr, str2bool, check_gpu from ppstructure.utility import init_args, draw_structure_result -from ppstructure.predict_system import OCRSystem, save_structure_res +from ppstructure.predict_system import StructureSystem, save_structure_res __all__ = [ 'PaddleOCR', 'PPStructure', 'draw_ocr', 'draw_structure_result', @@ -42,7 +47,7 @@ __all__ = [ ] 
SUPPORT_DET_MODEL = ['DB'] -VERSION = '2.4' +VERSION = '2.4.0.4' SUPPORT_REC_MODEL = ['CRNN'] BASE_DIR = os.path.expanduser("~/.paddleocr/") @@ -308,20 +313,18 @@ class PaddleOCR(predict_system.TextSystem): det_lang) params.det_model_dir, det_url = confirm_model_dir_url( params.det_model_dir, - os.path.join(BASE_DIR, VERSION, 'ocr', 'det', det_lang), + os.path.join(BASE_DIR, 'whl', 'det', det_lang), det_model_config['url']) rec_model_config = get_model_config('OCR', params.ocr_version, 'rec', lang) params.rec_model_dir, rec_url = confirm_model_dir_url( params.rec_model_dir, - os.path.join(BASE_DIR, VERSION, 'ocr', 'rec', lang), - rec_model_config['url']) + os.path.join(BASE_DIR, 'whl', 'rec', lang), rec_model_config['url']) cls_model_config = get_model_config('OCR', params.ocr_version, 'cls', 'ch') params.cls_model_dir, cls_url = confirm_model_dir_url( params.cls_model_dir, - os.path.join(BASE_DIR, VERSION, 'ocr', 'cls'), - cls_model_config['url']) + os.path.join(BASE_DIR, 'whl', 'cls'), cls_model_config['url']) # download model maybe_download(params.det_model_dir, det_url) maybe_download(params.rec_model_dir, rec_url) @@ -338,7 +341,7 @@ class PaddleOCR(predict_system.TextSystem): params.rec_char_dict_path = str( Path(__file__).parent / rec_model_config['dict_path']) - print(params) + logger.debug(params) # init det_model and rec_model super().__init__(params) @@ -395,7 +398,7 @@ class PaddleOCR(predict_system.TextSystem): return rec_res -class PPStructure(OCRSystem): +class PPStructure(StructureSystem): def __init__(self, **kwargs): params = parse_args(mMain=False) params.__dict__.update(**kwargs) @@ -412,20 +415,18 @@ class PPStructure(OCRSystem): det_lang) params.det_model_dir, det_url = confirm_model_dir_url( params.det_model_dir, - os.path.join(BASE_DIR, VERSION, 'ocr', 'det', det_lang), + os.path.join(BASE_DIR, 'whl', 'det', det_lang), det_model_config['url']) rec_model_config = get_model_config('OCR', params.ocr_version, 'rec', lang) params.rec_model_dir, rec_url = confirm_model_dir_url( params.rec_model_dir, - os.path.join(BASE_DIR, VERSION, 'ocr', 'rec', lang), - rec_model_config['url']) + os.path.join(BASE_DIR, 'whl', 'rec', lang), rec_model_config['url']) table_model_config = get_model_config( 'STRUCTURE', params.structure_version, 'table', 'en') params.table_model_dir, table_url = confirm_model_dir_url( params.table_model_dir, - os.path.join(BASE_DIR, VERSION, 'ocr', 'table'), - table_model_config['url']) + os.path.join(BASE_DIR, 'whl', 'table'), table_model_config['url']) # download model maybe_download(params.det_model_dir, det_url) maybe_download(params.rec_model_dir, rec_url) @@ -438,7 +439,7 @@ class PPStructure(OCRSystem): params.table_char_dict_path = str( Path(__file__).parent / table_model_config['dict_path']) - print(params) + logger.debug(params) super().__init__(params) def __call__(self, img): diff --git a/ppocr/losses/det_pse_loss.py b/ppocr/losses/det_pse_loss.py index 9b8ac4b5a5dfac176c398dd0a9e490e5ca67ad5f..6b31343ed4d1687ee8ca44592fba0331b0b287dc 100644 --- a/ppocr/losses/det_pse_loss.py +++ b/ppocr/losses/det_pse_loss.py @@ -121,9 +121,9 @@ class PSELoss(nn.Layer): if neg_num == 0: selected_mask = training_mask - selected_mask = selected_mask.view( - 1, selected_mask.shape[0], - selected_mask.shape[1]).astype('float32') + selected_mask = selected_mask.reshape( + [1, selected_mask.shape[0], selected_mask.shape[1]]).astype( + 'float32') return selected_mask neg_score = paddle.masked_select(score, gt_text <= 0.5) diff --git a/ppocr/losses/kie_sdmgr_loss.py 
b/ppocr/losses/kie_sdmgr_loss.py index 8f2173e49904926ebab2c450890c4fafe3f36b50..745671f58da91c108624097faea72d55c1877f6b 100644 --- a/ppocr/losses/kie_sdmgr_loss.py +++ b/ppocr/losses/kie_sdmgr_loss.py @@ -1,4 +1,4 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +# reference from : https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/kie/losses/sdmgr_loss.py + from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/ppocr/metrics/kie_metric.py b/ppocr/metrics/kie_metric.py index 761965cfcc25d2a6de30342769d01b36d6212d98..f3bce0411d6521b1756892cbd7b4c6fcb7bcfb6c 100644 --- a/ppocr/metrics/kie_metric.py +++ b/ppocr/metrics/kie_metric.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# The code is refer from: https://github.com/open-mmlab/mmocr/blob/main/mmocr/core/evaluation/kie_metric.py from __future__ import absolute_import from __future__ import division diff --git a/ppocr/modeling/heads/kie_sdmgr_head.py b/ppocr/modeling/heads/kie_sdmgr_head.py index 46ac0ed8dcaccb7628ef87fbe851a2b6acd60d55..ac5f73fa7e5b182faa1456e069da79118d6f7068 100644 --- a/ppocr/modeling/heads/kie_sdmgr_head.py +++ b/ppocr/modeling/heads/kie_sdmgr_head.py @@ -1,4 +1,4 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+# reference from : https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/kie/heads/sdmgr_head.py from __future__ import absolute_import from __future__ import division diff --git a/ppocr/modeling/heads/rec_sar_head.py b/ppocr/modeling/heads/rec_sar_head.py index a46cce7de2c8e59cf797db96fc6fcb7e25fa549a..3b7674268772d8a332b963fd6b82dfb71ee40212 100644 --- a/ppocr/modeling/heads/rec_sar_head.py +++ b/ppocr/modeling/heads/rec_sar_head.py @@ -216,7 +216,7 @@ class ParallelSARDecoder(BaseDecoder): self.pred_dropout = nn.Dropout(pred_dropout) pred_num_classes = self.num_classes - 1 if pred_concat: - fc_in_channel = decoder_rnn_out_size + d_model + d_enc + fc_in_channel = decoder_rnn_out_size + d_model + encoder_rnn_out_size else: fc_in_channel = d_model self.prediction = nn.Linear(fc_in_channel, pred_num_classes) diff --git a/ppocr/postprocess/rec_postprocess.py b/ppocr/postprocess/rec_postprocess.py index 93d385544e40af59a871d09ee6181888ce84691d..de771acca86a8956b06b366b840aac7e21f835a4 100644 --- a/ppocr/postprocess/rec_postprocess.py +++ b/ppocr/postprocess/rec_postprocess.py @@ -54,22 +54,24 @@ class BaseRecLabelDecode(object): ignored_tokens = self.get_ignored_tokens() batch_size = len(text_index) for batch_idx in range(batch_size): - char_list = [] - conf_list = [] - for idx in range(len(text_index[batch_idx])): - if text_index[batch_idx][idx] in ignored_tokens: - continue - if is_remove_duplicate: - # only for predict - if idx > 0 and text_index[batch_idx][idx - 1] == text_index[ - batch_idx][idx]: - continue - char_list.append(self.character[int(text_index[batch_idx][ - idx])]) - if text_prob is not None: - conf_list.append(text_prob[batch_idx][idx]) - else: - conf_list.append(1) + selection = np.ones(len(text_index[batch_idx]), dtype=bool) + if is_remove_duplicate: + selection[1:] = text_index[batch_idx][1:] != text_index[ + batch_idx][:-1] + for ignored_token in ignored_tokens: + selection &= text_index[batch_idx] != ignored_token + + char_list = [ + self.character[text_id] + for text_id in text_index[batch_idx][selection] + ] + if text_prob is not None: + conf_list = text_prob[batch_idx][selection] + else: + conf_list = [1] * len(selection) + if len(conf_list) == 0: + conf_list = [0] + text = ''.join(char_list) result_list.append((text, np.mean(conf_list))) return result_list diff --git a/ppocr/utils/logging.py b/ppocr/utils/logging.py index ce827e8b10c4b63b736886a2f72106c7570576b1..1eac8f351a4d30915d6f4ca863267cb73b9b1f19 100644 --- a/ppocr/utils/logging.py +++ b/ppocr/utils/logging.py @@ -26,7 +26,7 @@ logger_initialized = {} @functools.lru_cache() -def get_logger(name='root', log_file=None, log_level=logging.DEBUG): +def get_logger(name='ppocr', log_file=None, log_level=logging.DEBUG): """Initialize and get a logger by name. 
If the logger has not been initialized, this method will initialize the logger by adding one or two handlers, otherwise the initialized logger will @@ -67,4 +67,5 @@ def get_logger(name='root', log_file=None, log_level=logging.DEBUG): else: logger.setLevel(logging.ERROR) logger_initialized[name] = True + logger.propagate = False return logger diff --git a/ppstructure/predict_system.py b/ppstructure/predict_system.py index 3ae52fdd703670c4250f1b4a440004fa8b9082ad..96227aabbbf38904417f3e3a6fd6c49031c4bc58 100644 --- a/ppstructure/predict_system.py +++ b/ppstructure/predict_system.py @@ -22,6 +22,7 @@ sys.path.append(os.path.abspath(os.path.join(__dir__, '..'))) os.environ["FLAGS_allocator_strategy"] = 'auto_growth' import cv2 +import json import numpy as np import time import logging @@ -35,7 +36,7 @@ from ppstructure.utility import parse_args, draw_structure_result logger = get_logger() -class OCRSystem(object): +class StructureSystem(object): def __init__(self, args): self.mode = args.mode if self.mode == 'structure': @@ -66,8 +67,7 @@ class OCRSystem(object): self.use_angle_cls = args.use_angle_cls self.drop_score = args.drop_score elif self.mode == 'vqa': - from ppstructure.vqa.infer_ser_e2e import SerPredictor, draw_ser_results - self.vqa_engine = SerPredictor(args) + raise NotImplementedError def __call__(self, img): if self.mode == 'structure': @@ -82,24 +82,24 @@ class OCRSystem(object): res = self.table_system(roi_img) else: filter_boxes, filter_rec_res = self.text_system(roi_img) - filter_boxes = [x + [x1, y1] for x in filter_boxes] - filter_boxes = [ - x.reshape(-1).tolist() for x in filter_boxes - ] # remove style char style_token = [ '', '', '', '', '', '', '', '', '', '', '', '', '', '' ] - filter_rec_res_tmp = [] - for rec_res in filter_rec_res: + res = [] + for box, rec_res in zip(filter_boxes, filter_rec_res): rec_str, rec_conf = rec_res for token in style_token: if token in rec_str: rec_str = rec_str.replace(token, '') - filter_rec_res_tmp.append((rec_str, rec_conf)) - res = (filter_boxes, filter_rec_res_tmp) + box += [x1, y1] + res.append({ + 'text': rec_str, + 'confidence': float(rec_conf), + 'text_region': box.tolist() + }) res_list.append({ 'type': region.type, 'bbox': [x1, y1, x2, y2], @@ -107,7 +107,7 @@ class OCRSystem(object): 'res': res }) elif self.mode == 'vqa': - res_list, _ = self.vqa_engine(img) + raise NotImplementedError return res_list @@ -123,15 +123,14 @@ def save_structure_res(res, save_folder, img_name): excel_path = os.path.join(excel_save_folder, '{}.xlsx'.format(region['bbox'])) to_excel(region['res'], excel_path) - if region['type'] == 'Figure': + elif region['type'] == 'Figure': roi_img = region['img'] img_path = os.path.join(excel_save_folder, '{}.jpg'.format(region['bbox'])) cv2.imwrite(img_path, roi_img) else: - for box, rec_res in zip(region['res'][0], region['res'][1]): - f.write('{}\t{}\n'.format( - np.array(box).reshape(-1).tolist(), rec_res)) + for text_result in region['res']: + f.write('{}\n'.format(json.dumps(text_result))) def main(args): @@ -139,7 +138,7 @@ def main(args): image_file_list = image_file_list image_file_list = image_file_list[args.process_id::args.total_process_num] - structure_sys = OCRSystem(args) + structure_sys = StructureSystem(args) img_num = len(image_file_list) save_folder = os.path.join(args.output, structure_sys.mode) os.makedirs(save_folder, exist_ok=True) @@ -162,8 +161,9 @@ def main(args): draw_img = draw_structure_result(img, res, args.vis_font_path) img_save_path = os.path.join(save_folder, img_name, 
'show.jpg') elif structure_sys.mode == 'vqa': - draw_img = draw_ser_results(img, res, args.vis_font_path) - img_save_path = os.path.join(save_folder, img_name + '.jpg') + raise NotImplementedError + # draw_img = draw_ser_results(img, res, args.vis_font_path) + # img_save_path = os.path.join(save_folder, img_name + '.jpg') cv2.imwrite(img_save_path, draw_img) logger.info('result save to {}'.format(img_save_path)) elapse = time.time() - starttime diff --git a/ppstructure/utility.py b/ppstructure/utility.py index 43cb0b0873812baf3ce2dc689fb62f1d0ca2c551..10d9f71a7cdfed00b555c46689b2dd3c5aad807c 100644 --- a/ppstructure/utility.py +++ b/ppstructure/utility.py @@ -40,12 +40,6 @@ def init_args(): type=ast.literal_eval, default=None, help='label map according to ppstructure/layout/README_ch.md') - # params for ser - parser.add_argument("--model_name_or_path", type=str) - parser.add_argument("--max_seq_length", type=int, default=512) - parser.add_argument( - "--label_map_path", type=str, default='./vqa/labels/labels_ser.txt') - parser.add_argument( "--mode", type=str, @@ -67,10 +61,10 @@ def draw_structure_result(image, result, font_path): if region['type'] == 'Table': pass else: - for box, rec_res in zip(region['res'][0], region['res'][1]): - boxes.append(np.array(box).reshape(-1, 2)) - txts.append(rec_res[0]) - scores.append(rec_res[1]) + for text_result in region['res']: + boxes.append(np.array(text_result['text_region'])) + txts.append(text_result['text']) + scores.append(text_result['confidence']) im_show = draw_ocr_box_txt( image, boxes, txts, scores, font_path=font_path, drop_score=0) return im_show diff --git a/ppstructure/vqa/README.md b/ppstructure/vqa/README.md index b9a82cc5fd971800aaebd9bc4553ba6f0700845e..f142778506ee53ee8955f078b0116f033522a4e6 100644 --- a/ppstructure/vqa/README.md +++ b/ppstructure/vqa/README.md @@ -242,3 +242,7 @@ python3 tools/infer_vqa_token_ser_re.py -c configs/vqa/re/layoutxlm.yml -o Archi - LayoutXLM: Multimodal Pre-training for Multilingual Visually-rich Document Understanding, https://arxiv.org/pdf/2104.08836.pdf - microsoft/unilm/layoutxlm, https://github.com/microsoft/unilm/tree/master/layoutxlm - XFUND dataset, https://github.com/doc-analysis/XFUND + +## License + +The content of this project itself is licensed under the [Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA 4.0)](https://creativecommons.org/licenses/by-nc-sa/4.0/) diff --git a/requirements.txt b/requirements.txt index 1d9522aa0167c60ffce263a35b86640efb1438b2..b60d48371337e38bde6e51171aa6ecfb9573fb4d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,4 +12,3 @@ cython lxml premailer openpyxl -fasttext==0.9.1 diff --git a/test_tipc/configs/det_mv3_pse_v2.0/det_mv3_pse.yml b/test_tipc/configs/det_mv3_pse_v2.0/det_mv3_pse.yml index d37fdcfbb5b27404403674d99c1b8abe8cd65e85..27ec3ebeaf6a2033d98bc8f5cb0ac275972bdbbf 100644 --- a/test_tipc/configs/det_mv3_pse_v2.0/det_mv3_pse.yml +++ b/test_tipc/configs/det_mv3_pse_v2.0/det_mv3_pse.yml @@ -56,7 +56,7 @@ PostProcess: thresh: 0 box_thresh: 0.85 min_area: 16 - box_type: box # 'box' or 'poly' + box_type: quad # 'quad' or 'poly' scale: 1 Metric: @@ -132,4 +132,4 @@ Eval: shuffle: False drop_last: False batch_size_per_card: 1 # must be 1 - num_workers: 8 \ No newline at end of file + num_workers: 8 diff --git a/test_tipc/configs/det_r18_vd_v2_0/train_infer_python.txt b/test_tipc/configs/det_r18_vd_db_v2_0/train_infer_python.txt similarity index 100% rename from test_tipc/configs/det_r18_vd_v2_0/train_infer_python.txt rename to 
test_tipc/configs/det_r18_vd_db_v2_0/train_infer_python.txt diff --git a/test_tipc/configs/det_r50_vd_pse_v2_0/det_r50_vd_pse.yml b/test_tipc/configs/det_r50_vd_pse_v2_0/det_r50_vd_pse.yml index 5ebc4252718d5572837eac58061bf6f9eb35bf73..f7e60fd1968820ef093455473346a6b8f0f8d34e 100644 --- a/test_tipc/configs/det_r50_vd_pse_v2_0/det_r50_vd_pse.yml +++ b/test_tipc/configs/det_r50_vd_pse_v2_0/det_r50_vd_pse.yml @@ -55,7 +55,7 @@ PostProcess: thresh: 0 box_thresh: 0.85 min_area: 16 - box_type: box # 'box' or 'poly' + box_type: quad # 'quad' or 'poly' scale: 1 Metric: @@ -131,4 +131,4 @@ Eval: shuffle: False drop_last: False batch_size_per_card: 1 # must be 1 - num_workers: 8 \ No newline at end of file + num_workers: 8 diff --git a/test_tipc/prepare.sh b/test_tipc/prepare.sh index bd4af1923c0e00a613ea2734c6fa90232d35469f..31433884a69da24fc55dee4f0853e5cee1ea8edb 100644 --- a/test_tipc/prepare.sh +++ b/test_tipc/prepare.sh @@ -60,6 +60,13 @@ if [ ${MODE} = "lite_train_lite_infer" ];then ln -s ./icdar2015_lite ./icdar2015 cd ../ cd ./inference && tar xf rec_inference.tar && cd ../ + if [ ${model_name} == "ch_PPOCRv2_det" ] || [ ${model_name} == "ch_PPOCRv2_det_PACT" ]; then + wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_train.tar --no-check-certificate + cd ./pretrain_models/ && tar xf ch_ppocr_server_v2.0_det_train.tar && cd ../ + fi + if [ ${model_name} == "det_r18_db_v2_0" ]; then + wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/pretrained/ResNet18_vd_pretrained.pdparams --no-check-certificate + fi if [ ${model_name} == "en_server_pgnetA" ]; then wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/total_text_lite.tar --no-check-certificate wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/pgnet/en_server_pgnetA.tar --no-check-certificate diff --git a/test_tipc/supplementary/config.py b/test_tipc/supplementary/config.py index d0dce227ef1f1a57780b36cb7f9f60acfe6afc36..72a99c70af4dfb7b2d43e6a3bf34e4b3db10d8db 100644 --- a/test_tipc/supplementary/config.py +++ b/test_tipc/supplementary/config.py @@ -122,7 +122,7 @@ def preprocess(is_train=False): log_file = '{}/train.log'.format(save_model_dir) else: log_file = None - logger = get_logger(name='root', log_file=log_file) + logger = get_logger(log_file=log_file) # check if set use_gpu=True in paddlepaddle cpu version use_gpu = config['use_gpu'] diff --git a/tools/end2end/convert_ppocr_label.py b/tools/end2end/convert_ppocr_label.py new file mode 100644 index 0000000000000000000000000000000000000000..8084cac785125f23885399931f98531326b6fb20 --- /dev/null +++ b/tools/end2end/convert_ppocr_label.py @@ -0,0 +1,94 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
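+
+# Converts PPOCR-format labels (each line: image_path \t json list of
+# {"transcription": ..., "points": ...}) into one <image_name>.txt file per
+# image, in the tab-separated polygon/transcription format consumed by
+# eval_end2end.py.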
+import numpy as np
+import json
+import os
+
+
+def poly_to_string(poly):
+    if len(poly.shape) > 1:
+        poly = np.array(poly).flatten()
+
+    string = "\t".join(str(i) for i in poly)
+    return string
+
+
+def convert_label(label_dir, mode="gt", save_dir="./save_results/"):
+    if not os.path.exists(label_dir):
+        raise ValueError(f"The file {label_dir} does not exist!")
+
+    assert label_dir != save_dir, "save_dir must differ from label_dir"
+
+    label_file = open(label_dir, 'r')
+    data = label_file.readlines()
+    label_file.close()
+
+    gt_dict = {}
+
+    for line in data:
+        try:
+            tmp = line.split('\t')
+            assert len(tmp) == 2, "line must contain exactly one tab separator"
+        except:
+            tmp = line.strip().split(' ')
+
+        gt_lists = []
+
+        if tmp[0].split('/')[0] is not None:
+            img_path = tmp[0]
+            anno = json.loads(tmp[1])
+            gt_collect = []
+            for dic in anno:
+                #txt = dic['transcription'].replace(' ', '')  # ignore blank
+                txt = dic['transcription']
+                if 'score' in dic and float(dic['score']) < 0.5:
+                    continue
+                if u'\u3000' in txt: txt = txt.replace(u'\u3000', u' ')
+                #while ' ' in txt:
+                #    txt = txt.replace(' ', '')
+                poly = np.array(dic['points']).flatten()
+                if txt == "###":
+                    txt_tag = 1  ## ignore 1
+                else:
+                    txt_tag = 0
+                if mode == "gt":
+                    gt_label = poly_to_string(poly) + "\t" + str(
+                        txt_tag) + "\t" + txt + "\n"
+                else:
+                    gt_label = poly_to_string(poly) + "\t" + txt + "\n"
+
+                gt_lists.append(gt_label)
+
+            gt_dict[img_path] = gt_lists
+        else:
+            continue
+
+    if not os.path.exists(save_dir):
+        os.makedirs(save_dir)
+
+    for img_name in gt_dict.keys():
+        save_name = img_name.split("/")[-1]
+        save_file = os.path.join(save_dir, save_name + ".txt")
+        with open(save_file, "w") as f:
+            f.writelines(gt_dict[img_name])
+
+    print("The convert label saved in {}".format(save_dir))
+
+
+if __name__ == "__main__":
+
+    ppocr_label_gt = "/paddle/Datasets/chinese/test_set/Label_refine_310_V2.txt"
+    convert_label(ppocr_label_gt, "gt", "./save_gt_310_V2/")
+
+    ppocr_label_pred = "./infer_results/ch_PPOCRV2_infer.txt"
+    convert_label(ppocr_label_pred, "pred", "./save_PPOCRV2_infer/")
diff --git a/tools/end2end/draw_html.py b/tools/end2end/draw_html.py
new file mode 100644
index 0000000000000000000000000000000000000000..fcac8ad3bfb6f0d1fcc48ea026b0febe60a001c0
--- /dev/null
+++ b/tools/end2end/draw_html.py
@@ -0,0 +1,73 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
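+
+# Render a simple HTML page that lists every image under --image_dir, so that
+# end-to-end prediction results can be inspected side by side in a browser.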
+
+import os
+import argparse
+
+
+def str2bool(v):
+    return v.lower() in ("true", "t", "1")
+
+
+def init_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--image_dir", type=str, default="")
+    parser.add_argument("--save_html_path", type=str, default="./default.html")
+    parser.add_argument("--width", type=int, default=640)
+    return parser
+
+
+def parse_args():
+    parser = init_args()
+    return parser.parse_args()
+
+
+def draw_debug_img(args):
+
+    html_path = args.save_html_path
+
+    err_cnt = 0
+    with open(html_path, 'w') as html:
+        html.write('<html>\n<body>\n')
+        html.write('<table border="1">\n')
+        html.write(
+            "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />"
+        )
+        image_list = []
+        path = args.image_dir
+        for i, filename in enumerate(sorted(os.listdir(path))):
+            if filename.endswith("txt"): continue
+            # The image path
+            base = "{}/{}".format(path, filename)
+            html.write("<tr>\n")
+            html.write(f'<td> {filename}\n GT')
+            html.write(f'<td>GT\n<img src="{base}" width={args.width}></td>')
+            html.write("</tr>\n")
+        html.write('</table>\n')
+        html.write('</html>\n</body>\n')
+    print(f"The html file saved in {html_path}")
+    return
+
+
+if __name__ == "__main__":
+
+    args = parse_args()
+
+    draw_debug_img(args)
diff --git a/tools/end2end/eval_end2end.py b/tools/end2end/eval_end2end.py
new file mode 100644
index 0000000000000000000000000000000000000000..6e7573ca472503e5d2216723c056ddf42c77e0aa
--- /dev/null
+++ b/tools/end2end/eval_end2end.py
@@ -0,0 +1,193 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import re
+import sys
+import shapely
+from shapely.geometry import Polygon
+import numpy as np
+from collections import defaultdict
+import operator
+import editdistance
+
+
+def strQ2B(ustring):
+    rstring = ""
+    for uchar in ustring:
+        inside_code = ord(uchar)
+        if inside_code == 12288:
+            inside_code = 32
+        elif (inside_code >= 65281 and inside_code <= 65374):
+            inside_code -= 65248
+        rstring += chr(inside_code)
+    return rstring
+
+
+def polygon_from_str(polygon_points):
+    """
+    Create a shapely polygon object from gt or dt line.
+    """
+    polygon_points = np.array(polygon_points).reshape(4, 2)
+    polygon = Polygon(polygon_points).convex_hull
+    return polygon
+
+
+def polygon_iou(poly1, poly2):
+    """
+    Intersection over union between two shapely polygons.
+    """
+    if not poly1.intersects(poly2):
+        # fast rejection test that avoids the expensive intersection call
+        iou = 0
+    else:
+        try:
+            inter_area = poly1.intersection(poly2).area
+            union_area = poly1.area + poly2.area - inter_area
+            iou = float(inter_area) / union_area
+        except shapely.geos.TopologicalError:
+            print('shapely.geos.TopologicalError occurred, iou set to 0')
+            iou = 0
+    return iou
+
+
+def ed(str1, str2):
+    return editdistance.eval(str1, str2)
+
+
+def e2e_eval(gt_dir, res_dir, ignore_blank=False):
+    print('start testing...')
+    iou_thresh = 0.5
+    val_names = os.listdir(gt_dir)
+    num_gt_chars = 0
+    gt_count = 0
+    dt_count = 0
+    hit = 0
+    ed_sum = 0
+
+    for val_name in val_names:
+        with open(os.path.join(gt_dir, val_name), encoding='utf-8') as f:
+            gt_lines = [o.strip() for o in f.readlines()]
+        gts = []
+        ignore_masks = []
+        for line in gt_lines:
+            parts = line.strip().split('\t')
+            # ignore illegal data
+            if len(parts) < 9:
+                continue
+            assert len(parts) < 11, "line error: {}".format(line)
+            if len(parts) == 9:
+                gts.append(parts[:8] + [''])
+            else:
+                gts.append(parts[:8] + [parts[-1]])
+
+            ignore_masks.append(parts[8])
+
+        val_path = os.path.join(res_dir, val_name)
+        if not os.path.exists(val_path):
+            dt_lines = []
+        else:
+            with open(val_path, encoding='utf-8') as f:
+                dt_lines = [o.strip() for o in f.readlines()]
+        dts = []
+        for line in dt_lines:
+            parts = line.strip().split("\t")
+            assert len(parts) < 10, "line error: {}".format(line)
+            if len(parts) == 8:
+                dts.append(parts + [''])
+            else:
+                dts.append(parts)
+
+        dt_match = [False] * len(dts)
+        gt_match = [False] * len(gts)
+        all_ious = defaultdict(tuple)
+        for index_gt, gt in enumerate(gts):
+            gt_coors = [float(gt_coor) for gt_coor in gt[0:8]]
+            gt_poly = polygon_from_str(gt_coors)
+            for index_dt, dt in enumerate(dts):
+                dt_coors = [float(dt_coor) for dt_coor in dt[0:8]]
+                dt_poly = polygon_from_str(dt_coors)
+                iou = polygon_iou(dt_poly, gt_poly)
+                if iou >= iou_thresh:
+                    all_ious[(index_gt, index_dt)] = iou
+        sorted_ious = sorted(
+            all_ious.items(), key=operator.itemgetter(1), reverse=True)
+        sorted_gt_dt_pairs = [item[0] for item in sorted_ious]
+
+        # matched gt and dt
+        for gt_dt_pair in sorted_gt_dt_pairs:
+            index_gt, index_dt = gt_dt_pair
+            if not gt_match[index_gt] and not dt_match[index_dt]:
+                gt_match[index_gt] = True
+                dt_match[index_dt] = True
+                if ignore_blank:
+                    gt_str = strQ2B(gts[index_gt][8]).replace(" ", "")
+                    dt_str = strQ2B(dts[index_dt][8]).replace(" ", "")
+                else:
+                    gt_str = strQ2B(gts[index_gt][8])
+                    dt_str = strQ2B(dts[index_dt][8])
+                if ignore_masks[index_gt] == '0':
+                    ed_sum += ed(gt_str, dt_str)
+                    num_gt_chars += len(gt_str)
+                    if gt_str == dt_str:
+                        hit += 1
+                    gt_count += 1
+                    dt_count += 1
+
+        # unmatched dt
+        for tindex, dt_match_flag in enumerate(dt_match):
+            if not dt_match_flag:
+                dt_str = dts[tindex][8]
+                gt_str = ''
+                ed_sum += ed(dt_str, gt_str)
+                dt_count += 1
+
+        # unmatched gt
+        for tindex, gt_match_flag in enumerate(gt_match):
+            if not gt_match_flag and ignore_masks[tindex] == '0':
+                dt_str = ''
+                gt_str = gts[tindex][8]
+                ed_sum += ed(gt_str, dt_str)
+                num_gt_chars += len(gt_str)
+                gt_count += 1
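+    # Metric sketch: a prediction counts as a hit when it is matched to a
+    # gt box at IoU >= 0.5 and the two transcriptions are identical, so
+    #   precision = hit / dt_count
+    #   recall    = hit / gt_count
+    #   fmeasure  = 2 * precision * recall / (precision + recall)
+    # e.g. hit=1557, dt_count=2693, gt_count=3283 (the readme example) gives
+    # precision 57.8%, recall 47.4%, fmeasure 52.1%.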
+    eps = 1e-9
+    print('hit, dt_count, gt_count', hit, dt_count, gt_count)
+    precision = hit / (dt_count + eps)
+    recall = hit / (gt_count + eps)
+    fmeasure = 2.0 * precision * recall / (precision + recall + eps)
+    avg_edit_dist_img = ed_sum / len(val_names)
+    avg_edit_dist_field = ed_sum / (gt_count + eps)
+    character_acc = 1 - ed_sum / (num_gt_chars + eps)
+
+    print('character_acc: %.2f' % (character_acc * 100) + "%")
+    print('avg_edit_dist_field: %.2f' % (avg_edit_dist_field))
+    print('avg_edit_dist_img: %.2f' % (avg_edit_dist_img))
+    print('precision: %.2f' % (precision * 100) + "%")
+    print('recall: %.2f' % (recall * 100) + "%")
+    print('fmeasure: %.2f' % (fmeasure * 100) + "%")
+
+
+if __name__ == '__main__':
+    if len(sys.argv) != 3:
+        print("usage: python3 eval_end2end.py gt_dir res_dir")
+        sys.exit(-1)
+    gt_folder = sys.argv[1]
+    pred_folder = sys.argv[2]
+    e2e_eval(gt_folder, pred_folder)
diff --git a/tools/end2end/readme.md b/tools/end2end/readme.md
new file mode 100644
index 0000000000000000000000000000000000000000..69da06dcdabc92c0b6f1831341e592e674ea7473
--- /dev/null
+++ b/tools/end2end/readme.md
@@ -0,0 +1,69 @@
+
+# Introduction
+
+The `tools/end2end` directory contains the metric evaluation code and the visualization tool for the chained text detection + text recognition pipeline. This document describes how to evaluate the end-to-end metrics.
+
+
+## End-to-end evaluation steps
+
+**Step 1:**
+
+Run `tools/infer/predict_system.py` and save the prediction results:
+
+```
+python3 tools/infer/predict_system.py --det_model_dir=./ch_PP-OCRv2_det_infer/ --rec_model_dir=./ch_PP-OCRv2_rec_infer/ --image_dir=./datasets/img_dir/ --draw_img_save_dir=./ch_PP-OCRv2_results/ --is_visualize=True
+```
+
+By default the visualized detection and recognition images are saved in `./ch_PP-OCRv2_results/`, and the predictions are saved in `./ch_PP-OCRv2_results/system_results.txt` in the following format:
+```
+all-sum-510/00224225.jpg	[{"transcription": "超赞", "points": [[8.0, 48.0], [157.0, 44.0], [159.0, 115.0], [10.0, 119.0]], "score": "0.99396634"}, {"transcription": "中", "points": [[202.0, 152.0], [230.0, 152.0], [230.0, 163.0], [202.0, 163.0]], "score": "0.09310734"}, {"transcription": "58.0m", "points": [[196.0, 192.0], [444.0, 192.0], [444.0, 240.0], [196.0, 240.0]], "score": "0.44041982"}, {"transcription": "汽配", "points": [[55.0, 263.0], [95.0, 263.0], [95.0, 281.0], [55.0, 281.0]], "score": "0.9986651"}, {"transcription": "成总店", "points": [[120.0, 262.0], [176.0, 262.0], [176.0, 283.0], [120.0, 283.0]], "score": "0.9929402"}, {"transcription": "K", "points": [[237.0, 286.0], [311.0, 286.0], [311.0, 345.0], [237.0, 345.0]], "score": "0.6074794"}, {"transcription": "88:-8", "points": [[203.0, 405.0], [477.0, 414.0], [475.0, 459.0], [201.0, 450.0]], "score": "0.7106863"}]
+```
+
+
+**Step 2:**
+
+Convert the data saved in Step 1 into the format required by the end-to-end evaluation.
+Edit `tools/end2end/convert_ppocr_label.py`: in its `__main__` block, set the input label path, the mode, and the save path, so that the ground-truth labels and the prediction results are converted separately:
+
+```
+ppocr_label_gt = "gt_label.txt"
+convert_label(ppocr_label_gt, "gt", "./save_gt_label/")
+
+ppocr_label_pred = "./ch_PP-OCRv2_results/system_results.txt"
+convert_label(ppocr_label_pred, "pred", "./save_PPOCRV2_infer/")
+```
+
+Then run `convert_ppocr_label.py`:
+```
+python3 tools/end2end/convert_ppocr_label.py
+```
+
+This produces the following directories:
+```
+├── ./save_gt_label/
+├── ./save_PPOCRV2_infer/
+```
+
+**Step 3:**
+
+Run `tools/end2end/eval_end2end.py` to compute the end-to-end metrics:
+
+```
+python3 tools/end2end/eval_end2end.py "gt_label_dir" "predict_label_dir"
+```
+
+For example:
+
+```
+python3 tools/end2end/eval_end2end.py ./save_gt_label/ ./save_PPOCRV2_infer/
+```
+This prints results such as the following, where `fmeasure` is the main metric to watch:
+```
+hit, dt_count, gt_count 1557 2693 3283
+character_acc: 61.77%
+avg_edit_dist_field: 3.08
+avg_edit_dist_img: 51.82
+precision: 57.82%
+recall: 47.43%
+fmeasure: 52.11%
+```
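+
+Optionally, `tools/end2end/draw_html.py` can collect the visualized images from Step 1 into a single HTML page for quick side-by-side inspection (the output path below is illustrative):
+
+```
+python3 tools/end2end/draw_html.py --image_dir=./ch_PP-OCRv2_results/ --save_html_path=./visual.html
+```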
diff --git a/tools/infer/predict_det.py b/tools/infer/predict_det.py
index 37ac818dbfd22dc4d5d933613be161891530229d..695587a9aa39f27fb5e37ba8d5447fb9f085e1e1 100755
--- a/tools/infer/predict_det.py
+++ b/tools/infer/predict_det.py
@@ -150,29 +150,15 @@ class TextDetector(object):
             logger=logger)

     def order_points_clockwise(self, pts):
-        """
-        reference from: https://github.com/jrosebr1/imutils/blob/master/imutils/perspective.py
-        # sort the points based on their x-coordinates
-        """
-        xSorted = pts[np.argsort(pts[:, 0]), :]
-
-        # grab the left-most and right-most points from the sorted
-        # x-roodinate points
-        leftMost = xSorted[:2, :]
-        rightMost = xSorted[2:, :]
-
-        # now, sort the left-most coordinates according to their
-        # y-coordinates so we can grab the top-left and bottom-left
-        # points, respectively
-        leftMost = leftMost[np.argsort(leftMost[:, 1]), :]
-        (tl, bl) = leftMost
-
-        rightMost = rightMost[np.argsort(rightMost[:, 1]), :]
-        (tr, br) = rightMost
-
-        rect = np.array([tl, tr, br, bl], dtype="float32")
+        # order the corners [top-left, top-right, bottom-right, bottom-left]:
+        # the top-left point has the smallest x+y sum, the bottom-right the
+        # largest; the top-right has the smallest y-x difference, the
+        # bottom-left the largest
+        rect = np.zeros((4, 2), dtype="float32")
+        s = pts.sum(axis=1)
+        rect[0] = pts[np.argmin(s)]
+        rect[2] = pts[np.argmax(s)]
+        diff = np.diff(pts, axis=1)
+        rect[1] = pts[np.argmin(diff)]
+        rect[3] = pts[np.argmax(diff)]
         return rect
-
+
     def clip_det_res(self, points, img_height, img_width):
         for pno in range(points.shape[0]):
             points[pno, 0] = int(min(max(points[pno, 0], 0), img_width - 1))
diff --git a/tools/infer/utility.py b/tools/infer/utility.py
index 25939b0ebc39314583a45b4375d947f19a826d17..80abba67b293e3412afa6c1ea8da0291331ef8de 100644
--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
@@ -622,7 +622,6 @@ def get_rotate_crop_image(img, points):

 def check_gpu(use_gpu):
-    if use_gpu and not paddle.is_compiled_with_cuda():
-        use_gpu = False
     return use_gpu
diff --git a/tools/infer_vqa_token_ser_re.py b/tools/infer_vqa_token_ser_re.py
index 1e5f6f76d6b0599089069ab30f76b3479c7c90b4..2c7cb5e4251819c53e56ba74df530181836299a2 100755
--- a/tools/infer_vqa_token_ser_re.py
+++ b/tools/infer_vqa_token_ser_re.py
@@ -151,7 +151,7 @@ def preprocess():
     ser_config = load_config(FLAGS.config_ser)
     ser_config = merge_config(ser_config, FLAGS.opt_ser)

-    logger = get_logger(name='root')
+    logger = get_logger()

     # check if set use_gpu=True in paddlepaddle cpu version
     use_gpu = config['Global']['use_gpu']
diff --git a/tools/program.py b/tools/program.py
index 7ff04b41513a9ddec5c8888ac6c5ded7b8527b43..8ec152bb92f0855d44b2597ce2420b16a4fa007e 100755
--- a/tools/program.py
+++ b/tools/program.py
@@ -525,7 +525,7 @@ def preprocess(is_train=False):
         log_file = '{}/train.log'.format(save_model_dir)
     else:
         log_file = None
-    logger = get_logger(name='root', log_file=log_file)
+    logger = get_logger(log_file=log_file)

     # check if set use_gpu=True in paddlepaddle cpu version
     use_gpu = config['Global']['use_gpu']
diff --git a/tools/test_hubserving.py b/tools/test_hubserving.py
index 0548726417699855a3905fa1a3fb679d69c85fc8..ec17a9413e15b3ae92843990cfcbb05fc5f991a8 100755
--- a/tools/test_hubserving.py
+++ b/tools/test_hubserving.py
@@ -25,7 +25,9 @@ import numpy as np
 import time
 from PIL import Image
 from ppocr.utils.utility import get_image_file_list
-from tools.infer.utility import draw_ocr, draw_boxes
+from tools.infer.utility import draw_ocr, draw_boxes, str2bool
+from ppstructure.utility import draw_structure_result
+from ppstructure.predict_system import to_excel
+import argparse
 import requests
 import json

@@ -69,8 +71,33 @@ def draw_server_result(image_file, res):
     return draw_img


-def main(url, image_path):
-    image_file_list = get_image_file_list(image_path)
+def save_structure_res(res, save_folder, image_file):
+    img = cv2.imread(image_file)
+    excel_save_folder = os.path.join(save_folder, os.path.basename(image_file))
+    os.makedirs(excel_save_folder, exist_ok=True)
+    # save res
+    with open(
+            os.path.join(excel_save_folder, 'res.txt'), 'w',
+            encoding='utf8') as f:
+        for region in res:
+            if region['type'] == 'Table':
+                excel_path = os.path.join(excel_save_folder,
+                                          '{}.xlsx'.format(region['bbox']))
+                to_excel(region['res'], excel_path)
+            elif region['type'] == 'Figure':
+                x1, y1, x2, y2 = region['bbox']
+                roi_img = img[y1:y2, x1:x2, :]
+                img_path = os.path.join(excel_save_folder,
+                                        '{}.jpg'.format(region['bbox']))
+                cv2.imwrite(img_path, roi_img)
+            else:
+                for text_result in region['res']:
+                    f.write('{}\n'.format(json.dumps(text_result)))
+
+
+def main(args):
+    image_file_list = get_image_file_list(args.image_dir)
-    is_visualize = False
     headers = {"Content-type": "application/json"}
     cnt = 0
@@ -80,38 +107,51 @@ def main(url, image_path):
         if img is None:
             logger.info("error in loading image:{}".format(image_file))
             continue
-
-        # 发送HTTP请求
+        img_name = os.path.basename(image_file)
+        # send the HTTP request
         starttime = time.time()
         data = {'images': [cv2_to_base64(img)]}
-        r = requests.post(url=url, headers=headers, data=json.dumps(data))
+        r = requests.post(
+            url=args.server_url, headers=headers, data=json.dumps(data))
         elapse = time.time() - starttime
         total_time += elapse
         logger.info("Predict time of %s: %.3fs" % (image_file, elapse))
         res = r.json()["results"][0]
         logger.info(res)

-        if is_visualize:
-            draw_img = draw_server_result(image_file, res)
+        if args.visualize:
+            draw_img = None
+            if 'structure_table' in args.server_url:
+                to_excel(res['html'], './{}.xlsx'.format(img_name))
+            elif 'structure_system' in args.server_url:
+                save_structure_res(res['regions'], args.output, image_file)
+            else:
+                draw_img = draw_server_result(image_file, res)
             if draw_img is not None:
-                draw_img_save = "./server_results/"
-                if not os.path.exists(draw_img_save):
-                    os.makedirs(draw_img_save)
+                if not os.path.exists(args.output):
+                    os.makedirs(args.output)
                 cv2.imwrite(
-                    os.path.join(draw_img_save, os.path.basename(image_file)),
+                    os.path.join(args.output, os.path.basename(image_file)),
                     draw_img[:, :, ::-1])
                 logger.info("The visualized image saved in {}".format(
-                    os.path.join(draw_img_save, os.path.basename(image_file))))
+                    os.path.join(args.output, os.path.basename(image_file))))
         cnt += 1
         if cnt % 100 == 0:
             logger.info("{} processed".format(cnt))
     logger.info("avg time cost: {}".format(float(total_time) / cnt))


+def parse_args():
+    parser = argparse.ArgumentParser(description="args for hub serving")
+    parser.add_argument("--server_url", type=str, required=True)
+    parser.add_argument("--image_dir", type=str, required=True)
+    parser.add_argument("--visualize", type=str2bool, default=False)
+    parser.add_argument("--output", type=str, default='./hubserving_result')
+    args = parser.parse_args()
+    return args
+
+
 if __name__ == '__main__':
-    if len(sys.argv) != 3:
-        logger.info("Usage: %s server_url image_path" % sys.argv[0])
-    else:
-        server_url = sys.argv[1]
-        image_path = sys.argv[2]
-        main(server_url, image_path)
+    args = parse_args()
+    main(args)
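+    # Example invocation (module name and port are illustrative; they must
+    # match the deployed hubserving service):
+    #   hub serving start -m ocr_system -p 8868
+    #   python tools/test_hubserving.py --server_url=http://127.0.0.1:8868/predict/ocr_system \
+    #       --image_dir=./doc/imgs/ --visualize=false --output=./hubserving_result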