diff --git a/deploy/cpp_infer/CMakeLists.txt b/deploy/cpp_infer/CMakeLists.txt index 0cf20635f8849cbb405118fd0e2fa8538eb3fa06..efb183c5b4ebb460832b7d353e8a019ee079d975 100644 --- a/deploy/cpp_infer/CMakeLists.txt +++ b/deploy/cpp_infer/CMakeLists.txt @@ -1,4 +1,4 @@ -project(ocr_system CXX C) +project(ppocr CXX C) option(WITH_MKL "Compile demo with MKL/OpenBlas support, default use MKL." ON) option(WITH_GPU "Compile demo with GPU/CPU, default use CPU." OFF) @@ -11,7 +11,8 @@ SET(CUDA_LIB "" CACHE PATH "Location of libraries") SET(CUDNN_LIB "" CACHE PATH "Location of libraries") SET(TENSORRT_DIR "" CACHE PATH "Compile demo with TensorRT") -set(DEMO_NAME "ocr_system") +set(DEMO_NAME "ppocr") + macro(safe_set_static_flag) foreach(flag_var diff --git a/deploy/cpp_infer/src/clipper.cpp b/deploy/cpp_infer/include/clipper.cpp similarity index 100% rename from deploy/cpp_infer/src/clipper.cpp rename to deploy/cpp_infer/include/clipper.cpp diff --git a/deploy/cpp_infer/include/clipper.h b/deploy/cpp_infer/include/clipper.h index 384a6cf44c191a369906373d40fb81ffb02bb7fa..522f81c8c48fe77c50e87c8b753568432c056e38 100644 --- a/deploy/cpp_infer/include/clipper.h +++ b/deploy/cpp_infer/include/clipper.h @@ -31,6 +31,8 @@ * * *******************************************************************************/ +#pragma once + #ifndef clipper_hpp #define clipper_hpp diff --git a/deploy/cpp_infer/include/config.h b/deploy/cpp_infer/include/config.h deleted file mode 100644 index cd02a997e304850ebc04ce2288f4e497dbb4be4a..0000000000000000000000000000000000000000 --- a/deploy/cpp_infer/include/config.h +++ /dev/null @@ -1,123 +0,0 @@ -// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include -#include -#include -#include -#include - -#include "include/utility.h" - -namespace PaddleOCR { - -class OCRConfig { -public: - explicit OCRConfig(const std::string &config_file) { - config_map_ = LoadConfig(config_file); - - this->use_gpu = bool(stoi(config_map_["use_gpu"])); - - this->gpu_id = stoi(config_map_["gpu_id"]); - - this->gpu_mem = stoi(config_map_["gpu_mem"]); - - this->cpu_math_library_num_threads = - stoi(config_map_["cpu_math_library_num_threads"]); - - this->use_mkldnn = bool(stoi(config_map_["use_mkldnn"])); - - this->max_side_len = stoi(config_map_["max_side_len"]); - - this->det_db_thresh = stod(config_map_["det_db_thresh"]); - - this->det_db_box_thresh = stod(config_map_["det_db_box_thresh"]); - - this->det_db_unclip_ratio = stod(config_map_["det_db_unclip_ratio"]); - - this->use_polygon_score = bool(stoi(config_map_["use_polygon_score"])); - - this->det_model_dir.assign(config_map_["det_model_dir"]); - - this->rec_model_dir.assign(config_map_["rec_model_dir"]); - - this->char_list_file.assign(config_map_["char_list_file"]); - - this->use_angle_cls = bool(stoi(config_map_["use_angle_cls"])); - - this->cls_model_dir.assign(config_map_["cls_model_dir"]); - - this->cls_thresh = stod(config_map_["cls_thresh"]); - - this->visualize = bool(stoi(config_map_["visualize"])); - - this->use_tensorrt = bool(stoi(config_map_["use_tensorrt"])); - - this->use_fp16 = bool(stod(config_map_["use_fp16"])); - } - - bool use_gpu = false; - - int gpu_id = 0; - - int gpu_mem = 4000; - - int 
cpu_math_library_num_threads = 1; - - bool use_mkldnn = false; - - int max_side_len = 960; - - double det_db_thresh = 0.3; - - double det_db_box_thresh = 0.5; - - double det_db_unclip_ratio = 2.0; - - bool use_polygon_score = false; - - std::string det_model_dir; - - std::string rec_model_dir; - - bool use_angle_cls; - - std::string char_list_file; - - std::string cls_model_dir; - - double cls_thresh; - - bool visualize = true; - - bool use_tensorrt = false; - - bool use_fp16 = false; - - void PrintConfigInfo(); - -private: - // Load configuration - std::map LoadConfig(const std::string &config_file); - - std::vector split(const std::string &str, - const std::string &delim); - - std::map config_map_; -}; - -} // namespace PaddleOCR diff --git a/deploy/cpp_infer/include/ocr_cls.h b/deploy/cpp_infer/include/ocr_cls.h index 41494085a797c7a4490942741e6e888033c0be00..742e1f8bb0392859ea4bc3a6a4b4410f6b375826 100644 --- a/deploy/cpp_infer/include/ocr_cls.h +++ b/deploy/cpp_infer/include/ocr_cls.h @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#pragma once + #include "opencv2/core.hpp" #include "opencv2/imgcodecs.hpp" #include "opencv2/imgproc.hpp" @@ -40,7 +42,7 @@ public: const int &gpu_id, const int &gpu_mem, const int &cpu_math_library_num_threads, const bool &use_mkldnn, const double &cls_thresh, - const bool &use_tensorrt, const bool &use_fp16) { + const bool &use_tensorrt, const std::string &precision) { this->use_gpu_ = use_gpu; this->gpu_id_ = gpu_id; this->gpu_mem_ = gpu_mem; @@ -49,7 +51,7 @@ public: this->cls_thresh = cls_thresh; this->use_tensorrt_ = use_tensorrt; - this->use_fp16_ = use_fp16; + this->precision_ = precision; LoadModel(model_dir); } @@ -73,7 +75,7 @@ private: std::vector scale_ = {1 / 0.5f, 1 / 0.5f, 1 / 0.5f}; bool is_scale_ = true; bool use_tensorrt_ = false; - bool use_fp16_ = false; + std::string precision_ = "fp32"; // pre-process ClsResizeImg resize_op_; Normalize normalize_op_; diff --git a/deploy/cpp_infer/include/ocr_det.h b/deploy/cpp_infer/include/ocr_det.h index 18318c9c4e37136db62c1338db1b58f82859f037..e5a31ed8e5ab6397c4fa67388252e2baef8b9dd7 100644 --- a/deploy/cpp_infer/include/ocr_det.h +++ b/deploy/cpp_infer/include/ocr_det.h @@ -46,7 +46,7 @@ public: const double &det_db_box_thresh, const double &det_db_unclip_ratio, const bool &use_polygon_score, const bool &visualize, - const bool &use_tensorrt, const bool &use_fp16) { + const bool &use_tensorrt, const std::string &precision) { this->use_gpu_ = use_gpu; this->gpu_id_ = gpu_id; this->gpu_mem_ = gpu_mem; @@ -62,7 +62,7 @@ public: this->visualize_ = visualize; this->use_tensorrt_ = use_tensorrt; - this->use_fp16_ = use_fp16; + this->precision_ = precision; LoadModel(model_dir); } @@ -71,7 +71,7 @@ public: void LoadModel(const std::string &model_dir); // Run predictor - void Run(cv::Mat &img, std::vector>> &boxes); + void Run(cv::Mat &img, std::vector>> &boxes, std::vector *times); private: std::shared_ptr predictor_; @@ -91,7 +91,7 @@ private: bool visualize_ = true; bool use_tensorrt_ = false; - bool 
use_fp16_ = false; + std::string precision_ = "fp32"; std::vector mean_ = {0.485f, 0.456f, 0.406f}; std::vector scale_ = {1 / 0.229f, 1 / 0.224f, 1 / 0.225f}; diff --git a/deploy/cpp_infer/include/ocr_rec.h b/deploy/cpp_infer/include/ocr_rec.h index 94d605a96e1f43423b15b0d81c7cd88f618ea4d3..d585112b051daff7c03060836a4c065ba6e3949c 100644 --- a/deploy/cpp_infer/include/ocr_rec.h +++ b/deploy/cpp_infer/include/ocr_rec.h @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +#pragma once + #include "opencv2/core.hpp" #include "opencv2/imgcodecs.hpp" #include "opencv2/imgproc.hpp" @@ -42,14 +44,14 @@ public: const int &gpu_id, const int &gpu_mem, const int &cpu_math_library_num_threads, const bool &use_mkldnn, const string &label_path, - const bool &use_tensorrt, const bool &use_fp16) { + const bool &use_tensorrt, const std::string &precision) { this->use_gpu_ = use_gpu; this->gpu_id_ = gpu_id; this->gpu_mem_ = gpu_mem; this->cpu_math_library_num_threads_ = cpu_math_library_num_threads; this->use_mkldnn_ = use_mkldnn; this->use_tensorrt_ = use_tensorrt; - this->use_fp16_ = use_fp16; + this->precision_ = precision; this->label_list_ = Utility::ReadDict(label_path); this->label_list_.insert(this->label_list_.begin(), @@ -62,8 +64,7 @@ public: // Load Paddle inference model void LoadModel(const std::string &model_dir); - void Run(std::vector>> boxes, cv::Mat &img, - Classifier *cls); + void Run(cv::Mat &img, std::vector *times); private: std::shared_ptr predictor_; @@ -80,7 +81,7 @@ private: std::vector scale_ = {1 / 0.5f, 1 / 0.5f, 1 / 0.5f}; bool is_scale_ = true; bool use_tensorrt_ = false; - bool use_fp16_ = false; + std::string precision_ = "fp32"; // pre-process CrnnResizeImg resize_op_; Normalize normalize_op_; @@ -89,9 +90,6 @@ private: // post-process PostProcessor post_processor_; - cv::Mat GetRotateCropImage(const cv::Mat &srcimage, - std::vector> box); - }; // class CrnnRecognizer } // 
namespace PaddleOCR diff --git a/deploy/cpp_infer/include/utility.h b/deploy/cpp_infer/include/utility.h index 6e8173e007279319657250b376de022240bc6f62..678187d3fabfb1c91584226950155b3c47b5f93f 100644 --- a/deploy/cpp_infer/include/utility.h +++ b/deploy/cpp_infer/include/utility.h @@ -47,6 +47,9 @@ public: static void GetAllFiles(const char *dir_name, std::vector &all_inputs); + + static cv::Mat GetRotateCropImage(const cv::Mat &srcimage, + std::vector> box); }; } // namespace PaddleOCR \ No newline at end of file diff --git a/deploy/cpp_infer/readme.md b/deploy/cpp_infer/readme.md index 30b8628517d605c74008378078aef3f03528e7cf..9bdd54669faec874e3cdad59f604882ab0bce010 100644 --- a/deploy/cpp_infer/readme.md +++ b/deploy/cpp_infer/readme.md @@ -154,82 +154,102 @@ inference/ * 编译命令如下,其中Paddle C++预测库、opencv等其他依赖库的地址需要换成自己机器上的实际地址。 - ```shell sh tools/build.sh ``` -具体地,`tools/build.sh`中内容如下。 +* 具体的,需要修改`tools/build.sh`中环境路径,相关内容如下: ```shell OPENCV_DIR=your_opencv_dir LIB_DIR=your_paddle_inference_dir CUDA_LIB_DIR=your_cuda_lib_dir CUDNN_LIB_DIR=/your_cudnn_lib_dir - -BUILD_DIR=build -rm -rf ${BUILD_DIR} -mkdir ${BUILD_DIR} -cd ${BUILD_DIR} -cmake .. 
\ - -DPADDLE_LIB=${LIB_DIR} \ - -DWITH_MKL=ON \ - -DDEMO_NAME=ocr_system \ - -DWITH_GPU=OFF \ - -DWITH_STATIC_LIB=OFF \ - -DUSE_TENSORRT=OFF \ - -DOPENCV_DIR=${OPENCV_DIR} \ - -DCUDNN_LIB=${CUDNN_LIB_DIR} \ - -DCUDA_LIB=${CUDA_LIB_DIR} \ - -make -j ``` -`OPENCV_DIR`为opencv编译安装的地址;`LIB_DIR`为下载(`paddle_inference`文件夹)或者编译生成的Paddle预测库地址(`build/paddle_inference_install_dir`文件夹);`CUDA_LIB_DIR`为cuda库文件地址,在docker中为`/usr/local/cuda/lib64`;`CUDNN_LIB_DIR`为cudnn库文件地址,在docker中为`/usr/lib/x86_64-linux-gnu/`。**注意**:以上路径都写绝对路径,不要写相对路径。 +其中,`OPENCV_DIR`为opencv编译安装的地址;`LIB_DIR`为下载(`paddle_inference`文件夹)或者编译生成的Paddle预测库地址(`build/paddle_inference_install_dir`文件夹);`CUDA_LIB_DIR`为cuda库文件地址,在docker中为`/usr/local/cuda/lib64`;`CUDNN_LIB_DIR`为cudnn库文件地址,在docker中为`/usr/lib/x86_64-linux-gnu/`。**注意:以上路径都写绝对路径,不要写相对路径。** -* 编译完成之后,会在`build`文件夹下生成一个名为`ocr_system`的可执行文件。 +* 编译完成之后,会在`build`文件夹下生成一个名为`ppocr`的可执行文件。 ### 运行demo -* 执行以下命令,完成对一幅图像的OCR识别与检测。 +运行方式: +```shell +./build/ppocr <mode> [--param1] [--param2] [...] +``` +其中,`mode`为必选参数,表示选择的功能,取值范围['det', 'rec', 'system'],分别表示调用检测、识别、检测识别串联(包括方向分类器)。具体命令如下: + +##### 1. 只调用检测: +```shell +./build/ppocr det \ + --det_model_dir=inference/ch_ppocr_mobile_v2.0_det_infer \ + --image_dir=../../doc/imgs/12.jpg +``` +##### 2. 只调用识别: +```shell +./build/ppocr rec \ + --rec_model_dir=inference/ch_ppocr_mobile_v2.0_rec_infer \ + --image_dir=../../doc/imgs_words/ch/ +``` +##### 3.
调用串联: ```shell -sh tools/run.sh +# 不使用方向分类器 +./build/ppocr system \ + --det_model_dir=inference/ch_ppocr_mobile_v2.0_det_infer \ + --rec_model_dir=inference/ch_ppocr_mobile_v2.0_rec_infer \ + --image_dir=../../doc/imgs/12.jpg +# 使用方向分类器 +./build/ppocr system \ + --det_model_dir=inference/ch_ppocr_mobile_v2.0_det_infer \ + --use_angle_cls=true \ + --cls_model_dir=inference/ch_ppocr_mobile_v2.0_cls_infer \ + --rec_model_dir=inference/ch_ppocr_mobile_v2.0_rec_infer \ + --image_dir=../../doc/imgs/12.jpg ``` -* 若需要使用方向分类器,则需要将`tools/config.txt`中的`use_angle_cls`参数修改为1,表示开启方向分类器的预测。 -* 更多地,tools/config.txt中的参数及解释如下。 +更多参数如下: -``` -use_gpu 0 # 是否使用GPU,1表示使用,0表示不使用 -gpu_id 0 # GPU id,使用GPU时有效 -gpu_mem 4000 # 申请的GPU内存 -cpu_math_library_num_threads 10 # CPU预测时的线程数,在机器核数充足的情况下,该值越大,预测速度越快 -use_mkldnn 1 # 是否使用mkldnn库 +- 通用参数 -# det config -max_side_len 960 # 输入图像长宽大于960时,等比例缩放图像,使得图像最长边为960 -det_db_thresh 0.3 # 用于过滤DB预测的二值化图像,设置为0.-0.3对结果影响不明显 -det_db_box_thresh 0.5 # DB后处理过滤box的阈值,如果检测存在漏框情况,可酌情减小 -det_db_unclip_ratio 1.6 # 表示文本框的紧致程度,越小则文本框更靠近文本 -use_polygon_score 1 # 是否使用多边形框计算bbox score,0表示使用矩形框计算。矩形框计算速度更快,多边形框对弯曲文本区域计算更准确。 -det_model_dir ./inference/det_db # 检测模型inference model地址 +|参数名称|类型|默认参数|意义| +| --- | --- | --- | --- | +|use_gpu|bool|false|是否使用GPU| +|gpu_id|int|0|GPU id,使用GPU时有效| +|gpu_mem|int|4000|申请的GPU内存| +|cpu_math_library_num_threads|int|10|CPU预测时的线程数,在机器核数充足的情况下,该值越大,预测速度越快| +|use_mkldnn|bool|false|是否使用mkldnn库| -# cls config -use_angle_cls 0 # 是否使用方向分类器,0表示不使用,1表示使用 -cls_model_dir ./inference/cls # 方向分类器inference model地址 -cls_thresh 0.9 # 方向分类器的得分阈值 +- 检测模型相关 -# rec config -rec_model_dir ./inference/rec_crnn # 识别模型inference model地址 -char_list_file ../../ppocr/utils/ppocr_keys_v1.txt # 字典文件 +|参数名称|类型|默认参数|意义| +| --- | --- | --- | --- | +|det_model_dir|string|-|检测模型inference model地址| +|max_side_len|int|960|输入图像长宽大于960时,等比例缩放图像,使得图像最长边为960| +|det_db_thresh|float|0.3|用于过滤DB预测的二值化图像,设置为0.-0.3对结果影响不明显| +|det_db_box_thresh|float|0.5|DB后处理过滤box的阈值,如果检测存在漏框情况,可酌情减小|
+|det_db_unclip_ratio|float|1.6|表示文本框的紧致程度,越小则文本框更靠近文本| +|use_polygon_score|bool|false|是否使用多边形框计算bbox score,false表示使用矩形框计算。矩形框计算速度更快,多边形框对弯曲文本区域计算更准确。| +|visualize|bool|true|是否对结果进行可视化,为true时,会在当前文件夹下保存文件名为`ocr_vis.png`的预测结果。| + +- 方向分类器相关 + +|参数名称|类型|默认参数|意义| +| --- | --- | --- | --- | +|use_angle_cls|bool|false|是否使用方向分类器| +|cls_model_dir|string|-|方向分类器inference model地址| +|cls_thresh|float|0.9|方向分类器的得分阈值| + +- 识别模型相关 + +|参数名称|类型|默认参数|意义| +| --- | --- | --- | --- | +|rec_model_dir|string|-|识别模型inference model地址| +|char_list_file|string|../../ppocr/utils/ppocr_keys_v1.txt|字典文件| -# show the detection results -visualize 1 # 是否对结果进行可视化,为1时,会在当前文件夹下保存文件名为`ocr_vis.png`的预测结果。 -``` -* PaddleOCR也支持多语言的预测,更多支持的语言和模型可以参考[识别文档](../../doc/doc_ch/recognition.md)中的多语言字典与模型部分,如果希望进行多语言预测,只需将修改`tools/config.txt`中的`char_list_file`(字典文件路径)以及`rec_model_dir`(inference模型路径)字段即可。 +* PaddleOCR也支持多语言的预测,更多支持的语言和模型可以参考[识别文档](../../doc/doc_ch/recognition.md)中的多语言字典与模型部分,如果希望进行多语言预测,只需修改`char_list_file`(字典文件路径)以及`rec_model_dir`(inference模型路径)参数即可。 最终屏幕上会输出检测结果如下。 diff --git a/deploy/cpp_infer/readme_en.md b/deploy/cpp_infer/readme_en.md index b03187a7659a5f3bb7ca67970febe853dd201fa1..039aecf1ba3d6c1c717bafbecdb117416a1acc32 100644 --- a/deploy/cpp_infer/readme_en.md +++ b/deploy/cpp_infer/readme_en.md @@ -162,30 +162,13 @@ inference/ sh tools/build.sh ``` -Specifically, the content in `tools/build.sh` is as follows. +Specifically, you should modify the paths in `tools/build.sh`. The related content is as follows. ```shell OPENCV_DIR=your_opencv_dir LIB_DIR=your_paddle_inference_dir CUDA_LIB_DIR=your_cuda_lib_dir CUDNN_LIB_DIR=your_cudnn_lib_dir - -BUILD_DIR=build -rm -rf ${BUILD_DIR} -mkdir ${BUILD_DIR} -cd ${BUILD_DIR} -cmake ..
\ - -DPADDLE_LIB=${LIB_DIR} \ - -DWITH_MKL=ON \ - -DDEMO_NAME=ocr_system \ - -DWITH_GPU=OFF \ - -DWITH_STATIC_LIB=OFF \ - -DUSE_TENSORRT=OFF \ - -DOPENCV_DIR=${OPENCV_DIR} \ - -DCUDNN_LIB=${CUDNN_LIB_DIR} \ - -DCUDA_LIB=${CUDA_LIB_DIR} \ - -make -j ``` `OPENCV_DIR` is the opencv installation path; `LIB_DIR` is the download (`paddle_inference` folder) @@ -193,48 +176,84 @@ or the generated Paddle inference library path (`build/paddle_inference_install_ `CUDA_LIB_DIR` is the cuda library file path, in docker; it is `/usr/local/cuda/lib64`; `CUDNN_LIB_DIR` is the cudnn library file path, in docker it is `/usr/lib/x86_64-linux-gnu/`. -* After the compilation is completed, an executable file named `ocr_system` will be generated in the `build` folder. +* After the compilation is completed, an executable file named `ppocr` will be generated in the `build` folder. ### Run the demo -* Execute the following command to complete the OCR recognition and detection of an image. +Run the built executable as follows: +```shell +./build/ppocr <mode> [--param1] [--param2] [...] +``` +Here, `mode` is a required parameter, and its value must be one of ['det', 'rec', 'system'], which select detection only, recognition only, and the end-to-end system, respectively. Specifically, +##### 1. run det demo: +```shell +./build/ppocr det \ + --det_model_dir=inference/ch_ppocr_mobile_v2.0_det_infer \ + --image_dir=../../doc/imgs/12.jpg +``` +##### 2. run rec demo: +```shell +./build/ppocr rec \ + --rec_model_dir=inference/ch_ppocr_mobile_v2.0_rec_infer \ + --image_dir=../../doc/imgs_words/ch/ +``` +##### 3.
run system demo: ```shell -sh tools/run.sh +# without text direction classifier +./build/ppocr system \ + --det_model_dir=inference/ch_ppocr_mobile_v2.0_det_infer \ + --rec_model_dir=inference/ch_ppocr_mobile_v2.0_rec_infer \ + --image_dir=../../doc/imgs/12.jpg +# with text direction classifier +./build/ppocr system \ + --det_model_dir=inference/ch_ppocr_mobile_v2.0_det_infer \ + --use_angle_cls=true \ + --cls_model_dir=inference/ch_ppocr_mobile_v2.0_cls_infer \ + --rec_model_dir=inference/ch_ppocr_mobile_v2.0_rec_infer \ + --image_dir=../../doc/imgs/12.jpg ``` -* If you want to orientation classifier to correct the detected boxes, you can set `use_angle_cls` in the file `tools/config.txt` as 1 to enable the function. -* What's more, Parameters and their meanings in `tools/config.txt` are as follows. +More parameters are as follows, +- common parameters -``` -use_gpu 0 # Whether to use GPU, 0 means not to use, 1 means to use -gpu_id 0 # GPU id when use_gpu is 1 -gpu_mem 4000 # GPU memory requested -cpu_math_library_num_threads 10 # Number of threads when using CPU inference. When machine cores is enough, the large the value, the faster the inference speed -use_mkldnn 1 # Whether to use mkdlnn library +|parameter|data type|default|meaning| +| --- | --- | --- | --- | +|use_gpu|bool|false|Whether to use GPU| +|gpu_id|int|0|GPU id when use_gpu is true| +|gpu_mem|int|4000|GPU memory requested| +|cpu_math_library_num_threads|int|10|Number of threads when using CPU inference. 
When there are enough machine cores, the larger the value, the faster the inference speed| +|use_mkldnn|bool|false|Whether to use mkldnn library| -max_side_len 960 # Limit the maximum image height and width to 960 -det_db_thresh 0.3 # Used to filter the binarized image of DB prediction, setting 0.-0.3 has no obvious effect on the result -det_db_box_thresh 0.5 # DDB post-processing filter box threshold, if there is a missing box detected, it can be reduced as appropriate -det_db_unclip_ratio 1.6 # Indicates the compactness of the text box, the smaller the value, the closer the text box to the text -use_polygon_score 1 # Whether to use polygon box to calculate bbox score, 0 means to use rectangle box to calculate. Use rectangular box to calculate faster, and polygonal box more accurate for curved text area. -det_model_dir ./inference/det_db # Address of detection inference model +- detection related parameters -# cls config -use_angle_cls 0 # Whether to use the direction classifier, 0 means not to use, 1 means to use -cls_model_dir ./inference/cls # Address of direction classifier inference model -cls_thresh 0.9 # Score threshold of the direction classifier +|parameter|data type|default|meaning| +| --- | --- | --- | --- | +|det_model_dir|string|-|Address of detection inference model| +|max_side_len|int|960|Limit the maximum image height and width to 960| +|det_db_thresh|float|0.3|Used to filter the binarized image of DB prediction, setting 0.-0.3 has no obvious effect on the result| +|det_db_box_thresh|float|0.5|DB post-processing filter box threshold, if there is a missing box detected, it can be reduced as appropriate| +|det_db_unclip_ratio|float|1.6|Indicates the compactness of the text box, the smaller the value, the closer the text box to the text| +|use_polygon_score|bool|false|Whether to use polygon box to calculate bbox score, false means to use rectangle box to calculate.
The rectangular box is faster to compute, while the polygonal box is more accurate for curved text areas.| +|visualize|bool|true|Whether to visualize the results; when it is set to true, the prediction result will be saved in the image file `./ocr_vis.png`.| -# rec config -rec_model_dir ./inference/rec_crnn # Address of recognition inference model -char_list_file ../../ppocr/utils/ppocr_keys_v1.txt # dictionary file +- classifier related parameters -# show the detection results -visualize 1 # Whether to visualize the results,when it is set as 1, The prediction result will be save in the image file `./ocr_vis.png`. -``` +|parameter|data type|default|meaning| +| --- | --- | --- | --- | +|use_angle_cls|bool|false|Whether to use the direction classifier| +|cls_model_dir|string|-|Address of direction classifier inference model| +|cls_thresh|float|0.9|Score threshold of the direction classifier| + +- recognition related parameters + +|parameter|data type|default|meaning| +| --- | --- | --- | --- | +|rec_model_dir|string|-|Address of recognition inference model| +|char_list_file|string|../../ppocr/utils/ppocr_keys_v1.txt|dictionary file| -* Multi-language inference is also supported in PaddleOCR, you can refer to [recognition tutorial](../../doc/doc_en/recognition_en.md) for more supported languages and models in PaddleOCR. Specifically, if you want to infer using multi-language models, you just need to modify values of `char_list_file` and `rec_model_dir` in file `tools/config.txt`. +* Multi-language inference is also supported in PaddleOCR; you can refer to the [recognition tutorial](../../doc/doc_en/recognition_en.md) for more supported languages and models in PaddleOCR. Specifically, if you want to infer using multi-language models, you just need to modify the values of `char_list_file` and `rec_model_dir`. The detection results will be shown on the screen, which is as follows.
diff --git a/deploy/cpp_infer/src/config.cpp b/deploy/cpp_infer/src/config.cpp deleted file mode 100644 index 050b75ede9475432f250cafac2cd5fbed17fea0a..0000000000000000000000000000000000000000 --- a/deploy/cpp_infer/src/config.cpp +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include - -namespace PaddleOCR { - -std::vector OCRConfig::split(const std::string &str, - const std::string &delim) { - std::vector res; - if ("" == str) - return res; - - int strlen = str.length() + 1; - chars *strs = new char[strlen]; - std::strcpy(strs, str.c_str()); - - int delimlen = delim.length() + 1; - char *d = new char[delimlen]; - std::strcpy(d, delim.c_str()); - - delete[] strs; - delete[] d; - - char *p = std::strtok(strs, d); - while (p) { - std::string s = p; - res.push_back(s); - p = std::strtok(NULL, d); - } - - return res; -} - -std::map -OCRConfig::LoadConfig(const std::string &config_path) { - auto config = Utility::ReadDict(config_path); - - std::map dict; - for (int i = 0; i < config.size(); i++) { - // pass for empty line or comment - if (config[i].size() <= 1 || config[i][0] == '#') { - continue; - } - std::vector res = split(config[i], " "); - dict[res[0]] = res[1]; - } - return dict; -} - -void OCRConfig::PrintConfigInfo() { - std::cout << "=======Paddle OCR inference config======" << std::endl; - for (auto iter = config_map_.begin(); iter != 
config_map_.end(); iter++) { - std::cout << iter->first << " : " << iter->second << std::endl; - } - std::cout << "=======End of Paddle OCR inference config======" << std::endl; -} - -} // namespace PaddleOCR diff --git a/deploy/cpp_infer/src/main.cpp b/deploy/cpp_infer/src/main.cpp index f25e674b489ea92118fe45c63939fca203ce3823..5e5c851517d5efaa75f54b7a156563a4a42880d5 100644 --- a/deploy/cpp_infer/src/main.cpp +++ b/deploy/cpp_infer/src/main.cpp @@ -28,76 +28,276 @@ #include #include -#include #include +#include #include #include #include +#include + +DEFINE_bool(use_gpu, false, "Infering with GPU or CPU."); +DEFINE_int32(gpu_id, 0, "Device id of GPU to execute."); +DEFINE_int32(gpu_mem, 4000, "GPU id when infering with GPU."); +DEFINE_int32(cpu_math_library_num_threads, 10, "Num of threads with CPU."); +DEFINE_bool(use_mkldnn, false, "Whether use mkldnn with CPU."); +DEFINE_bool(use_tensorrt, false, "Whether use tensorrt."); +DEFINE_string(precision, "fp32", "Precision be one of fp32/fp16/int8"); +DEFINE_bool(benchmark, true, "Whether use benchmark."); +DEFINE_string(save_log_path, "./log_output/", "Save benchmark log path."); +// detection related +DEFINE_string(image_dir, "", "Dir of input image."); +DEFINE_string(det_model_dir, "", "Path of det inference model."); +DEFINE_int32(max_side_len, 960, "max_side_len of input image."); +DEFINE_double(det_db_thresh, 0.3, "Threshold of det_db_thresh."); +DEFINE_double(det_db_box_thresh, 0.5, "Threshold of det_db_box_thresh."); +DEFINE_double(det_db_unclip_ratio, 1.6, "Threshold of det_db_unclip_ratio."); +DEFINE_bool(use_polygon_score, false, "Whether use polygon score."); +DEFINE_bool(visualize, true, "Whether show the detection results."); +// classification related +DEFINE_bool(use_angle_cls, false, "Whether use use_angle_cls."); +DEFINE_string(cls_model_dir, "", "Path of cls inference model."); +DEFINE_double(cls_thresh, 0.9, "Threshold of cls_thresh."); +// recognition related +DEFINE_string(rec_model_dir, "", 
"Path of rec inference model."); +DEFINE_string(char_list_file, "../../ppocr/utils/ppocr_keys_v1.txt", "Path of dictionary."); + + using namespace std; using namespace cv; using namespace PaddleOCR; + +void PrintBenchmarkLog(std::string model_name, + int batch_size, + std::string input_shape, + std::vector time_info, + int img_num){ + LOG(INFO) << "----------------------- Config info -----------------------"; + LOG(INFO) << "runtime_device: " << (FLAGS_use_gpu ? "gpu" : "cpu"); + LOG(INFO) << "ir_optim: " << "True"; + LOG(INFO) << "enable_memory_optim: " << "True"; + LOG(INFO) << "enable_tensorrt: " << FLAGS_use_tensorrt; + LOG(INFO) << "enable_mkldnn: " << (FLAGS_use_mkldnn ? "True" : "False"); + LOG(INFO) << "cpu_math_library_num_threads: " << FLAGS_cpu_math_library_num_threads; + LOG(INFO) << "----------------------- Data info -----------------------"; + LOG(INFO) << "batch_size: " << batch_size; + LOG(INFO) << "input_shape: " << input_shape; + LOG(INFO) << "data_num: " << img_num; + LOG(INFO) << "----------------------- Model info -----------------------"; + LOG(INFO) << "model_name: " << model_name; + LOG(INFO) << "precision: " << FLAGS_precision; + LOG(INFO) << "----------------------- Perf info ------------------------"; + LOG(INFO) << "Total time spent(ms): " + << std::accumulate(time_info.begin(), time_info.end(), 0); + LOG(INFO) << "preprocess_time(ms): " << time_info[0] / img_num + << ", inference_time(ms): " << time_info[1] / img_num + << ", postprocess_time(ms): " << time_info[2] / img_num; +} + + +static bool PathExists(const std::string& path){ +#ifdef _WIN32 + struct _stat buffer; + return (_stat(path.c_str(), &buffer) == 0); +#else + struct stat buffer; + return (stat(path.c_str(), &buffer) == 0); +#endif // !_WIN32 +} + + +int main_det(std::vector cv_all_img_names) { + std::vector time_info = {0, 0, 0}; + DBDetector det(FLAGS_det_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, + FLAGS_gpu_mem, FLAGS_cpu_math_library_num_threads, + FLAGS_use_mkldnn, 
FLAGS_max_side_len, FLAGS_det_db_thresh, + FLAGS_det_db_box_thresh, FLAGS_det_db_unclip_ratio, + FLAGS_use_polygon_score, FLAGS_visualize, + FLAGS_use_tensorrt, FLAGS_precision); + + for (int i = 0; i < cv_all_img_names.size(); ++i) { + LOG(INFO) << "The predict img: " << cv_all_img_names[i]; + + cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR); + if (!srcimg.data) { + std::cerr << "[ERROR] image read failed! image path: " << cv_all_img_names[i] << endl; + exit(1); + } + std::vector>> boxes; + std::vector det_times; + + det.Run(srcimg, boxes, &det_times); + + time_info[0] += det_times[0]; + time_info[1] += det_times[1]; + time_info[2] += det_times[2]; + } + + if (FLAGS_benchmark) { + PrintBenchmarkLog("det", 1, "dynamic", time_info, cv_all_img_names.size()); + } + return 0; +} + + +int main_rec(std::vector cv_all_img_names) { + std::vector time_info = {0, 0, 0}; + CRNNRecognizer rec(FLAGS_rec_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, + FLAGS_gpu_mem, FLAGS_cpu_math_library_num_threads, + FLAGS_use_mkldnn, FLAGS_char_list_file, + FLAGS_use_tensorrt, FLAGS_precision); + + for (int i = 0; i < cv_all_img_names.size(); ++i) { + LOG(INFO) << "The predict img: " << cv_all_img_names[i]; + + cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR); + if (!srcimg.data) { + std::cerr << "[ERROR] image read failed! 
image path: " << cv_all_img_names[i] << endl; + exit(1); + } + + std::vector rec_times; + rec.Run(srcimg, &rec_times); + + time_info[0] += rec_times[0]; + time_info[1] += rec_times[1]; + time_info[2] += rec_times[2]; + } + + if (FLAGS_benchmark) { + PrintBenchmarkLog("rec", 1, "dynamic", time_info, cv_all_img_names.size()); + } + + return 0; +} + + +int main_system(std::vector cv_all_img_names) { + DBDetector det(FLAGS_det_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, + FLAGS_gpu_mem, FLAGS_cpu_math_library_num_threads, + FLAGS_use_mkldnn, FLAGS_max_side_len, FLAGS_det_db_thresh, + FLAGS_det_db_box_thresh, FLAGS_det_db_unclip_ratio, + FLAGS_use_polygon_score, FLAGS_visualize, + FLAGS_use_tensorrt, FLAGS_precision); + + Classifier *cls = nullptr; + if (FLAGS_use_angle_cls) { + cls = new Classifier(FLAGS_cls_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, + FLAGS_gpu_mem, FLAGS_cpu_math_library_num_threads, + FLAGS_use_mkldnn, FLAGS_cls_thresh, + FLAGS_use_tensorrt, FLAGS_precision); + } + + CRNNRecognizer rec(FLAGS_rec_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, + FLAGS_gpu_mem, FLAGS_cpu_math_library_num_threads, + FLAGS_use_mkldnn, FLAGS_char_list_file, + FLAGS_use_tensorrt, FLAGS_precision); + + auto start = std::chrono::system_clock::now(); + + for (int i = 0; i < cv_all_img_names.size(); ++i) { + LOG(INFO) << "The predict img: " << cv_all_img_names[i]; + + cv::Mat srcimg = cv::imread(FLAGS_image_dir, cv::IMREAD_COLOR); + if (!srcimg.data) { + std::cerr << "[ERROR] image read failed! 
image path: " << cv_all_img_names[i] << endl; + exit(1); + } + std::vector>> boxes; + std::vector det_times; + std::vector rec_times; + + det.Run(srcimg, boxes, &det_times); + + cv::Mat crop_img; + for (int j = 0; j < boxes.size(); j++) { + crop_img = Utility::GetRotateCropImage(srcimg, boxes[j]); + + if (cls != nullptr) { + crop_img = cls->Run(crop_img); + } + rec.Run(crop_img, &rec_times); + } + + auto end = std::chrono::system_clock::now(); + auto duration = + std::chrono::duration_cast(end - start); + std::cout << "Cost " + << double(duration.count()) * + std::chrono::microseconds::period::num / + std::chrono::microseconds::period::den + << "s" << std::endl; + } + + return 0; +} + + +void check_params(char* mode) { + if (strcmp(mode, "det")==0) { + if (FLAGS_det_model_dir.empty() || FLAGS_image_dir.empty()) { + std::cout << "Usage[det]: ./ppocr --det_model_dir=/PATH/TO/DET_INFERENCE_MODEL/ " + << "--image_dir=/PATH/TO/INPUT/IMAGE/" << std::endl; + exit(1); + } + } + if (strcmp(mode, "rec")==0) { + if (FLAGS_rec_model_dir.empty() || FLAGS_image_dir.empty()) { + std::cout << "Usage[rec]: ./ppocr --rec_model_dir=/PATH/TO/REC_INFERENCE_MODEL/ " + << "--image_dir=/PATH/TO/INPUT/IMAGE/" << std::endl; + exit(1); + } + } + if (strcmp(mode, "system")==0) { + if ((FLAGS_det_model_dir.empty() || FLAGS_rec_model_dir.empty() || FLAGS_image_dir.empty()) || + (FLAGS_use_angle_cls && FLAGS_cls_model_dir.empty())) { + std::cout << "Usage[system without angle cls]: ./ppocr --det_model_dir=/PATH/TO/DET_INFERENCE_MODEL/ " + << "--rec_model_dir=/PATH/TO/REC_INFERENCE_MODEL/ " + << "--image_dir=/PATH/TO/INPUT/IMAGE/" << std::endl; + std::cout << "Usage[system with angle cls]: ./ppocr --det_model_dir=/PATH/TO/DET_INFERENCE_MODEL/ " + << "--use_angle_cls=true " + << "--cls_model_dir=/PATH/TO/CLS_INFERENCE_MODEL/ " + << "--rec_model_dir=/PATH/TO/REC_INFERENCE_MODEL/ " + << "--image_dir=/PATH/TO/INPUT/IMAGE/" << std::endl; + exit(1); + } + } + if (FLAGS_precision != "fp32" && 
FLAGS_precision != "fp16" && FLAGS_precision != "int8") { + cout << "precison should be 'fp32'(default), 'fp16' or 'int8'. " << endl; + exit(1); + } +} + + int main(int argc, char **argv) { - if (argc < 3) { - std::cerr << "[ERROR] usage: " << argv[0] - << " configure_filepath image_path\n"; - exit(1); - } - - OCRConfig config(argv[1]); - - config.PrintConfigInfo(); - - std::string img_path(argv[2]); - std::vector all_img_names; - Utility::GetAllFiles((char *)img_path.c_str(), all_img_names); - - DBDetector det(config.det_model_dir, config.use_gpu, config.gpu_id, - config.gpu_mem, config.cpu_math_library_num_threads, - config.use_mkldnn, config.max_side_len, config.det_db_thresh, - config.det_db_box_thresh, config.det_db_unclip_ratio, - config.use_polygon_score, config.visualize, - config.use_tensorrt, config.use_fp16); - - Classifier *cls = nullptr; - if (config.use_angle_cls == true) { - cls = new Classifier(config.cls_model_dir, config.use_gpu, config.gpu_id, - config.gpu_mem, config.cpu_math_library_num_threads, - config.use_mkldnn, config.cls_thresh, - config.use_tensorrt, config.use_fp16); - } - - CRNNRecognizer rec(config.rec_model_dir, config.use_gpu, config.gpu_id, - config.gpu_mem, config.cpu_math_library_num_threads, - config.use_mkldnn, config.char_list_file, - config.use_tensorrt, config.use_fp16); - - auto start = std::chrono::system_clock::now(); - - for (auto img_dir : all_img_names) { - LOG(INFO) << "The predict img: " << img_dir; - - cv::Mat srcimg = cv::imread(img_dir, cv::IMREAD_COLOR); - if (!srcimg.data) { - std::cerr << "[ERROR] image read failed! image path: " << img_path - << "\n"; - exit(1); + if (argc<=1 || (strcmp(argv[1], "det")!=0 && strcmp(argv[1], "rec")!=0 && strcmp(argv[1], "system")!=0)) { + std::cout << "Please choose one mode of [det, rec, system] !" 
<< std::endl; + return -1; + } + std::cout << "mode: " << argv[1] << endl; + + // Parsing command-line + google::ParseCommandLineFlags(&argc, &argv, true); + check_params(argv[1]); + + if (!PathExists(FLAGS_image_dir)) { + std::cerr << "[ERROR] image path not exist! image_dir: " << FLAGS_image_dir << endl; + exit(1); } - std::vector>> boxes; - - det.Run(srcimg, boxes); - - rec.Run(boxes, srcimg, cls); - auto end = std::chrono::system_clock::now(); - auto duration = - std::chrono::duration_cast(end - start); - std::cout << "Cost " - << double(duration.count()) * - std::chrono::microseconds::period::num / - std::chrono::microseconds::period::den - << "s" << std::endl; - } - - return 0; + + std::vector cv_all_img_names; + cv::glob(FLAGS_image_dir, cv_all_img_names); + std::cout << "total images num: " << cv_all_img_names.size() << endl; + + if (strcmp(argv[1], "det")==0) { + return main_det(cv_all_img_names); + } + if (strcmp(argv[1], "rec")==0) { + return main_rec(cv_all_img_names); + } + if (strcmp(argv[1], "system")==0) { + return main_system(cv_all_img_names); + } + } diff --git a/deploy/cpp_infer/src/ocr_cls.cpp b/deploy/cpp_infer/src/ocr_cls.cpp index 9199e082e5df42b0c9c42e668d2df37acf4521c4..3b04b6f8248bb17b9e315ae8b777530840015394 100644 --- a/deploy/cpp_infer/src/ocr_cls.cpp +++ b/deploy/cpp_infer/src/ocr_cls.cpp @@ -77,10 +77,16 @@ void Classifier::LoadModel(const std::string &model_dir) { if (this->use_gpu_) { config.EnableUseGpu(this->gpu_mem_, this->gpu_id_); if (this->use_tensorrt_) { + auto precision = paddle_infer::Config::Precision::kFloat32; + if (this->precision_ == "fp16") { + precision = paddle_infer::Config::Precision::kHalf; + } + if (this->precision_ == "int8") { + precision = paddle_infer::Config::Precision::kInt8; + } config.EnableTensorRtEngine( 1 << 20, 10, 3, - this->use_fp16_ ? 
paddle_infer::Config::Precision::kHalf - : paddle_infer::Config::Precision::kFloat32, + precision, false, false); } } else { diff --git a/deploy/cpp_infer/src/ocr_det.cpp b/deploy/cpp_infer/src/ocr_det.cpp index 33ad468a33b42c3d9f25beb19452f2fa6a81db9e..a69f5ca1bd3ee7665f8b2f5610c67dd6feb7eb54 100644 --- a/deploy/cpp_infer/src/ocr_det.cpp +++ b/deploy/cpp_infer/src/ocr_det.cpp @@ -14,6 +14,7 @@ #include + namespace PaddleOCR { void DBDetector::LoadModel(const std::string &model_dir) { @@ -25,10 +26,16 @@ void DBDetector::LoadModel(const std::string &model_dir) { if (this->use_gpu_) { config.EnableUseGpu(this->gpu_mem_, this->gpu_id_); if (this->use_tensorrt_) { + auto precision = paddle_infer::Config::Precision::kFloat32; + if (this->precision_ == "fp16") { + precision = paddle_infer::Config::Precision::kHalf; + } + if (this->precision_ == "int8") { + precision = paddle_infer::Config::Precision::kInt8; + } config.EnableTensorRtEngine( 1 << 20, 10, 3, - this->use_fp16_ ? paddle_infer::Config::Precision::kHalf - : paddle_infer::Config::Precision::kFloat32, + precision, false, false); std::map> min_input_shape = { {"x", {1, 3, 50, 50}}, @@ -90,13 +97,16 @@ void DBDetector::LoadModel(const std::string &model_dir) { } void DBDetector::Run(cv::Mat &img, - std::vector>> &boxes) { + std::vector>> &boxes, + std::vector *times) { float ratio_h{}; float ratio_w{}; cv::Mat srcimg; cv::Mat resize_img; img.copyTo(srcimg); + + auto preprocess_start = std::chrono::steady_clock::now(); this->resize_op_.Run(img, resize_img, this->max_side_len_, ratio_h, ratio_w, this->use_tensorrt_); @@ -105,14 +115,17 @@ void DBDetector::Run(cv::Mat &img, std::vector input(1 * 3 * resize_img.rows * resize_img.cols, 0.0f); this->permute_op_.Run(&resize_img, input.data()); - + auto preprocess_end = std::chrono::steady_clock::now(); + // Inference. 
auto input_names = this->predictor_->GetInputNames(); auto input_t = this->predictor_->GetInputHandle(input_names[0]); input_t->Reshape({1, 3, resize_img.rows, resize_img.cols}); + auto inference_start = std::chrono::steady_clock::now(); input_t->CopyFromCpu(input.data()); + this->predictor_->Run(); - + std::vector out_data; auto output_names = this->predictor_->GetOutputNames(); auto output_t = this->predictor_->GetOutputHandle(output_names[0]); @@ -122,7 +135,9 @@ void DBDetector::Run(cv::Mat &img, out_data.resize(out_num); output_t->CopyToCpu(out_data.data()); - + auto inference_end = std::chrono::steady_clock::now(); + + auto postprocess_start = std::chrono::steady_clock::now(); int n2 = output_shape[2]; int n3 = output_shape[3]; int n = n2 * n3; @@ -150,7 +165,16 @@ void DBDetector::Run(cv::Mat &img, this->det_db_unclip_ratio_, this->use_polygon_score_); boxes = post_processor_.FilterTagDetRes(boxes, ratio_h, ratio_w, srcimg); - + auto postprocess_end = std::chrono::steady_clock::now(); + std::cout << "Detected boxes num: " << boxes.size() << endl; + + std::chrono::duration preprocess_diff = preprocess_end - preprocess_start; + times->push_back(double(preprocess_diff.count() * 1000)); + std::chrono::duration inference_diff = inference_end - inference_start; + times->push_back(double(inference_diff.count() * 1000)); + std::chrono::duration postprocess_diff = postprocess_end - postprocess_start; + times->push_back(double(postprocess_diff.count() * 1000)); + //// visualization if (this->visualize_) { Utility::VisualizeBboxes(srcimg, boxes); diff --git a/deploy/cpp_infer/src/ocr_rec.cpp b/deploy/cpp_infer/src/ocr_rec.cpp index b09282b0283743b530cd5477dbe9c5ff751de93c..b64dcea5ae2a68485296c02cdb7689c60ea504f8 100644 --- a/deploy/cpp_infer/src/ocr_rec.cpp +++ b/deploy/cpp_infer/src/ocr_rec.cpp @@ -16,80 +16,80 @@ namespace PaddleOCR { -void CRNNRecognizer::Run(std::vector>> boxes, - cv::Mat &img, Classifier *cls) { +void CRNNRecognizer::Run(cv::Mat &img, std::vector 
*times) { cv::Mat srcimg; img.copyTo(srcimg); - cv::Mat crop_img; cv::Mat resize_img; - std::cout << "The predicted text is :" << std::endl; - int index = 0; - for (int i = 0; i < boxes.size(); i++) { - crop_img = GetRotateCropImage(srcimg, boxes[i]); - - if (cls != nullptr) { - crop_img = cls->Run(crop_img); - } - - float wh_ratio = float(crop_img.cols) / float(crop_img.rows); - - this->resize_op_.Run(crop_img, resize_img, wh_ratio, this->use_tensorrt_); - - this->normalize_op_.Run(&resize_img, this->mean_, this->scale_, - this->is_scale_); - - std::vector input(1 * 3 * resize_img.rows * resize_img.cols, 0.0f); - - this->permute_op_.Run(&resize_img, input.data()); - - // Inference. - auto input_names = this->predictor_->GetInputNames(); - auto input_t = this->predictor_->GetInputHandle(input_names[0]); - input_t->Reshape({1, 3, resize_img.rows, resize_img.cols}); - input_t->CopyFromCpu(input.data()); - this->predictor_->Run(); - - std::vector predict_batch; - auto output_names = this->predictor_->GetOutputNames(); - auto output_t = this->predictor_->GetOutputHandle(output_names[0]); - auto predict_shape = output_t->shape(); - - int out_num = std::accumulate(predict_shape.begin(), predict_shape.end(), 1, - std::multiplies()); - predict_batch.resize(out_num); - - output_t->CopyToCpu(predict_batch.data()); - - // ctc decode - std::vector str_res; - int argmax_idx; - int last_index = 0; - float score = 0.f; - int count = 0; - float max_value = 0.0f; - - for (int n = 0; n < predict_shape[1]; n++) { - argmax_idx = - int(Utility::argmax(&predict_batch[n * predict_shape[2]], - &predict_batch[(n + 1) * predict_shape[2]])); - max_value = - float(*std::max_element(&predict_batch[n * predict_shape[2]], - &predict_batch[(n + 1) * predict_shape[2]])); - - if (argmax_idx > 0 && (!(n > 0 && argmax_idx == last_index))) { - score += max_value; - count += 1; - str_res.push_back(label_list_[argmax_idx]); - } - last_index = argmax_idx; + float wh_ratio = float(srcimg.cols) / 
float(srcimg.rows); + auto preprocess_start = std::chrono::steady_clock::now(); + this->resize_op_.Run(srcimg, resize_img, wh_ratio, this->use_tensorrt_); + + this->normalize_op_.Run(&resize_img, this->mean_, this->scale_, + this->is_scale_); + + std::vector input(1 * 3 * resize_img.rows * resize_img.cols, 0.0f); + + this->permute_op_.Run(&resize_img, input.data()); + auto preprocess_end = std::chrono::steady_clock::now(); + + // Inference. + auto input_names = this->predictor_->GetInputNames(); + auto input_t = this->predictor_->GetInputHandle(input_names[0]); + input_t->Reshape({1, 3, resize_img.rows, resize_img.cols}); + auto inference_start = std::chrono::steady_clock::now(); + input_t->CopyFromCpu(input.data()); + this->predictor_->Run(); + + std::vector predict_batch; + auto output_names = this->predictor_->GetOutputNames(); + auto output_t = this->predictor_->GetOutputHandle(output_names[0]); + auto predict_shape = output_t->shape(); + + int out_num = std::accumulate(predict_shape.begin(), predict_shape.end(), 1, + std::multiplies()); + predict_batch.resize(out_num); + + output_t->CopyToCpu(predict_batch.data()); + auto inference_end = std::chrono::steady_clock::now(); + + // ctc decode + auto postprocess_start = std::chrono::steady_clock::now(); + std::vector str_res; + int argmax_idx; + int last_index = 0; + float score = 0.f; + int count = 0; + float max_value = 0.0f; + + for (int n = 0; n < predict_shape[1]; n++) { + argmax_idx = + int(Utility::argmax(&predict_batch[n * predict_shape[2]], + &predict_batch[(n + 1) * predict_shape[2]])); + max_value = + float(*std::max_element(&predict_batch[n * predict_shape[2]], + &predict_batch[(n + 1) * predict_shape[2]])); + + if (argmax_idx > 0 && (!(n > 0 && argmax_idx == last_index))) { + score += max_value; + count += 1; + str_res.push_back(label_list_[argmax_idx]); } - score /= count; - for (int i = 0; i < str_res.size(); i++) { - std::cout << str_res[i]; - } - std::cout << "\tscore: " << score << std::endl; + 
last_index = argmax_idx; + } + auto postprocess_end = std::chrono::steady_clock::now(); + score /= count; + for (int i = 0; i < str_res.size(); i++) { + std::cout << str_res[i]; } + std::cout << "\tscore: " << score << std::endl; + + std::chrono::duration preprocess_diff = preprocess_end - preprocess_start; + times->push_back(double(preprocess_diff.count() * 1000)); + std::chrono::duration inference_diff = inference_end - inference_start; + times->push_back(double(inference_diff.count() * 1000)); + std::chrono::duration postprocess_diff = postprocess_end - postprocess_start; + times->push_back(double(postprocess_diff.count() * 1000)); } void CRNNRecognizer::LoadModel(const std::string &model_dir) { @@ -101,10 +101,16 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) { if (this->use_gpu_) { config.EnableUseGpu(this->gpu_mem_, this->gpu_id_); if (this->use_tensorrt_) { + auto precision = paddle_infer::Config::Precision::kFloat32; + if (this->precision_ == "fp16") { + precision = paddle_infer::Config::Precision::kHalf; + } + if (this->precision_ == "int8") { + precision = paddle_infer::Config::Precision::kInt8; + } config.EnableTensorRtEngine( 1 << 20, 10, 3, - this->use_fp16_ ? 
paddle_infer::Config::Precision::kHalf - : paddle_infer::Config::Precision::kFloat32, + precision, false, false); std::map> min_input_shape = { {"x", {1, 3, 32, 10}}}; @@ -138,59 +144,4 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) { this->predictor_ = CreatePredictor(config); } -cv::Mat CRNNRecognizer::GetRotateCropImage(const cv::Mat &srcimage, - std::vector> box) { - cv::Mat image; - srcimage.copyTo(image); - std::vector> points = box; - - int x_collect[4] = {box[0][0], box[1][0], box[2][0], box[3][0]}; - int y_collect[4] = {box[0][1], box[1][1], box[2][1], box[3][1]}; - int left = int(*std::min_element(x_collect, x_collect + 4)); - int right = int(*std::max_element(x_collect, x_collect + 4)); - int top = int(*std::min_element(y_collect, y_collect + 4)); - int bottom = int(*std::max_element(y_collect, y_collect + 4)); - - cv::Mat img_crop; - image(cv::Rect(left, top, right - left, bottom - top)).copyTo(img_crop); - - for (int i = 0; i < points.size(); i++) { - points[i][0] -= left; - points[i][1] -= top; - } - - int img_crop_width = int(sqrt(pow(points[0][0] - points[1][0], 2) + - pow(points[0][1] - points[1][1], 2))); - int img_crop_height = int(sqrt(pow(points[0][0] - points[3][0], 2) + - pow(points[0][1] - points[3][1], 2))); - - cv::Point2f pts_std[4]; - pts_std[0] = cv::Point2f(0., 0.); - pts_std[1] = cv::Point2f(img_crop_width, 0.); - pts_std[2] = cv::Point2f(img_crop_width, img_crop_height); - pts_std[3] = cv::Point2f(0.f, img_crop_height); - - cv::Point2f pointsf[4]; - pointsf[0] = cv::Point2f(points[0][0], points[0][1]); - pointsf[1] = cv::Point2f(points[1][0], points[1][1]); - pointsf[2] = cv::Point2f(points[2][0], points[2][1]); - pointsf[3] = cv::Point2f(points[3][0], points[3][1]); - - cv::Mat M = cv::getPerspectiveTransform(pointsf, pts_std); - - cv::Mat dst_img; - cv::warpPerspective(img_crop, dst_img, M, - cv::Size(img_crop_width, img_crop_height), - cv::BORDER_REPLICATE); - - if (float(dst_img.rows) >= float(dst_img.cols) * 
1.5) { - cv::Mat srcCopy = cv::Mat(dst_img.rows, dst_img.cols, dst_img.depth()); - cv::transpose(dst_img, srcCopy); - cv::flip(srcCopy, srcCopy, 0); - return srcCopy; - } else { - return dst_img; - } -} - } // namespace PaddleOCR diff --git a/deploy/cpp_infer/src/postprocess_op.cpp b/deploy/cpp_infer/src/postprocess_op.cpp index e7db70f3bff81390728c6b373b89cf06c74e4eca..c3985572048155cf5aca57c95f1d8a816658ef13 100644 --- a/deploy/cpp_infer/src/postprocess_op.cpp +++ b/deploy/cpp_infer/src/postprocess_op.cpp @@ -13,6 +13,7 @@ // limitations under the License. #include +#include namespace PaddleOCR { diff --git a/deploy/cpp_infer/src/utility.cpp b/deploy/cpp_infer/src/utility.cpp index 2cd84f7e8dbdd8144b5337f55b3f3a62ed43d5b3..dba445b747ff3f3c0d2db91061650c369977c4dd 100644 --- a/deploy/cpp_infer/src/utility.cpp +++ b/deploy/cpp_infer/src/utility.cpp @@ -92,4 +92,59 @@ void Utility::GetAllFiles(const char *dir_name, } } +cv::Mat Utility::GetRotateCropImage(const cv::Mat &srcimage, + std::vector> box) { + cv::Mat image; + srcimage.copyTo(image); + std::vector> points = box; + + int x_collect[4] = {box[0][0], box[1][0], box[2][0], box[3][0]}; + int y_collect[4] = {box[0][1], box[1][1], box[2][1], box[3][1]}; + int left = int(*std::min_element(x_collect, x_collect + 4)); + int right = int(*std::max_element(x_collect, x_collect + 4)); + int top = int(*std::min_element(y_collect, y_collect + 4)); + int bottom = int(*std::max_element(y_collect, y_collect + 4)); + + cv::Mat img_crop; + image(cv::Rect(left, top, right - left, bottom - top)).copyTo(img_crop); + + for (int i = 0; i < points.size(); i++) { + points[i][0] -= left; + points[i][1] -= top; + } + + int img_crop_width = int(sqrt(pow(points[0][0] - points[1][0], 2) + + pow(points[0][1] - points[1][1], 2))); + int img_crop_height = int(sqrt(pow(points[0][0] - points[3][0], 2) + + pow(points[0][1] - points[3][1], 2))); + + cv::Point2f pts_std[4]; + pts_std[0] = cv::Point2f(0., 0.); + pts_std[1] = 
cv::Point2f(img_crop_width, 0.); + pts_std[2] = cv::Point2f(img_crop_width, img_crop_height); + pts_std[3] = cv::Point2f(0.f, img_crop_height); + + cv::Point2f pointsf[4]; + pointsf[0] = cv::Point2f(points[0][0], points[0][1]); + pointsf[1] = cv::Point2f(points[1][0], points[1][1]); + pointsf[2] = cv::Point2f(points[2][0], points[2][1]); + pointsf[3] = cv::Point2f(points[3][0], points[3][1]); + + cv::Mat M = cv::getPerspectiveTransform(pointsf, pts_std); + + cv::Mat dst_img; + cv::warpPerspective(img_crop, dst_img, M, + cv::Size(img_crop_width, img_crop_height), + cv::BORDER_REPLICATE); + + if (float(dst_img.rows) >= float(dst_img.cols) * 1.5) { + cv::Mat srcCopy = cv::Mat(dst_img.rows, dst_img.cols, dst_img.depth()); + cv::transpose(dst_img, srcCopy); + cv::flip(srcCopy, srcCopy, 0); + return srcCopy; + } else { + return dst_img; + } +} + } // namespace PaddleOCR \ No newline at end of file diff --git a/deploy/cpp_infer/tools/config.txt b/deploy/cpp_infer/tools/config.txt deleted file mode 100644 index d4d66d65225bc9d1d4d62f45550db71fb5d8414e..0000000000000000000000000000000000000000 --- a/deploy/cpp_infer/tools/config.txt +++ /dev/null @@ -1,31 +0,0 @@ -# model load config -use_gpu 0 -gpu_id 0 -gpu_mem 4000 -cpu_math_library_num_threads 10 -use_mkldnn 0 - -# det config -max_side_len 960 -det_db_thresh 0.3 -det_db_box_thresh 0.5 -det_db_unclip_ratio 1.6 -use_polygon_score 1 -det_model_dir ./inference/ch_ppocr_mobile_v2.0_det_infer/ - -# cls config -use_angle_cls 0 -cls_model_dir ./inference/ch_ppocr_mobile_v2.0_cls_infer/ -cls_thresh 0.9 - -# rec config -rec_model_dir ./inference/ch_ppocr_mobile_v2.0_rec_infer/ -char_list_file ../../ppocr/utils/ppocr_keys_v1.txt - -# show the detection results -visualize 0 - -# use_tensorrt -use_tensorrt 0 -use_fp16 0 - diff --git a/deploy/cpp_infer/tools/run.sh b/deploy/cpp_infer/tools/run.sh deleted file mode 100755 index fa61da75e3a71262f539ee348c69fb82ed2574fb..0000000000000000000000000000000000000000 --- 
a/deploy/cpp_infer/tools/run.sh +++ /dev/null @@ -1,2 +0,0 @@ - -./build/ocr_system ./tools/config.txt ../../doc/imgs/12.jpg diff --git a/doc/joinus.PNG b/doc/joinus.PNG index 33f5badb61e9d5c5be5439be861beee3dcf7bcf2..1228ce0a4ddd549b9ddfe00090675d9bd7e3cb6b 100644 Binary files a/doc/joinus.PNG and b/doc/joinus.PNG differ diff --git a/tests/prepare.sh b/tests/prepare.sh index d27a051cb0a7effc50305db8e2268ad36492d6cb..418e5661ad0f315bc60b8fda37742c115b395b7c 100644 --- a/tests/prepare.sh +++ b/tests/prepare.sh @@ -62,7 +62,7 @@ else if [ ${model_name} = "ocr_det" ]; then eval_model_name="ch_ppocr_mobile_v2.0_det_infer" rm -rf ./train_data/icdar2015 - wget -nc -P ./train_data https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar + wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar cd ./inference && tar xf ${eval_model_name}.tar && tar xf ch_det_data_50.tar && cd ../ else diff --git a/tests/readme.md b/tests/readme.md new file mode 100644 index 0000000000000000000000000000000000000000..1c5e0faee90cad9709b6e4d517cbf7830aa2bb8e --- /dev/null +++ b/tests/readme.md @@ -0,0 +1,58 @@ + +# 介绍 + +test.sh和params.txt文件配合使用,完成OCR轻量检测和识别模型从训练到预测的流程测试。 + +# 安装依赖 +- 安装PaddlePaddle >= 2.0 +- 安装PaddleOCR依赖 + ``` + pip3 install -r ../requirements.txt + ``` +- 安装autolog + ``` + git clone https://github.com/LDOUBLEV/AutoLog + cd AutoLog + pip3 install -r requirements.txt + python3 setup.py bdist_wheel + pip3 install ./dist/auto_log-1.0.0-py3-none-any.whl + cd ../ + ``` + +# 目录介绍 + +```bash +tests/ +├── ocr_det_params.txt # 测试OCR检测模型的参数配置文件 +├── ocr_rec_params.txt # 测试OCR识别模型的参数配置文件 +└── prepare.sh # 完成test.sh运行所需要的数据和模型下载 +└── test.sh # 根据 +``` + +# 使用方法 +test.sh包含四种运行模式,每种模式的运行数据不同,分别用于测试速度和精度,分别是: +- 模式1 lite_train_infer,使用少量数据训练,用于快速验证训练到预测的走通流程,不验证精度和速度; +``` +bash test/prepare.sh ./tests/ocr_det_params.txt 
'lite_train_infer' +bash tests/test.sh ./tests/ocr_det_params.txt 'lite_train_infer' +``` +- 模式2 whole_infer,使用少量数据训练,一定量数据预测,用于验证训练后的模型执行预测,预测速度是否合理; +``` +bash tests/prepare.sh ./tests/ocr_det_params.txt 'whole_infer' +bash tests/test.sh ./tests/ocr_det_params.txt 'whole_infer' +``` + +- 模式3 infer 不训练,全量数据预测,走通开源模型评估、动转静,检查inference model预测时间和精度; +``` +bash tests/prepare.sh ./tests/ocr_det_params.txt 'infer' +用法1: +bash tests/test.sh ./tests/ocr_det_params.txt 'infer' +用法2: 指定GPU卡预测,第三个传入参数为GPU卡号 +bash tests/test.sh ./tests/ocr_det_params.txt 'infer' '1' +``` + +模式4: whole_train_infer , CE: 全量数据训练,全量数据预测,验证模型训练精度,预测精度,预测速度 +``` +bash tests/prepare.sh ./tests/ocr_det_params.txt 'whole_train_infer' +bash tests/test.sh ./tests/ocr_det_params.txt 'whole_train_infer' +``` diff --git a/tools/infer/predict_det.py b/tools/infer/predict_det.py index 3de00d83a8f9f55af9b89d5d2cd5c877399c5930..5c75e0c480eac6796d6d4b7075d1b38d254380fd 100755 --- a/tools/infer/predict_det.py +++ b/tools/infer/predict_det.py @@ -101,6 +101,7 @@ class TextDetector(object): if args.benchmark: import auto_log pid = os.getpid() + gpu_id = utility.get_infer_gpuid() self.autolog = auto_log.AutoLogger( model_name="det", model_precision=args.precision, @@ -110,7 +111,7 @@ class TextDetector(object): inference_config=self.config, pids=pid, process_name=None, - gpu_ids=0, + gpu_ids=gpu_id if args.use_gpu else None, time_keys=[ 'preprocess_time', 'inference_time', 'postprocess_time' ], diff --git a/tools/infer/predict_rec.py b/tools/infer/predict_rec.py index bb4a31706471b9b1745519ac9f390d01b60d5d44..97dfa5214628123d0c9b7edd7d94060a2bfd2a1e 100755 --- a/tools/infer/predict_rec.py +++ b/tools/infer/predict_rec.py @@ -68,6 +68,7 @@ class TextRecognizer(object): if args.benchmark: import auto_log pid = os.getpid() + gpu_id = utility.get_infer_gpuid() self.autolog = auto_log.AutoLogger( model_name="rec", model_precision=args.precision, @@ -77,7 +78,7 @@ class TextRecognizer(object): 
inference_config=self.config, pids=pid, process_name=None, - gpu_ids=0 if args.use_gpu else None, + gpu_ids=gpu_id if args.use_gpu else None, time_keys=[ 'preprocess_time', 'inference_time', 'postprocess_time' ], diff --git a/tools/infer/utility.py b/tools/infer/utility.py index 1c82280099f17f6d3bf848669e47439505f10576..527a811d620efac33ece9cdbd4b6196e18a8497d 100755 --- a/tools/infer/utility.py +++ b/tools/infer/utility.py @@ -280,6 +280,20 @@ def create_predictor(args, mode, logger): return predictor, input_tensor, output_tensors, config +def get_infer_gpuid(): + cmd = "nvidia-smi" + res = os.popen(cmd).readlines() + if len(res) == 0: + return None + cmd = "env | grep CUDA_VISIBLE_DEVICES" + env_cuda = os.popen(cmd).readlines() + if len(env_cuda) == 0: + return 0 + else: + gpu_id = env_cuda[0].strip().split("=")[1] + return int(gpu_id[0]) + + def draw_e2e_res(dt_boxes, strs, img_path): src_im = cv2.imread(img_path) for box, str in zip(dt_boxes, strs):