diff --git a/deploy/cpp_infer/CMakeLists.txt b/deploy/cpp_infer/CMakeLists.txt
index 90f62345de8524143bd7a6f2631b64f3f8fb0d02..4f2dc88a8f1a9d185b7274a1a723c0e670bd1bf1 100644
--- a/deploy/cpp_infer/CMakeLists.txt
+++ b/deploy/cpp_infer/CMakeLists.txt
@@ -13,7 +13,6 @@ SET(TENSORRT_DIR "" CACHE PATH "Compile demo with TensorRT")
 
 set(DEMO_NAME "ocr_system")
 
-
 macro(safe_set_static_flag)
     foreach(flag_var
         CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
@@ -134,7 +133,11 @@ if(WITH_MKL)
     endif ()
   endif()
 else()
-  set(MATH_LIB ${PADDLE_LIB}/third_party/install/openblas/lib/libopenblas${CMAKE_STATIC_LIBRARY_SUFFIX})
+  if (WIN32)
+    set(MATH_LIB ${PADDLE_LIB}/third_party/install/openblas/lib/openblas${CMAKE_STATIC_LIBRARY_SUFFIX})
+  else ()
+    set(MATH_LIB ${PADDLE_LIB}/third_party/install/openblas/lib/libopenblas${CMAKE_STATIC_LIBRARY_SUFFIX})
+  endif ()
 endif()
 
 # Note: libpaddle_inference_api.so/a must put before libpaddle_inference.so/a
@@ -158,7 +161,7 @@ endif(WITH_STATIC_LIB)
 
 if (NOT WIN32)
   set(DEPS ${DEPS}
-      ${MATH_LIB} ${MKLDNN_LIB}
+      ${MATH_LIB} ${MKLDNN_LIB} glog gflags protobuf z xxhash
       )
   if(EXISTS "${PADDLE_LIB}/third_party/install/snappystream/lib")
diff --git a/deploy/cpp_infer/docs/windows_vs2019_build.md b/deploy/cpp_infer/docs/windows_vs2019_build.md
index 21fbf4e0eb95ee82475164047d8051e90e9e224f..0f243bf8f54b5cd50e9fa2faab29b064e694e45c 100644
--- a/deploy/cpp_infer/docs/windows_vs2019_build.md
+++ b/deploy/cpp_infer/docs/windows_vs2019_build.md
@@ -14,7 +14,7 @@ PaddleOCR在Windows 平台下基于`Visual Studio 2019 Community` 进行了测
 
 ### Step1: 下载PaddlePaddle C++ 预测库 fluid_inference
 
-PaddlePaddle C++ 预测库针对不同的`CPU`和`CUDA`版本提供了不同的预编译版本,请根据实际情况下载: [C++预测库下载列表](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_guide/inference_deployment/inference/windows_cpp_inference.html)
+PaddlePaddle C++ 预测库针对不同的`CPU`和`CUDA`版本提供了不同的预编译版本,请根据实际情况下载: [C++预测库下载列表](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/guides/05_inference_deployment/inference/windows_cpp_inference.html)
 
 解压后`D:\projects\fluid_inference`目录包含内容为:
 
 ```
diff --git a/deploy/cpp_infer/imgs/cpp_infer_pred_12.png b/deploy/cpp_infer/imgs/cpp_infer_pred_12.png
new file mode 100644
index 0000000000000000000000000000000000000000..eb5f64e1f6c329f7ae772c50edce7fc8afcb1211
Binary files /dev/null and b/deploy/cpp_infer/imgs/cpp_infer_pred_12.png differ
diff --git a/deploy/cpp_infer/include/config.h b/deploy/cpp_infer/include/config.h
index dbfbc2df141042f1065b380010e1ea3ff3ccedab..cd02a997e304850ebc04ce2288f4e497dbb4be4a 100644
--- a/deploy/cpp_infer/include/config.h
+++ b/deploy/cpp_infer/include/config.h
@@ -49,6 +49,8 @@ public:
 
     this->det_db_unclip_ratio = stod(config_map_["det_db_unclip_ratio"]);
 
+    this->use_polygon_score = bool(stoi(config_map_["use_polygon_score"]));
+
     this->det_model_dir.assign(config_map_["det_model_dir"]);
 
     this->rec_model_dir.assign(config_map_["rec_model_dir"]);
@@ -86,6 +88,8 @@ public:
 
   double det_db_unclip_ratio = 2.0;
 
+  bool use_polygon_score = false;
+
   std::string det_model_dir;
 
   std::string rec_model_dir;
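The new `use_polygon_score` entry is parsed with `stoi(config_map_["use_polygon_score"])`. Note that `std::map::operator[]` inserts an empty string for a missing key, and `stoi("")` throws `std::invalid_argument`, so the key must be present in `tools/config.txt` (it is added there later in this patch). A defensive variant is sketched below; `GetIntOr` is a hypothetical helper, not part of the patch:

```cpp
// Hypothetical helper (not part of this patch): read an integer key with a
// default instead of config_map_[key], which inserts "" and makes stoi throw.
#include <map>
#include <string>

static int GetIntOr(const std::map<std::string, std::string> &m,
                    const std::string &key, int def) {
  auto it = m.find(key);
  return it == m.end() ? def : std::stoi(it->second);
}

// e.g. this->use_polygon_score = GetIntOr(config_map_, "use_polygon_score", 0) != 0;
```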
diff --git a/deploy/cpp_infer/include/ocr_det.h b/deploy/cpp_infer/include/ocr_det.h
index bab9c95fa4a3f1cb160ccbf9ca4587fa4c2ba16a..18318c9c4e37136db62c1338db1b58f82859f037 100644
--- a/deploy/cpp_infer/include/ocr_det.h
+++ b/deploy/cpp_infer/include/ocr_det.h
@@ -44,7 +44,8 @@ public:
             const bool &use_mkldnn, const int &max_side_len,
             const double &det_db_thresh, const double &det_db_box_thresh,
-            const double &det_db_unclip_ratio, const bool &visualize,
+            const double &det_db_unclip_ratio,
+            const bool &use_polygon_score, const bool &visualize,
             const bool &use_tensorrt, const bool &use_fp16) {
     this->use_gpu_ = use_gpu;
     this->gpu_id_ = gpu_id;
@@ -57,6 +58,7 @@ public:
     this->det_db_thresh_ = det_db_thresh;
     this->det_db_box_thresh_ = det_db_box_thresh;
     this->det_db_unclip_ratio_ = det_db_unclip_ratio;
+    this->use_polygon_score_ = use_polygon_score;
     this->visualize_ = visualize;
     this->use_tensorrt_ = use_tensorrt;
 
@@ -85,6 +87,7 @@ private:
   double det_db_thresh_ = 0.3;
   double det_db_box_thresh_ = 0.5;
   double det_db_unclip_ratio_ = 2.0;
+  bool use_polygon_score_ = false;
   bool visualize_ = true;
   bool use_tensorrt_ = false;
 
diff --git a/deploy/cpp_infer/include/postprocess_op.h b/deploy/cpp_infer/include/postprocess_op.h
index 44ca35312ebc61b39a1f61e69c2cc5dd5e999da7..b384b79b3041bfcb96f042c6450d3c6e54f00498 100644
--- a/deploy/cpp_infer/include/postprocess_op.h
+++ b/deploy/cpp_infer/include/postprocess_op.h
@@ -51,10 +51,12 @@ public:
                                             float &ssid);
 
   float BoxScoreFast(std::vector<std::vector<float>> box_array, cv::Mat pred);
+  float PolygonScoreAcc(std::vector<cv::Point> contour, cv::Mat pred);
 
   std::vector<std::vector<std::vector<int>>>
   BoxesFromBitmap(const cv::Mat pred, const cv::Mat bitmap,
-                  const float &box_thresh, const float &det_db_unclip_ratio);
+                  const float &box_thresh, const float &det_db_unclip_ratio,
+                  const bool &use_polygon_score);
 
   std::vector<std::vector<std::vector<int>>>
   FilterTagDetRes(std::vector<std::vector<std::vector<int>>> boxes,
diff --git a/deploy/cpp_infer/include/utility.h b/deploy/cpp_infer/include/utility.h
index 367e37e434b396ac1eae28961f366dc397ed446f..6e8173e007279319657250b376de022240bc6f62 100644
--- a/deploy/cpp_infer/include/utility.h
+++ b/deploy/cpp_infer/include/utility.h
@@ -44,6 +44,9 @@ public:
   inline static size_t argmax(ForwardIterator first, ForwardIterator last) {
     return std::distance(first, std::max_element(first, last));
   }
+
+  static void GetAllFiles(const char *dir_name,
+                          std::vector<std::string> &all_inputs);
 };
 
 } // namespace PaddleOCR
\ No newline at end of file
diff --git a/deploy/cpp_infer/inference b/deploy/cpp_infer/inference
new file mode 120000
index 0000000000000000000000000000000000000000..c7785f47352a6b8e90cd02531046ed54b8820477
--- /dev/null
+++ b/deploy/cpp_infer/inference
@@ -0,0 +1 @@
+/paddle/test/PaddleOCR/deploy/cpp_infer/inference
\ No newline at end of file
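A minimal usage sketch for the new `Utility::GetAllFiles` helper declared above (the `./imgs` directory is a placeholder, and the project include path is assumed). Per the implementation added in `src/utility.cpp` further down, a path that is not a directory is pushed back unchanged, so a single-image path also works:

```cpp
#include <include/utility.h>

#include <iostream>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> all_img_names;
  // "./imgs" is a placeholder directory; "." and ".." entries are skipped.
  PaddleOCR::Utility::GetAllFiles("./imgs", all_img_names);
  for (const auto &name : all_img_names)
    std::cout << name << std::endl;
  return 0;
}
```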
diff --git a/deploy/cpp_infer/readme.md b/deploy/cpp_infer/readme.md
index 3e5c12867d2845d46972b435a8ec85eed226f0ba..6a57044b0ef81c4600c13180bb33c45b2bf0bc01 100644
--- a/deploy/cpp_infer/readme.md
+++ b/deploy/cpp_infer/readme.md
@@ -74,12 +74,26 @@ opencv3/
 
 * 有2种方式获取Paddle预测库,下面进行详细介绍。
 
-#### 1.2.1 预测库源码编译
+
+#### 1.2.1 直接下载安装
+
+* [Paddle预测库官网](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0/guides/05_inference_deployment/inference/build_and_install_lib_cn.html) 上提供了不同cuda版本的Linux预测库,可以在官网查看并选择合适的预测库版本(*建议选择paddle版本>=2.0.1版本的预测库* )。
+
+* 下载之后使用下面的方法解压。
+
+```
+tar -xf paddle_inference.tgz
+```
+
+最终会在当前的文件夹中生成`paddle_inference/`的子文件夹。
+
+#### 1.2.2 预测库源码编译
 * 如果希望获取最新预测库特性,可以从Paddle github上克隆最新代码,源码编译预测库。
-* 可以参考[Paddle预测库官网](https://www.paddlepaddle.org.cn/documentation/docs/zh/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html)的说明,从github上获取Paddle代码,然后进行编译,生成最新的预测库。使用git获取代码方法如下。
+* 可以参考[Paddle预测库安装编译说明](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0/guides/05_inference_deployment/inference/build_and_install_lib_cn.html#congyuanmabianyi) 的说明,从github上获取Paddle代码,然后进行编译,生成最新的预测库。使用git获取代码方法如下。
 
 ```shell
 git clone https://github.com/PaddlePaddle/Paddle.git
+git checkout release/2.1
 ```
 
 * 进入Paddle目录后,编译方法如下。
@@ -102,7 +116,7 @@ make -j
 make inference_lib_dist
 ```
 
-更多编译参数选项可以参考Paddle C++预测库官网:[https://www.paddlepaddle.org.cn/documentation/docs/zh/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html](https://www.paddlepaddle.org.cn/documentation/docs/zh/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html)。
+更多编译参数选项介绍可以参考[文档说明](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0/guides/05_inference_deployment/inference/build_and_install_lib_cn.html#congyuanmabianyi)。
 
 * 编译完成之后,可以在`build/paddle_inference_install_dir/`文件下看到生成了以下文件及文件夹。
 
@@ -117,19 +131,6 @@ build/paddle_inference_install_dir/
 
 其中`paddle`就是C++预测所需的Paddle库,`version.txt`中包含当前预测库的版本信息。
 
-#### 1.2.2 直接下载安装
-
-* [Paddle预测库官网](https://www.paddlepaddle.org.cn/documentation/docs/zh/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html)上提供了不同cuda版本的Linux预测库,可以在官网查看并选择合适的预测库版本(*建议选择paddle版本>=2.0.1版本的预测库* )。
-
-
-* 下载之后使用下面的方法解压。
-
-```
-tar -xf paddle_inference.tgz
-```
-
-最终会在当前的文件夹中生成`paddle_inference/`的子文件夹。
-
 
 ## 2 开始运行
 
@@ -140,11 +141,11 @@ tar -xf paddle_inference.tgz
 ```
 inference/
 |-- det_db
-|   |--inference.pdparams
-|   |--inference.pdimodel
+|   |--inference.pdiparams
+|   |--inference.pdmodel
 |-- rec_rcnn
-|   |--inference.pdparams
-|   |--inference.pdparams
+|   |--inference.pdiparams
+|   |--inference.pdmodel
 ```
 
@@ -183,7 +184,7 @@ cmake .. \
 make -j
 ```
 
-`OPENCV_DIR`为opencv编译安装的地址;`LIB_DIR`为下载(`paddle_inference`文件夹)或者编译生成的Paddle预测库地址(`build/paddle_inference_install_dir`文件夹);`CUDA_LIB_DIR`为cuda库文件地址,在docker中;为`/usr/local/cuda/lib64`;`CUDNN_LIB_DIR`为cudnn库文件地址,在docker中为`/usr/lib/x86_64-linux-gnu/`。
+`OPENCV_DIR`为opencv编译安装的地址;`LIB_DIR`为下载(`paddle_inference`文件夹)或者编译生成的Paddle预测库地址(`build/paddle_inference_install_dir`文件夹);`CUDA_LIB_DIR`为cuda库文件地址,在docker中为`/usr/local/cuda/lib64`;`CUDNN_LIB_DIR`为cudnn库文件地址,在docker中为`/usr/lib/x86_64-linux-gnu/`。
 
 * 编译完成之后,会在`build`文件夹下生成一个名为`ocr_system`的可执行文件。
 
@@ -211,6 +212,7 @@ max_side_len 960 # 输入图像长宽大于960时,等比例缩放图像,使
 det_db_thresh 0.3 # 用于过滤DB预测的二值化图像,设置为0.-0.3对结果影响不明显
 det_db_box_thresh 0.5 # DB后处理过滤box的阈值,如果检测存在漏框情况,可酌情减小
 det_db_unclip_ratio 1.6 # 表示文本框的紧致程度,越小则文本框更靠近文本
+use_polygon_score 1 # 是否使用多边形框计算bbox score,0表示使用矩形框计算。矩形框计算速度更快,多边形框对弯曲文本区域计算更准确。
 det_model_dir ./inference/det_db # 检测模型inference model地址
 
 # cls config
@@ -226,15 +228,15 @@ char_list_file ../../ppocr/utils/ppocr_keys_v1.txt # 字典文件
 visualize 1 # 是否对结果进行可视化,为1时,会在当前文件夹下保存文件名为`ocr_vis.png`的预测结果。
 ```
 
-* PaddleOCR也支持多语言的预测,更多细节可以参考[识别文档](../../doc/doc_ch/recognition.md)中的多语言字典与模型部分。
+* PaddleOCR也支持多语言的预测,更多支持的语言和模型可以参考[识别文档](../../doc/doc_ch/recognition.md)中的多语言字典与模型部分,如果希望进行多语言预测,只需修改`tools/config.txt`中的`char_list_file`(字典文件路径)以及`rec_model_dir`(inference模型路径)字段即可。
 
 最终屏幕上会输出检测结果如下。
 
 <div align="center">
-    <img src="../../doc/imgs_results/cpp_infer_pred_12.png" width="600">
+    <img src="./imgs/cpp_infer_pred_12.png" width="600">
### 2.3 注意 -* 在使用Paddle预测库时,推荐使用2.0.0-beta0版本的预测库。 +* 在使用Paddle预测库时,推荐使用2.0.0版本的预测库。 diff --git a/deploy/cpp_infer/readme_en.md b/deploy/cpp_infer/readme_en.md index a51977326c2ee848773be34b17e396b6a166f80b..6c0a18db4f76d4e2971cea16130216434ff01d7b 100644 --- a/deploy/cpp_infer/readme_en.md +++ b/deploy/cpp_infer/readme_en.md @@ -76,17 +76,30 @@ opencv3/ * There are 2 ways to obtain the Paddle inference library, described in detail below. +#### 1.2.1 Direct download and installation -#### 1.2.1 Compile from the source code +[Paddle inference library official website](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0/guides/05_inference_deployment/inference/build_and_install_lib_cn.html). You can view and select the appropriate version of the inference library on the official website. + + +* After downloading, use the following method to uncompress. + +``` +tar -xf paddle_inference.tgz +``` + +Finally you can see the following files in the folder of `paddle_inference/`. + +#### 1.2.2 Compile from the source code * If you want to get the latest Paddle inference library features, you can download the latest code from Paddle github repository and compile the inference library from the source code. It is recommended to download the inference library with paddle version greater than or equal to 2.0.1. * You can refer to [Paddle inference library] (https://www.paddlepaddle.org.cn/documentation/docs/en/advanced_guide/inference_deployment/inference/build_and_install_lib_en.html) to get the Paddle source code from github, and then compile To generate the latest inference library. The method of using git to access the code is as follows. ```shell git clone https://github.com/PaddlePaddle/Paddle.git +git checkout release/2.1 ``` -* After entering the Paddle directory, the compilation method is as follows. +* After entering the Paddle directory, the commands to compile the paddle inference library are as follows. ```shell rm -rf build @@ -106,7 +119,7 @@ make -j make inference_lib_dist ``` -For more compilation parameter options, please refer to the official website of the Paddle C++ inference library:[https://www.paddlepaddle.org.cn/documentation/docs/en/advanced_guide/inference_deployment/inference/build_and_install_lib_en.html](https://www.paddlepaddle.org.cn/documentation/docs/en/advanced_guide/inference_deployment/inference/build_and_install_lib_en.html). +For more compilation parameter options, please refer to the [document](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0/guides/05_inference_deployment/inference/build_and_install_lib_cn.html#congyuanmabianyi). * After the compilation process, you can see the following files in the folder of `build/paddle_inference_install_dir/`. @@ -122,22 +135,6 @@ build/paddle_inference_install_dir/ Among them, `paddle` is the Paddle library required for C++ prediction later, and `version.txt` contains the version information of the current inference library. - -#### 1.2.2 Direct download and installation - -* Different cuda versions of the Linux inference library (based on GCC 4.8.2) are provided on the -[Paddle inference library official website](https://www.paddlepaddle.org.cn/documentation/docs/en/advanced_guide/inference_deployment/inference/build_and_install_lib_en.html). You can view and select the appropriate version of the inference library on the official website. - - -* After downloading, use the following method to uncompress. 
-
-```
-tar -xf paddle_inference.tgz
-```
-
-Finally you can see the following files in the folder of `paddle_inference/`.
-
-
 ## 2. Compile and run the demo
 
 ### 2.1 Export the inference model
@@ -147,11 +144,11 @@ Finally you can see the following files in the folder of `paddle_inference/`.
 ```
 inference/
 |-- det_db
-|   |--inference.pdparams
-|   |--inference.pdimodel
+|   |--inference.pdiparams
+|   |--inference.pdmodel
 |-- rec_rcnn
-|   |--inference.pdparams
-|   |--inference.pdparams
+|   |--inference.pdiparams
+|   |--inference.pdmodel
 ```
 
@@ -220,6 +217,7 @@ max_side_len 960 # Limit the maximum image height and width to 960
 det_db_thresh 0.3 # Used to filter the binarized image of DB prediction, setting 0.-0.3 has no obvious effect on the result
 det_db_box_thresh 0.5 # DB post-processing filter box threshold; if there are missing boxes in detection, it can be reduced as appropriate
 det_db_unclip_ratio 1.6 # Indicates the compactness of the text box, the smaller the value, the closer the text box to the text
+use_polygon_score 1 # Whether to use a polygon box to compute the bbox score; 0 means a rectangular box is used. The rectangular box is faster to compute, while the polygon box is more accurate for curved text regions.
 det_model_dir ./inference/det_db # Address of detection inference model
 
 # cls config
@@ -235,16 +233,16 @@ char_list_file ../../ppocr/utils/ppocr_keys_v1.txt # dictionary file
 visualize 1 # Whether to visualize the results; when it is set as 1, the prediction result will be saved in the image file `./ocr_vis.png`.
 ```
 
-* Multi-language inference is also supported in PaddleOCR, for more details, please refer to part of multi-language dictionaries and models in [recognition tutorial](../../doc/doc_en/recognition_en.md).
+* Multi-language inference is also supported in PaddleOCR; you can refer to the [recognition tutorial](../../doc/doc_en/recognition_en.md) for more supported languages and models. To run inference with a multi-language model, you just need to modify the values of `char_list_file` and `rec_model_dir` in the file `tools/config.txt`.
 
 The detection results will be shown on the screen, which is as follows.
 
 <div align="center">
-    <img src="../../doc/imgs_results/cpp_infer_pred_12.png" width="600">
+    <img src="./imgs/cpp_infer_pred_12.png" width="600">
### 2.3 Notes -* Paddle2.0.0-beta0 inference model library is recommended for this toturial. +* Paddle2.0.0 inference model library is recommended for this toturial. diff --git a/deploy/cpp_infer/src/clipper.cpp b/deploy/cpp_infer/src/clipper.cpp index b35c25f29ee1144f98747bd41e89d87779fdb6a3..5f5d22167659ef27ba32a5645c7df04ba39106c5 100644 --- a/deploy/cpp_infer/src/clipper.cpp +++ b/deploy/cpp_infer/src/clipper.cpp @@ -668,7 +668,7 @@ void DisposeOutPts(OutPt *&pp) { //------------------------------------------------------------------------------ inline void InitEdge(TEdge *e, TEdge *eNext, TEdge *ePrev, const IntPoint &Pt) { - std::memset(e, 0, sizeof(TEdge)); + std::memset(e, int(0), sizeof(TEdge)); e->Next = eNext; e->Prev = ePrev; e->Curr = Pt; @@ -1895,17 +1895,17 @@ void Clipper::InsertLocalMinimaIntoAEL(const cInt botY) { TEdge *rb = lm->RightBound; OutPt *Op1 = 0; - if (!lb) { + if (!lb || !rb) { // nb: don't insert LB into either AEL or SEL InsertEdgeIntoAEL(rb, 0); SetWindingCount(*rb); if (IsContributing(*rb)) Op1 = AddOutPt(rb, rb->Bot); - } else if (!rb) { - InsertEdgeIntoAEL(lb, 0); - SetWindingCount(*lb); - if (IsContributing(*lb)) - Op1 = AddOutPt(lb, lb->Bot); + //} else if (!rb) { + // InsertEdgeIntoAEL(lb, 0); + // SetWindingCount(*lb); + // if (IsContributing(*lb)) + // Op1 = AddOutPt(lb, lb->Bot); InsertScanbeam(lb->Top.Y); } else { InsertEdgeIntoAEL(lb, 0); @@ -2547,13 +2547,13 @@ void Clipper::ProcessHorizontal(TEdge *horzEdge) { if (dir == dLeftToRight) { maxIt = m_Maxima.begin(); while (maxIt != m_Maxima.end() && *maxIt <= horzEdge->Bot.X) - maxIt++; + ++maxIt; if (maxIt != m_Maxima.end() && *maxIt >= eLastHorz->Top.X) maxIt = m_Maxima.end(); } else { maxRit = m_Maxima.rbegin(); while (maxRit != m_Maxima.rend() && *maxRit > horzEdge->Bot.X) - maxRit++; + ++maxRit; if (maxRit != m_Maxima.rend() && *maxRit <= eLastHorz->Top.X) maxRit = m_Maxima.rend(); } @@ -2576,13 +2576,13 @@ void Clipper::ProcessHorizontal(TEdge *horzEdge) { while (maxIt != m_Maxima.end() && *maxIt < e->Curr.X) { if (horzEdge->OutIdx >= 0 && !IsOpen) AddOutPt(horzEdge, IntPoint(*maxIt, horzEdge->Bot.Y)); - maxIt++; + ++maxIt; } } else { while (maxRit != m_Maxima.rend() && *maxRit > e->Curr.X) { if (horzEdge->OutIdx >= 0 && !IsOpen) AddOutPt(horzEdge, IntPoint(*maxRit, horzEdge->Bot.Y)); - maxRit++; + ++maxRit; } } }; diff --git a/deploy/cpp_infer/src/config.cpp b/deploy/cpp_infer/src/config.cpp index 303c3c1259515ee8c67fa865bf485ae3338505d6..a0ac1d08c93d1ff1e51f769465b2df2b4355fbc0 100644 --- a/deploy/cpp_infer/src/config.cpp +++ b/deploy/cpp_infer/src/config.cpp @@ -21,10 +21,10 @@ std::vector OCRConfig::split(const std::string &str, std::vector res; if ("" == str) return res; - char *strs = new char[str.length() + 1]; + char strs[str.length() + 1]; std::strcpy(strs, str.c_str()); - char *d = new char[delim.length() + 1]; + char d[delim.length() + 1]; std::strcpy(d, delim.c_str()); char *p = std::strtok(strs, d); @@ -61,4 +61,4 @@ void OCRConfig::PrintConfigInfo() { std::cout << "=======End of Paddle OCR inference config======" << std::endl; } -} // namespace PaddleOCR \ No newline at end of file +} // namespace PaddleOCR diff --git a/deploy/cpp_infer/src/main.cpp b/deploy/cpp_infer/src/main.cpp index 49c68f55234980009aa8e139acc3f22bf6a17bcf..f25e674b489ea92118fe45c63939fca203ce3823 100644 --- a/deploy/cpp_infer/src/main.cpp +++ b/deploy/cpp_infer/src/main.cpp @@ -27,9 +27,12 @@ #include #include +#include #include #include #include +#include +#include using namespace std; using namespace cv; 
diff --git a/deploy/cpp_infer/src/main.cpp b/deploy/cpp_infer/src/main.cpp
index 49c68f55234980009aa8e139acc3f22bf6a17bcf..f25e674b489ea92118fe45c63939fca203ce3823 100644
--- a/deploy/cpp_infer/src/main.cpp
+++ b/deploy/cpp_infer/src/main.cpp
@@ -27,9 +27,12 @@
 #include <fstream>
 #include <numeric>
 
+#include <glog/logging.h>
 #include <include/config.h>
 #include <include/ocr_det.h>
 #include <include/ocr_rec.h>
+#include <include/utility.h>
+#include <sys/stat.h>
 
 using namespace std;
 using namespace cv;
@@ -47,14 +50,15 @@ int main(int argc, char **argv) {
   config.PrintConfigInfo();
 
   std::string img_path(argv[2]);
-
-  cv::Mat srcimg = cv::imread(img_path, cv::IMREAD_COLOR);
+  std::vector<std::string> all_img_names;
+  Utility::GetAllFiles((char *)img_path.c_str(), all_img_names);
 
   DBDetector det(config.det_model_dir, config.use_gpu, config.gpu_id,
                  config.gpu_mem, config.cpu_math_library_num_threads,
                  config.use_mkldnn, config.max_side_len, config.det_db_thresh,
                  config.det_db_box_thresh, config.det_db_unclip_ratio,
-                 config.visualize, config.use_tensorrt, config.use_fp16);
+                 config.use_polygon_score, config.visualize,
+                 config.use_tensorrt, config.use_fp16);
 
   Classifier *cls = nullptr;
   if (config.use_angle_cls == true) {
@@ -70,18 +74,31 @@ int main(int argc, char **argv) {
                        config.use_tensorrt, config.use_fp16);
 
   auto start = std::chrono::system_clock::now();
-  std::vector<std::vector<std::vector<int>>> boxes;
-  det.Run(srcimg, boxes);
-
-  rec.Run(boxes, srcimg, cls);
-  auto end = std::chrono::system_clock::now();
-  auto duration =
-      std::chrono::duration_cast<std::chrono::microseconds>(end - start);
-  std::cout << "Cost "
-            << double(duration.count()) *
-                   std::chrono::microseconds::period::num /
-                   std::chrono::microseconds::period::den
-            << "s" << std::endl;
+
+  for (auto img_dir : all_img_names) {
+    LOG(INFO) << "The predict img: " << img_dir;
+
+    cv::Mat srcimg = cv::imread(img_dir, cv::IMREAD_COLOR);
+    if (!srcimg.data) {
+      std::cerr << "[ERROR] image read failed! image path: " << img_dir
+                << "\n";
+      exit(1);
+    }
+    auto start = std::chrono::system_clock::now(); // reset the timer per image
+    std::vector<std::vector<std::vector<int>>> boxes;
+
+    det.Run(srcimg, boxes);
+
+    rec.Run(boxes, srcimg, cls);
+    auto end = std::chrono::system_clock::now();
+    auto duration =
+        std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+    std::cout << "Cost "
+              << double(duration.count()) *
+                     std::chrono::microseconds::period::num /
+                     std::chrono::microseconds::period::den
+              << "s" << std::endl;
+  }
 
   return 0;
 }
diff --git a/deploy/cpp_infer/src/ocr_det.cpp b/deploy/cpp_infer/src/ocr_det.cpp
index 489940f062fa9f8093282d20441704dd5cb8b382..33ad468a33b42c3d9f25beb19452f2fa6a81db9e 100644
--- a/deploy/cpp_infer/src/ocr_det.cpp
+++ b/deploy/cpp_infer/src/ocr_det.cpp
@@ -30,6 +30,42 @@ void DBDetector::LoadModel(const std::string &model_dir) {
           this->use_fp16_ ? paddle_infer::Config::Precision::kHalf
                           : paddle_infer::Config::Precision::kFloat32,
           false, false);
+      std::map<std::string, std::vector<int>> min_input_shape = {
+          {"x", {1, 3, 50, 50}},
+          {"conv2d_92.tmp_0", {1, 96, 20, 20}},
+          {"conv2d_91.tmp_0", {1, 96, 10, 10}},
+          {"nearest_interp_v2_1.tmp_0", {1, 96, 10, 10}},
+          {"nearest_interp_v2_2.tmp_0", {1, 96, 20, 20}},
+          {"nearest_interp_v2_3.tmp_0", {1, 24, 20, 20}},
+          {"nearest_interp_v2_4.tmp_0", {1, 24, 20, 20}},
+          {"nearest_interp_v2_5.tmp_0", {1, 24, 20, 20}},
+          {"elementwise_add_7", {1, 56, 2, 2}},
+          {"nearest_interp_v2_0.tmp_0", {1, 96, 2, 2}}};
+      std::map<std::string, std::vector<int>> max_input_shape = {
+          {"x", {1, 3, this->max_side_len_, this->max_side_len_}},
+          {"conv2d_92.tmp_0", {1, 96, 400, 400}},
+          {"conv2d_91.tmp_0", {1, 96, 200, 200}},
+          {"nearest_interp_v2_1.tmp_0", {1, 96, 200, 200}},
+          {"nearest_interp_v2_2.tmp_0", {1, 96, 400, 400}},
+          {"nearest_interp_v2_3.tmp_0", {1, 24, 400, 400}},
+          {"nearest_interp_v2_4.tmp_0", {1, 24, 400, 400}},
+          {"nearest_interp_v2_5.tmp_0", {1, 24, 400, 400}},
+          {"elementwise_add_7", {1, 56, 400, 400}},
+          {"nearest_interp_v2_0.tmp_0", {1, 96, 400, 400}}};
+      std::map<std::string, std::vector<int>> opt_input_shape = {
+          {"x", {1, 3, 640, 640}},
+          {"conv2d_92.tmp_0", {1, 96, 160, 160}},
+          {"conv2d_91.tmp_0", {1, 96, 80, 80}},
+          {"nearest_interp_v2_1.tmp_0", {1, 96, 80, 80}},
+          {"nearest_interp_v2_2.tmp_0", {1, 96, 160, 160}},
+          {"nearest_interp_v2_3.tmp_0", {1, 24, 160, 160}},
+          {"nearest_interp_v2_4.tmp_0", {1, 24, 160, 160}},
+          {"nearest_interp_v2_5.tmp_0", {1, 24, 160, 160}},
+          {"elementwise_add_7", {1, 56, 40, 40}},
+          {"nearest_interp_v2_0.tmp_0", {1, 96, 40, 40}}};
+
+      config.SetTRTDynamicShapeInfo(min_input_shape, max_input_shape,
+                                    opt_input_shape);
     }
   } else {
     config.DisableGpu();
@@ -48,7 +84,7 @@ void DBDetector::LoadModel(const std::string &model_dir) {
   config.SwitchIrOptim(true);
 
   config.EnableMemoryOptim();
-  config.DisableGlogInfo();
+  // config.DisableGlogInfo();
 
   this->predictor_ = CreatePredictor(config);
 }
@@ -109,9 +145,9 @@ void DBDetector::Run(cv::Mat &img,
   cv::Mat dilation_map;
   cv::Mat dila_ele = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(2, 2));
   cv::dilate(bit_map, dilation_map, dila_ele);
-  boxes = post_processor_.BoxesFromBitmap(pred_map, dilation_map,
-                                          this->det_db_box_thresh_,
-                                          this->det_db_unclip_ratio_);
+  boxes = post_processor_.BoxesFromBitmap(
+      pred_map, dilation_map, this->det_db_box_thresh_,
+      this->det_db_unclip_ratio_, this->use_polygon_score_);
 
   boxes = post_processor_.FilterTagDetRes(boxes, ratio_h, ratio_w, srcimg);
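`SetTRTDynamicShapeInfo` takes three maps from tensor name to shape: the minimum and maximum shapes a TensorRT engine must accept, and the shape it is tuned for. Intermediate tensors such as `conv2d_92.tmp_0` are listed because graph partitioning can make them the inputs of a TensorRT subgraph. A minimal sketch of the same call against a hypothetical single-input model whose input is named `x` (header name as shipped with the Paddle inference library):

```cpp
// Minimal sketch (hypothetical one-input model): TensorRT engines are built
// to accept any shape between min and max, and are tuned for the opt shape.
#include "paddle_inference_api.h"

#include <map>
#include <string>
#include <vector>

void ConfigureDynamicShapes(paddle_infer::Config &config, int max_side_len) {
  std::map<std::string, std::vector<int>> min_input_shape = {
      {"x", {1, 3, 50, 50}}};
  std::map<std::string, std::vector<int>> max_input_shape = {
      {"x", {1, 3, max_side_len, max_side_len}}};
  std::map<std::string, std::vector<int>> opt_input_shape = {
      {"x", {1, 3, 640, 640}}};
  config.SetTRTDynamicShapeInfo(min_input_shape, max_input_shape,
                                opt_input_shape);
}
```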
diff --git a/deploy/cpp_infer/src/ocr_rec.cpp b/deploy/cpp_infer/src/ocr_rec.cpp
index 76873dad3c871a027c7fccd88409227639edefdf..b09282b0283743b530cd5477dbe9c5ff751de93c 100644
--- a/deploy/cpp_infer/src/ocr_rec.cpp
+++ b/deploy/cpp_infer/src/ocr_rec.cpp
@@ -25,8 +25,9 @@ void CRNNRecognizer::Run(std::vector<std::vector<std::vector<int>>> boxes,
   std::cout << "The predicted text is :" << std::endl;
   int index = 0;
-  for (int i = boxes.size() - 1; i >= 0; i--) {
+  for (int i = 0; i < boxes.size(); i++) {
     crop_img = GetRotateCropImage(srcimg, boxes[i]);
+
     if (cls != nullptr) {
       crop_img = cls->Run(crop_img);
     }
@@ -105,6 +106,15 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) {
           this->use_fp16_ ? paddle_infer::Config::Precision::kHalf
                           : paddle_infer::Config::Precision::kFloat32,
           false, false);
+      std::map<std::string, std::vector<int>> min_input_shape = {
+          {"x", {1, 3, 32, 10}}};
+      std::map<std::string, std::vector<int>> max_input_shape = {
+          {"x", {1, 3, 32, 2000}}};
+      std::map<std::string, std::vector<int>> opt_input_shape = {
+          {"x", {1, 3, 32, 320}}};
+
+      config.SetTRTDynamicShapeInfo(min_input_shape, max_input_shape,
+                                    opt_input_shape);
     }
   } else {
     config.DisableGpu();
diff --git a/deploy/cpp_infer/src/postprocess_op.cpp b/deploy/cpp_infer/src/postprocess_op.cpp
index 8c44a54a40da43e22194589f2e759204eff368e7..22494d67f6a65d43b37d3c42b96010952442f52d 100644
--- a/deploy/cpp_infer/src/postprocess_op.cpp
+++ b/deploy/cpp_infer/src/postprocess_op.cpp
@@ -159,6 +159,52 @@ std::vector<std::vector<float>> PostProcessor::GetMiniBoxes(cv::RotatedRect box,
   return array;
 }
 
+float PostProcessor::PolygonScoreAcc(std::vector<cv::Point> contour,
+                                     cv::Mat pred) {
+  int width = pred.cols;
+  int height = pred.rows;
+  std::vector<float> box_x;
+  std::vector<float> box_y;
+  for (int i = 0; i < contour.size(); ++i) {
+    box_x.push_back(contour[i].x);
+    box_y.push_back(contour[i].y);
+  }
+
+  int xmin =
+      clamp(int(std::floor(*(std::min_element(box_x.begin(), box_x.end())))), 0,
+            width - 1);
+  int xmax =
+      clamp(int(std::ceil(*(std::max_element(box_x.begin(), box_x.end())))), 0,
+            width - 1);
+  int ymin =
+      clamp(int(std::floor(*(std::min_element(box_y.begin(), box_y.end())))), 0,
+            height - 1);
+  int ymax =
+      clamp(int(std::ceil(*(std::max_element(box_y.begin(), box_y.end())))), 0,
+            height - 1);
+
+  cv::Mat mask;
+  mask = cv::Mat::zeros(ymax - ymin + 1, xmax - xmin + 1, CV_8UC1);
+
+  cv::Point *rook_point = new cv::Point[contour.size()];
+
+  for (int i = 0; i < contour.size(); ++i) {
+    rook_point[i] = cv::Point(int(box_x[i]) - xmin, int(box_y[i]) - ymin);
+  }
+  const cv::Point *ppt[1] = {rook_point};
+  int npt[] = {int(contour.size())};
+
+  cv::fillPoly(mask, ppt, npt, 1, cv::Scalar(1));
+
+  cv::Mat croppedImg;
+  pred(cv::Rect(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1))
+      .copyTo(croppedImg);
+
+  float score = cv::mean(croppedImg, mask)[0];
+
+  delete[] rook_point;
+  return score;
+}
+
 float PostProcessor::BoxScoreFast(std::vector<std::vector<float>> box_array,
                                   cv::Mat pred) {
   auto array = box_array;
@@ -197,10 +243,9 @@ float PostProcessor::BoxScoreFast(std::vector<std::vector<float>> box_array,
   return score;
 }
 
-std::vector<std::vector<std::vector<int>>>
-PostProcessor::BoxesFromBitmap(const cv::Mat pred, const cv::Mat bitmap,
-                               const float &box_thresh,
-                               const float &det_db_unclip_ratio) {
+std::vector<std::vector<std::vector<int>>> PostProcessor::BoxesFromBitmap(
+    const cv::Mat pred, const cv::Mat bitmap, const float &box_thresh,
+    const float &det_db_unclip_ratio, const bool &use_polygon_score) {
   const int min_size = 3;
   const int max_candidates = 1000;
 
@@ -234,7 +279,12 @@ PostProcessor::BoxesFromBitmap(const cv::Mat pred, const cv::Mat bitmap,
     }
 
     float score;
-    score = BoxScoreFast(array, pred);
+    if (use_polygon_score)
+      /* compute using polygon*/
+      score = PolygonScoreAcc(contours[_i], pred);
+    else
+      score = BoxScoreFast(array, pred);
+
     if (score < box_thresh)
       continue;
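`PolygonScoreAcc` rasterizes the contour into a binary mask over its bounding rectangle and takes the mean of the DB probability map under that mask — the same masked-mean idea as `BoxScoreFast`, but over the exact contour instead of the min-area box, which is why it scores curved text regions more accurately. A standalone sketch of the core operation (all values are illustrative only):

```cpp
// Standalone sketch of the masked-mean scoring idea: fill a polygon mask and
// average a probability map under it. Values here are made up for illustration.
#include <opencv2/opencv.hpp>

#include <iostream>
#include <vector>

int main() {
  cv::Mat pred = cv::Mat::ones(8, 8, CV_32FC1) * 0.9f; // fake DB probability map
  std::vector<cv::Point> contour = {{1, 1}, {6, 1}, {6, 6}, {1, 6}};

  cv::Mat mask = cv::Mat::zeros(pred.size(), CV_8UC1);
  std::vector<std::vector<cv::Point>> polys = {contour};
  cv::fillPoly(mask, polys, cv::Scalar(1));

  // mean of pred restricted to the polygon; this is the detection's score
  float score = cv::mean(pred, mask)[0];
  std::cout << "polygon score: " << score << std::endl; // ~0.9
  return 0;
}
```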
diff --git a/deploy/cpp_infer/src/preprocess_op.cpp b/deploy/cpp_infer/src/preprocess_op.cpp
index 87d8dbbd774f7dd29843d9a2c2533368f1914272..23c51c2008dc7280ce4d6c232ed766dbf2a53226 100644
--- a/deploy/cpp_infer/src/preprocess_op.cpp
+++ b/deploy/cpp_infer/src/preprocess_op.cpp
@@ -47,16 +47,13 @@ void Normalize::Run(cv::Mat *im, const std::vector<float> &mean,
     e /= 255.0;
   }
   (*im).convertTo(*im, CV_32FC3, e);
-  for (int h = 0; h < im->rows; h++) {
-    for (int w = 0; w < im->cols; w++) {
-      im->at<cv::Vec3f>(h, w)[0] =
-          (im->at<cv::Vec3f>(h, w)[0] - mean[0]) * scale[0];
-      im->at<cv::Vec3f>(h, w)[1] =
-          (im->at<cv::Vec3f>(h, w)[1] - mean[1]) * scale[1];
-      im->at<cv::Vec3f>(h, w)[2] =
-          (im->at<cv::Vec3f>(h, w)[2] - mean[2]) * scale[2];
-    }
+  std::vector<cv::Mat> bgr_channels(3);
+  cv::split(*im, bgr_channels);
+  for (auto i = 0; i < bgr_channels.size(); i++) {
+    bgr_channels[i].convertTo(bgr_channels[i], CV_32FC1, 1.0 * scale[i],
+                              (0.0 - mean[i]) * scale[i]);
   }
+  cv::merge(bgr_channels, *im);
 }
 
 void ResizeImgType0::Run(const cv::Mat &img, cv::Mat &resize_img,
@@ -77,28 +74,13 @@ void ResizeImgType0::Run(const cv::Mat &img, cv::Mat &resize_img,
   int resize_h = int(float(h) * ratio);
   int resize_w = int(float(w) * ratio);
 
-  if (resize_h % 32 == 0)
-    resize_h = resize_h;
-  else if (resize_h / 32 < 1 + 1e-5)
-    resize_h = 32;
-  else
-    resize_h = (resize_h / 32) * 32;
-  if (resize_w % 32 == 0)
-    resize_w = resize_w;
-  else if (resize_w / 32 < 1 + 1e-5)
-    resize_w = 32;
-  else
-    resize_w = (resize_w / 32) * 32;
-  if (!use_tensorrt) {
-    cv::resize(img, resize_img, cv::Size(resize_w, resize_h));
-    ratio_h = float(resize_h) / float(h);
-    ratio_w = float(resize_w) / float(w);
-  } else {
-    cv::resize(img, resize_img, cv::Size(640, 640));
-    ratio_h = float(640) / float(h);
-    ratio_w = float(640) / float(w);
-  }
+  resize_h = max(int(round(float(resize_h) / 32) * 32), 32);
+  resize_w = max(int(round(float(resize_w) / 32) * 32), 32);
+
+  cv::resize(img, resize_img, cv::Size(resize_w, resize_h));
+  ratio_h = float(resize_h) / float(h);
+  ratio_w = float(resize_w) / float(w);
 }
 
 void CrnnResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img, float wh_ratio,
@@ -117,23 +99,12 @@ void CrnnResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img, float wh_ratio,
     resize_w = imgW;
   else
     resize_w = int(ceilf(imgH * ratio));
-  if (!use_tensorrt) {
-    cv::resize(img, resize_img, cv::Size(resize_w, imgH), 0.f, 0.f,
-               cv::INTER_LINEAR);
-    cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0,
-                       int(imgW - resize_img.cols), cv::BORDER_CONSTANT,
-                       {127, 127, 127});
-  } else {
-    int k = int(img.cols * 32 / img.rows);
-    if (k >= 100) {
-      cv::resize(img, resize_img, cv::Size(100, 32), 0.f, 0.f,
-                 cv::INTER_LINEAR);
-    } else {
-      cv::resize(img, resize_img, cv::Size(k, 32), 0.f, 0.f, cv::INTER_LINEAR);
-      cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0, int(100 - k),
-                         cv::BORDER_CONSTANT, {127, 127, 127});
-    }
-  }
+
+  cv::resize(img, resize_img, cv::Size(resize_w, imgH), 0.f, 0.f,
+             cv::INTER_LINEAR);
+  cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0,
+                     int(imgW - resize_img.cols), cv::BORDER_CONSTANT,
+                     {127, 127, 127});
 }
 
 void ClsResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img,
@@ -151,15 +122,11 @@ void ClsResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img,
   else
     resize_w = int(ceilf(imgH * ratio));
 
-  if (!use_tensorrt) {
-    cv::resize(img, resize_img, cv::Size(resize_w, imgH), 0.f, 0.f,
-               cv::INTER_LINEAR);
-    if (resize_w < imgW) {
-      cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0, imgW - resize_w,
-                         cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));
-    }
-  } else {
-    cv::resize(img, resize_img, cv::Size(100, 32), 0.f, 0.f, cv::INTER_LINEAR);
+  cv::resize(img, resize_img, cv::Size(resize_w, imgH), 0.f, 0.f,
+             cv::INTER_LINEAR);
+  if (resize_w < imgW) {
+    cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0, imgW - resize_w,
+                       cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));
   }
 }
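The rewritten `Normalize::Run` relies on the identity `(x - mean) * scale == x * scale + (-mean * scale)`, i.e. `convertTo`'s `alpha`/`beta` form applied per channel, and the rewritten `ResizeImgType0::Run` snaps each side to the nearest multiple of 32 with a floor of 32 (700 → 704, 48 → 64, 15 → 32). A standalone check of the normalization identity on one channel:

```cpp
// Standalone check that convertTo(alpha, beta) reproduces (x - mean) * scale:
// alpha = scale, beta = -mean * scale.
#include <opencv2/opencv.hpp>

#include <iostream>

int main() {
  const float mean = 0.485f, scale = 1.0f / 0.229f;
  cv::Mat ch = (cv::Mat_<float>(1, 3) << 0.2f, 0.5f, 0.8f);

  cv::Mat viaConvertTo, viaLoop = ch.clone();
  ch.convertTo(viaConvertTo, CV_32FC1, scale, -mean * scale);
  for (int i = 0; i < viaLoop.cols; i++)
    viaLoop.at<float>(0, i) = (viaLoop.at<float>(0, i) - mean) * scale;

  std::cout << cv::norm(viaConvertTo - viaLoop) << std::endl; // ~0
  return 0;
}
```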
diff --git a/deploy/cpp_infer/src/utility.cpp b/deploy/cpp_infer/src/utility.cpp
index c1c9d9382a06432daca71eb7b08acb8b19b8ee98..2cd84f7e8dbdd8144b5337f55b3f3a62ed43d5b3 100644
--- a/deploy/cpp_infer/src/utility.cpp
+++ b/deploy/cpp_infer/src/utility.cpp
@@ -12,12 +12,14 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include <dirent.h>
+#include <include/utility.h>
 #include <iostream>
 #include <ostream>
+#include <sys/stat.h>
+#include <sys/types.h>
 #include <vector>
 
-#include <include/utility.h>
-
 namespace PaddleOCR {
 
 std::vector<std::string> Utility::ReadDict(const std::string &path) {
@@ -57,4 +59,37 @@ void Utility::VisualizeBboxes(
             << std::endl;
 }
 
+// list all files under a directory
+void Utility::GetAllFiles(const char *dir_name,
+                          std::vector<std::string> &all_inputs) {
+  if (NULL == dir_name) {
+    std::cout << " dir_name is null ! " << std::endl;
+    return;
+  }
+  struct stat s;
+  lstat(dir_name, &s);
+  if (!S_ISDIR(s.st_mode)) {
+    std::cout << "dir_name is not a valid directory !" << std::endl;
+    all_inputs.push_back(dir_name);
+    return;
+  } else {
+    struct dirent *filename; // return value for readdir()
+    DIR *dir;                // return value for opendir()
+    dir = opendir(dir_name);
+    if (NULL == dir) {
+      std::cout << "Can not open dir " << dir_name << std::endl;
+      return;
+    }
+    std::cout << "Successfully opened the dir !" << std::endl;
+    while ((filename = readdir(dir)) != NULL) {
+      if (strcmp(filename->d_name, ".") == 0 ||
+          strcmp(filename->d_name, "..") == 0)
+        continue;
+      // img_dir + std::string("/") + all_inputs[0];
+      all_inputs.push_back(dir_name + std::string("/") +
+                           std::string(filename->d_name));
+    }
+  }
+}
+
 } // namespace PaddleOCR
\ No newline at end of file
diff --git a/deploy/cpp_infer/tools/build.sh b/deploy/cpp_infer/tools/build.sh
index 606539487fce82adf817e7a3ee300e3bf890643b..3b36e8f12c71360001cc1e7af83ce46f42227263 100755
--- a/deploy/cpp_infer/tools/build.sh
+++ b/deploy/cpp_infer/tools/build.sh
@@ -1,7 +1,7 @@
-OPENCV_DIR=your_opencv_dir
-LIB_DIR=your_paddle_inference_dir
-CUDA_LIB_DIR=your_cuda_lib_dir
-CUDNN_LIB_DIR=your_cudnn_lib_dir
+OPENCV_DIR=/paddle/test/opencv-3.4.7/opencv3
+LIB_DIR=/paddle/test/PaddleOCR/deploy/paddle_inference
+CUDA_LIB_DIR=/usr/local/cuda/lib64
+CUDNN_LIB_DIR=/usr/lib/x86_64-linux-gnu/
 
 BUILD_DIR=build
 rm -rf ${BUILD_DIR}
@@ -18,3 +18,5 @@ cmake .. \
     -DCUDA_LIB=${CUDA_LIB_DIR} \
 
 make -j
+
+
diff --git a/deploy/cpp_infer/tools/config.txt b/deploy/cpp_infer/tools/config.txt
index 28085ca408d279fc61a1bce1abf1df9c05115c78..d4d66d65225bc9d1d4d62f45550db71fb5d8414e 100644
--- a/deploy/cpp_infer/tools/config.txt
+++ b/deploy/cpp_infer/tools/config.txt
@@ -3,13 +3,14 @@ use_gpu 0
 gpu_id 0
 gpu_mem 4000
 cpu_math_library_num_threads 10
-use_mkldnn 1
+use_mkldnn 0
 
 # det config
 max_side_len 960
 det_db_thresh 0.3
 det_db_box_thresh 0.5
 det_db_unclip_ratio 1.6
+use_polygon_score 1
 det_model_dir ./inference/ch_ppocr_mobile_v2.0_det_infer/
 
 # cls config
@@ -19,10 +20,10 @@ cls_thresh 0.9
 
 # rec config
 rec_model_dir ./inference/ch_ppocr_mobile_v2.0_rec_infer/
-char_list_file  ../../ppocr/utils/ppocr_keys_v1.txt
+char_list_file ../../ppocr/utils/ppocr_keys_v1.txt
 
 # show the detection results
-visualize 1
+visualize 0
 
 # use_tensorrt
 use_tensorrt 0