From dd9f48da2ebb98139147f49eab052895a6b64e53 Mon Sep 17 00:00:00 2001 From: WenmuZhou <572459439@qq.com> Date: Tue, 20 Sep 2022 03:40:05 +0000 Subject: [PATCH] fix bug --- deploy/cpp_infer/include/ocr_cls.h | 16 +----- deploy/cpp_infer/include/ocr_det.h | 20 ++------ deploy/cpp_infer/include/ocr_rec.h | 19 +------ deploy/cpp_infer/include/paddleocr.h | 19 ------- deploy/cpp_infer/include/paddlestructure.h | 19 ------- deploy/cpp_infer/include/postprocess_op.h | 41 +-------------- deploy/cpp_infer/include/preprocess_op.h | 21 +++----- deploy/cpp_infer/include/structure_layout.h | 19 +------ deploy/cpp_infer/include/structure_table.h | 19 +------ deploy/cpp_infer/include/utility.h | 8 +-- deploy/cpp_infer/src/main.cpp | 29 +++++------ deploy/cpp_infer/src/ocr_cls.cpp | 10 ++-- deploy/cpp_infer/src/ocr_det.cpp | 7 ++- deploy/cpp_infer/src/ocr_rec.cpp | 16 +++--- deploy/cpp_infer/src/paddleocr.cpp | 2 +- deploy/cpp_infer/src/paddlestructure.cpp | 18 +++---- deploy/cpp_infer/src/postprocess_op.cpp | 56 +++++++++++++++------ deploy/cpp_infer/src/preprocess_op.cpp | 27 +++------- deploy/cpp_infer/src/structure_layout.cpp | 6 +-- deploy/cpp_infer/src/structure_table.cpp | 6 +-- deploy/cpp_infer/src/utility.cpp | 10 +++- 21 files changed, 122 insertions(+), 266 deletions(-) diff --git a/deploy/cpp_infer/include/ocr_cls.h b/deploy/cpp_infer/include/ocr_cls.h index f5429a7c..f5a03565 100644 --- a/deploy/cpp_infer/include/ocr_cls.h +++ b/deploy/cpp_infer/include/ocr_cls.h @@ -14,26 +14,12 @@ #pragma once -#include "opencv2/core.hpp" -#include "opencv2/imgcodecs.hpp" -#include "opencv2/imgproc.hpp" #include "paddle_api.h" #include "paddle_inference_api.h" -#include -#include -#include -#include -#include - -#include -#include -#include #include #include -using namespace paddle_infer; - namespace PaddleOCR { class Classifier { @@ -66,7 +52,7 @@ public: std::vector &cls_scores, std::vector ×); private: - std::shared_ptr predictor_; + std::shared_ptr predictor_; bool use_gpu_ = false; int gpu_id_ = 0; diff --git a/deploy/cpp_infer/include/ocr_det.h b/deploy/cpp_infer/include/ocr_det.h index d1421b10..9f6f2520 100644 --- a/deploy/cpp_infer/include/ocr_det.h +++ b/deploy/cpp_infer/include/ocr_det.h @@ -14,26 +14,12 @@ #pragma once -#include "opencv2/core.hpp" -#include "opencv2/imgcodecs.hpp" -#include "opencv2/imgproc.hpp" #include "paddle_api.h" #include "paddle_inference_api.h" -#include -#include -#include -#include -#include - -#include -#include -#include #include #include -using namespace paddle_infer; - namespace PaddleOCR { class DBDetector { @@ -41,7 +27,7 @@ public: explicit DBDetector(const std::string &model_dir, const bool &use_gpu, const int &gpu_id, const int &gpu_mem, const int &cpu_math_library_num_threads, - const bool &use_mkldnn, const string &limit_type, + const bool &use_mkldnn, const std::string &limit_type, const int &limit_side_len, const double &det_db_thresh, const double &det_db_box_thresh, const double &det_db_unclip_ratio, @@ -77,7 +63,7 @@ public: std::vector ×); private: - std::shared_ptr predictor_; + std::shared_ptr predictor_; bool use_gpu_ = false; int gpu_id_ = 0; @@ -85,7 +71,7 @@ private: int cpu_math_library_num_threads_ = 4; bool use_mkldnn_ = false; - string limit_type_ = "max"; + std::string limit_type_ = "max"; int limit_side_len_ = 960; double det_db_thresh_ = 0.3; diff --git a/deploy/cpp_infer/include/ocr_rec.h b/deploy/cpp_infer/include/ocr_rec.h index 30f8efa9..257c2610 100644 --- a/deploy/cpp_infer/include/ocr_rec.h +++ b/deploy/cpp_infer/include/ocr_rec.h @@ -14,27 +14,12 @@ #pragma once -#include "opencv2/core.hpp" -#include "opencv2/imgcodecs.hpp" -#include "opencv2/imgproc.hpp" #include "paddle_api.h" #include "paddle_inference_api.h" -#include -#include -#include -#include -#include - -#include -#include -#include #include -#include #include -using namespace paddle_infer; - namespace PaddleOCR { class CRNNRecognizer { @@ -42,7 +27,7 @@ public: explicit CRNNRecognizer(const std::string &model_dir, const bool &use_gpu, const int &gpu_id, const int &gpu_mem, const int &cpu_math_library_num_threads, - const bool &use_mkldnn, const string &label_path, + const bool &use_mkldnn, const std::string &label_path, const bool &use_tensorrt, const std::string &precision, const int &rec_batch_num, const int &rec_img_h, @@ -75,7 +60,7 @@ public: std::vector &rec_text_scores, std::vector ×); private: - std::shared_ptr predictor_; + std::shared_ptr predictor_; bool use_gpu_ = false; int gpu_id_ = 0; diff --git a/deploy/cpp_infer/include/paddleocr.h b/deploy/cpp_infer/include/paddleocr.h index 225672ec..16750a15 100644 --- a/deploy/cpp_infer/include/paddleocr.h +++ b/deploy/cpp_infer/include/paddleocr.h @@ -14,28 +14,9 @@ #pragma once -#include "opencv2/core.hpp" -#include "opencv2/imgcodecs.hpp" -#include "opencv2/imgproc.hpp" -#include "paddle_api.h" -#include "paddle_inference_api.h" -#include -#include -#include -#include -#include - -#include -#include -#include - #include #include #include -#include -#include - -using namespace paddle_infer; namespace PaddleOCR { diff --git a/deploy/cpp_infer/include/paddlestructure.h b/deploy/cpp_infer/include/paddlestructure.h index bee888a8..8478a85c 100644 --- a/deploy/cpp_infer/include/paddlestructure.h +++ b/deploy/cpp_infer/include/paddlestructure.h @@ -14,28 +14,9 @@ #pragma once -#include "opencv2/core.hpp" -#include "opencv2/imgcodecs.hpp" -#include "opencv2/imgproc.hpp" -#include "paddle_api.h" -#include "paddle_inference_api.h" -#include -#include -#include -#include -#include - -#include -#include -#include - #include -#include #include #include -#include - -using namespace paddle_infer; namespace PaddleOCR { diff --git a/deploy/cpp_infer/include/postprocess_op.h b/deploy/cpp_infer/include/postprocess_op.h index 2bff298c..e267eeee 100644 --- a/deploy/cpp_infer/include/postprocess_op.h +++ b/deploy/cpp_infer/include/postprocess_op.h @@ -14,24 +14,9 @@ #pragma once -#include "opencv2/core.hpp" -#include "opencv2/imgcodecs.hpp" -#include "opencv2/imgproc.hpp" -#include -#include -#include -#include -#include - -#include -#include -#include - #include "include/clipper.h" #include "include/utility.h" -using namespace std; - namespace PaddleOCR { class DBPostProcessor { @@ -92,23 +77,7 @@ private: class TablePostProcessor { public: - void init(std::string label_path, bool merge_no_span_structure = true) { - this->label_list_ = Utility::ReadDict(label_path); - if (merge_no_span_structure) { - this->label_list_.push_back(""); - std::vector::iterator it; - for (it = this->label_list_.begin(); it != this->label_list_.end();) { - if (*it == "") { - it = this->label_list_.erase(it); - } else { - ++it; - } - } - } - // add_special_char - this->label_list_.insert(this->label_list_.begin(), this->beg); - this->label_list_.push_back(this->end); - } + void init(std::string label_path, bool merge_no_span_structure = true); void Run(std::vector &loc_preds, std::vector &structure_probs, std::vector &rec_scores, std::vector &loc_preds_shape, std::vector &structure_probs_shape, @@ -126,13 +95,7 @@ class PicodetPostProcessor { public: void init(std::string label_path, const double score_threshold = 0.4, const double nms_threshold = 0.5, - const std::vector &fpn_stride = {8, 16, 32, 64}) { - this->label_list_ = Utility::ReadDict(label_path); - this->score_threshold_ = score_threshold; - this->nms_threshold_ = nms_threshold; - this->num_class_ = label_list_.size(); - this->fpn_stride_ = fpn_stride; - } + const std::vector &fpn_stride = {8, 16, 32, 64}); void Run(std::vector &results, std::vector> outs, std::vector ori_shape, std::vector resize_shape, int eg_max); diff --git a/deploy/cpp_infer/include/preprocess_op.h b/deploy/cpp_infer/include/preprocess_op.h index 46cda1ca..0b2e1833 100644 --- a/deploy/cpp_infer/include/preprocess_op.h +++ b/deploy/cpp_infer/include/preprocess_op.h @@ -14,21 +14,12 @@ #pragma once -#include "opencv2/core.hpp" -#include "opencv2/imgcodecs.hpp" -#include "opencv2/imgproc.hpp" -#include -#include #include -#include #include -#include -#include -#include - -using namespace std; -using namespace paddle; +#include "opencv2/core.hpp" +#include "opencv2/imgcodecs.hpp" +#include "opencv2/imgproc.hpp" namespace PaddleOCR { @@ -51,9 +42,9 @@ public: class ResizeImgType0 { public: - virtual void Run(const cv::Mat &img, cv::Mat &resize_img, string limit_type, - int limit_side_len, float &ratio_h, float &ratio_w, - bool use_tensorrt); + virtual void Run(const cv::Mat &img, cv::Mat &resize_img, + std::string limit_type, int limit_side_len, float &ratio_h, + float &ratio_w, bool use_tensorrt); }; class CrnnResizeImg { diff --git a/deploy/cpp_infer/include/structure_layout.h b/deploy/cpp_infer/include/structure_layout.h index f3afb98f..3dd60572 100644 --- a/deploy/cpp_infer/include/structure_layout.h +++ b/deploy/cpp_infer/include/structure_layout.h @@ -14,26 +14,11 @@ #pragma once -#include "opencv2/core.hpp" -#include "opencv2/imgcodecs.hpp" -#include "opencv2/imgproc.hpp" #include "paddle_api.h" #include "paddle_inference_api.h" -#include -#include -#include -#include -#include - -#include -#include -#include #include #include -#include - -using namespace paddle_infer; namespace PaddleOCR { @@ -42,7 +27,7 @@ public: explicit StructureLayoutRecognizer( const std::string &model_dir, const bool &use_gpu, const int &gpu_id, const int &gpu_mem, const int &cpu_math_library_num_threads, - const bool &use_mkldnn, const string &label_path, + const bool &use_mkldnn, const std::string &label_path, const bool &use_tensorrt, const std::string &precision, const double &layout_score_threshold, const double &layout_nms_threshold) { @@ -66,7 +51,7 @@ public: std::vector ×); private: - std::shared_ptr predictor_; + std::shared_ptr predictor_; bool use_gpu_ = false; int gpu_id_ = 0; diff --git a/deploy/cpp_infer/include/structure_table.h b/deploy/cpp_infer/include/structure_table.h index c09e6565..616e95d2 100644 --- a/deploy/cpp_infer/include/structure_table.h +++ b/deploy/cpp_infer/include/structure_table.h @@ -14,26 +14,11 @@ #pragma once -#include "opencv2/core.hpp" -#include "opencv2/imgcodecs.hpp" -#include "opencv2/imgproc.hpp" #include "paddle_api.h" #include "paddle_inference_api.h" -#include -#include -#include -#include -#include - -#include -#include -#include #include #include -#include - -using namespace paddle_infer; namespace PaddleOCR { @@ -42,7 +27,7 @@ public: explicit StructureTableRecognizer( const std::string &model_dir, const bool &use_gpu, const int &gpu_id, const int &gpu_mem, const int &cpu_math_library_num_threads, - const bool &use_mkldnn, const string &label_path, + const bool &use_mkldnn, const std::string &label_path, const bool &use_tensorrt, const std::string &precision, const int &table_batch_num, const int &table_max_len, const bool &merge_no_span_structure) { @@ -70,7 +55,7 @@ public: std::vector ×); private: - std::shared_ptr predictor_; + std::shared_ptr predictor_; bool use_gpu_ = false; int gpu_id_ = 0; diff --git a/deploy/cpp_infer/include/utility.h b/deploy/cpp_infer/include/utility.h index ddf2eee1..7dfe03dd 100644 --- a/deploy/cpp_infer/include/utility.h +++ b/deploy/cpp_infer/include/utility.h @@ -41,8 +41,7 @@ struct OCRPredictResult { }; struct StructurePredictResult { - std::vector box; - std::vector box_float; + std::vector box; std::vector> cell_box; std::string type; std::vector text_res; @@ -60,7 +59,7 @@ public: const std::string &save_path); static void VisualizeBboxes(const cv::Mat &srcimg, - StructurePredictResult &structure_result, + const StructurePredictResult &structure_result, const std::string &save_path); template @@ -84,7 +83,8 @@ public: static void print_result(const std::vector &ocr_result); - static cv::Mat crop_image(cv::Mat &img, std::vector &area); + static cv::Mat crop_image(cv::Mat &img, const std::vector &area); + static cv::Mat crop_image(cv::Mat &img, const std::vector &area); static void sorted_boxes(std::vector &ocr_result); diff --git a/deploy/cpp_infer/src/main.cpp b/deploy/cpp_infer/src/main.cpp index a639614b..0c155dd0 100644 --- a/deploy/cpp_infer/src/main.cpp +++ b/deploy/cpp_infer/src/main.cpp @@ -75,7 +75,8 @@ void check_params() { } if (FLAGS_precision != "fp32" && FLAGS_precision != "fp16" && FLAGS_precision != "int8") { - cout << "precison should be 'fp32'(default), 'fp16' or 'int8'. " << endl; + std::cout << "precison should be 'fp32'(default), 'fp16' or 'int8'. " + << std::endl; exit(1); } } @@ -93,7 +94,7 @@ void ocr(std::vector &cv_all_img_names) { cv::Mat img = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR); if (!img.data) { std::cerr << "[ERROR] image read failed! image path: " - << cv_all_img_names[i] << endl; + << cv_all_img_names[i] << std::endl; continue; } img_list.push_back(img); @@ -104,7 +105,7 @@ void ocr(std::vector &cv_all_img_names) { ocr.ocr(img_list, FLAGS_det, FLAGS_rec, FLAGS_cls); for (int i = 0; i < img_names.size(); ++i) { - cout << "predict img: " << cv_all_img_names[i] << endl; + std::cout << "predict img: " << cv_all_img_names[i] << std::endl; Utility::print_result(ocr_results[i]); if (FLAGS_visualize && FLAGS_det) { std::string file_name = Utility::basename(img_names[i]); @@ -126,11 +127,11 @@ void structure(std::vector &cv_all_img_names) { } for (int i = 0; i < cv_all_img_names.size(); i++) { - cout << "predict img: " << cv_all_img_names[i] << endl; + std::cout << "predict img: " << cv_all_img_names[i] << std::endl; cv::Mat img = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR); if (!img.data) { std::cerr << "[ERROR] image read failed! image path: " - << cv_all_img_names[i] << endl; + << cv_all_img_names[i] << std::endl; continue; } @@ -156,14 +157,14 @@ void structure(std::vector &cv_all_img_names) { "_" + file_name); } } else { - cout << "count of ocr result is : " - << structure_results[j].text_res.size() << endl; + std::cout << "count of ocr result is : " + << structure_results[j].text_res.size() << std::endl; if (structure_results[j].text_res.size() > 0) { - cout << "********** print ocr result " - << "**********" << endl; + std::cout << "********** print ocr result " + << "**********" << std::endl; Utility::print_result(structure_results[j].text_res); - cout << "********** end print ocr result " - << "**********" << endl; + std::cout << "********** end print ocr result " + << "**********" << std::endl; } } } @@ -180,13 +181,13 @@ int main(int argc, char **argv) { if (!Utility::PathExists(FLAGS_image_dir)) { std::cerr << "[ERROR] image path not exist! image_dir: " << FLAGS_image_dir - << endl; + << std::endl; exit(1); } std::vector cv_all_img_names; cv::glob(FLAGS_image_dir, cv_all_img_names); - std::cout << "total images num: " << cv_all_img_names.size() << endl; + std::cout << "total images num: " << cv_all_img_names.size() << std::endl; if (!Utility::PathExists(FLAGS_output)) { Utility::CreateDir(FLAGS_output); @@ -196,6 +197,6 @@ int main(int argc, char **argv) { } else if (FLAGS_type == "structure") { structure(cv_all_img_names); } else { - std::cout << "only value in ['ocr','structure'] is supported" << endl; + std::cout << "only value in ['ocr','structure'] is supported" << std::endl; } } diff --git a/deploy/cpp_infer/src/ocr_cls.cpp b/deploy/cpp_infer/src/ocr_cls.cpp index 92d83600..abcfed12 100644 --- a/deploy/cpp_infer/src/ocr_cls.cpp +++ b/deploy/cpp_infer/src/ocr_cls.cpp @@ -32,7 +32,7 @@ void Classifier::Run(std::vector img_list, for (int beg_img_no = 0; beg_img_no < img_num; beg_img_no += this->cls_batch_num_) { auto preprocess_start = std::chrono::steady_clock::now(); - int end_img_no = min(img_num, beg_img_no + this->cls_batch_num_); + int end_img_no = std::min(img_num, beg_img_no + this->cls_batch_num_); int batch_num = end_img_no - beg_img_no; // preprocess std::vector norm_img_batch; @@ -97,7 +97,7 @@ void Classifier::Run(std::vector img_list, } void Classifier::LoadModel(const std::string &model_dir) { - AnalysisConfig config; + paddle_infer::Config config; config.SetModel(model_dir + "/inference.pdmodel", model_dir + "/inference.pdiparams"); @@ -112,9 +112,9 @@ void Classifier::LoadModel(const std::string &model_dir) { precision = paddle_infer::Config::Precision::kInt8; } config.EnableTensorRtEngine(1 << 20, 10, 3, precision, false, false); - if (!Utility::PathExists("./trt_cls_shape.txt")){ + if (!Utility::PathExists("./trt_cls_shape.txt")) { config.CollectShapeRangeInfo("./trt_cls_shape.txt"); - } else { + } else { config.EnableTunedTensorRtDynamicShape("./trt_cls_shape.txt", true); } } @@ -136,6 +136,6 @@ void Classifier::LoadModel(const std::string &model_dir) { config.EnableMemoryOptim(); config.DisableGlogInfo(); - this->predictor_ = CreatePredictor(config); + this->predictor_ = paddle_infer::CreatePredictor(config); } } // namespace PaddleOCR diff --git a/deploy/cpp_infer/src/ocr_det.cpp b/deploy/cpp_infer/src/ocr_det.cpp index 030d5c2f..74fa09be 100644 --- a/deploy/cpp_infer/src/ocr_det.cpp +++ b/deploy/cpp_infer/src/ocr_det.cpp @@ -33,12 +33,11 @@ void DBDetector::LoadModel(const std::string &model_dir) { precision = paddle_infer::Config::Precision::kInt8; } config.EnableTensorRtEngine(1 << 30, 1, 20, precision, false, false); - if (!Utility::PathExists("./trt_det_shape.txt")){ + if (!Utility::PathExists("./trt_det_shape.txt")) { config.CollectShapeRangeInfo("./trt_det_shape.txt"); - } else { + } else { config.EnableTunedTensorRtDynamicShape("./trt_det_shape.txt", true); } - } } else { config.DisableGpu(); @@ -59,7 +58,7 @@ void DBDetector::LoadModel(const std::string &model_dir) { config.EnableMemoryOptim(); // config.DisableGlogInfo(); - this->predictor_ = CreatePredictor(config); + this->predictor_ = paddle_infer::CreatePredictor(config); } void DBDetector::Run(cv::Mat &img, diff --git a/deploy/cpp_infer/src/ocr_rec.cpp b/deploy/cpp_infer/src/ocr_rec.cpp index 088cb942..96715163 100644 --- a/deploy/cpp_infer/src/ocr_rec.cpp +++ b/deploy/cpp_infer/src/ocr_rec.cpp @@ -37,7 +37,7 @@ void CRNNRecognizer::Run(std::vector img_list, for (int beg_img_no = 0; beg_img_no < img_num; beg_img_no += this->rec_batch_num_) { auto preprocess_start = std::chrono::steady_clock::now(); - int end_img_no = min(img_num, beg_img_no + this->rec_batch_num_); + int end_img_no = std::min(img_num, beg_img_no + this->rec_batch_num_); int batch_num = end_img_no - beg_img_no; int imgH = this->rec_image_shape_[1]; int imgW = this->rec_image_shape_[2]; @@ -46,7 +46,7 @@ void CRNNRecognizer::Run(std::vector img_list, int h = img_list[indices[ino]].rows; int w = img_list[indices[ino]].cols; float wh_ratio = w * 1.0 / h; - max_wh_ratio = max(max_wh_ratio, wh_ratio); + max_wh_ratio = std::max(max_wh_ratio, wh_ratio); } int batch_width = imgW; @@ -60,7 +60,7 @@ void CRNNRecognizer::Run(std::vector img_list, this->normalize_op_.Run(&resize_img, this->mean_, this->scale_, this->is_scale_); norm_img_batch.push_back(resize_img); - batch_width = max(resize_img.cols, batch_width); + batch_width = std::max(resize_img.cols, batch_width); } std::vector input(batch_num * 3 * imgH * batch_width, 0.0f); @@ -115,7 +115,7 @@ void CRNNRecognizer::Run(std::vector img_list, last_index = argmax_idx; } score /= count; - if (isnan(score)) { + if (std::isnan(score)) { continue; } rec_texts[indices[beg_img_no + m]] = str_res; @@ -130,7 +130,6 @@ void CRNNRecognizer::Run(std::vector img_list, } void CRNNRecognizer::LoadModel(const std::string &model_dir) { - // AnalysisConfig config; paddle_infer::Config config; config.SetModel(model_dir + "/inference.pdmodel", model_dir + "/inference.pdiparams"); @@ -147,12 +146,11 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) { if (this->precision_ == "int8") { precision = paddle_infer::Config::Precision::kInt8; } - if (!Utility::PathExists("./trt_rec_shape.txt")){ + if (!Utility::PathExists("./trt_rec_shape.txt")) { config.CollectShapeRangeInfo("./trt_rec_shape.txt"); - } else { + } else { config.EnableTunedTensorRtDynamicShape("./trt_rec_shape.txt", true); } - } } else { config.DisableGpu(); @@ -177,7 +175,7 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) { config.EnableMemoryOptim(); // config.DisableGlogInfo(); - this->predictor_ = CreatePredictor(config); + this->predictor_ = paddle_infer::CreatePredictor(config); } } // namespace PaddleOCR diff --git a/deploy/cpp_infer/src/paddleocr.cpp b/deploy/cpp_infer/src/paddleocr.cpp index 7417e966..86747c60 100644 --- a/deploy/cpp_infer/src/paddleocr.cpp +++ b/deploy/cpp_infer/src/paddleocr.cpp @@ -16,7 +16,7 @@ #include #include "auto_log/autolog.h" -#include + namespace PaddleOCR { PPOCR::PPOCR() { diff --git a/deploy/cpp_infer/src/paddlestructure.cpp b/deploy/cpp_infer/src/paddlestructure.cpp index 73df39e1..b2e35f8c 100644 --- a/deploy/cpp_infer/src/paddlestructure.cpp +++ b/deploy/cpp_infer/src/paddlestructure.cpp @@ -16,8 +16,6 @@ #include #include "auto_log/autolog.h" -#include -#include namespace PaddleOCR { @@ -50,7 +48,7 @@ PaddleStructure::structure(cv::Mat srcimg, bool layout, bool table, bool ocr) { } else { StructurePredictResult res; res.type = "table"; - res.box = std::vector(4, 0); + res.box = std::vector(4, 0.0); res.box[2] = img.cols; res.box[3] = img.rows; structure_results.push_back(res); @@ -108,10 +106,10 @@ void PaddleStructure::table(cv::Mat img, std::vector ocr_box; for (int j = 0; j < ocr_result.size(); j++) { ocr_box = Utility::xyxyxyxy2xyxy(ocr_result[j].box); - ocr_box[0] = max(0, ocr_box[0] - expand_pixel); - ocr_box[1] = max(0, ocr_box[1] - expand_pixel), - ocr_box[2] = min(img_list[i].cols, ocr_box[2] + expand_pixel); - ocr_box[3] = min(img_list[i].rows, ocr_box[3] + expand_pixel); + ocr_box[0] = std::max(0, ocr_box[0] - expand_pixel); + ocr_box[1] = std::max(0, ocr_box[1] - expand_pixel), + ocr_box[2] = std::min(img_list[i].cols, ocr_box[2] + expand_pixel); + ocr_box[3] = std::min(img_list[i].rows, ocr_box[3] + expand_pixel); cv::Mat crop_img = Utility::crop_image(img_list[i], ocr_box); rec_img_list.push_back(crop_img); @@ -132,8 +130,8 @@ PaddleStructure::rebuild_table(std::vector structure_html_tags, std::vector> structure_boxes, std::vector &ocr_result) { // match text in same cell - std::vector> matched(structure_boxes.size(), - std::vector()); + std::vector> matched(structure_boxes.size(), + std::vector()); std::vector ocr_box; std::vector structure_box; @@ -233,7 +231,7 @@ float PaddleStructure::dis(std::vector &box1, std::vector &box2) { abs(x1_2 - x1_1) + abs(y1_2 - y1_1) + abs(x2_2 - x2_1) + abs(y2_2 - y2_1); float dis_2 = abs(x1_2 - x1_1) + abs(y1_2 - y1_1); float dis_3 = abs(x2_2 - x2_1) + abs(y2_2 - y2_1); - return dis + min(dis_2, dis_3); + return dis + std::min(dis_2, dis_3); } void PaddleStructure::reset_timer() { diff --git a/deploy/cpp_infer/src/postprocess_op.cpp b/deploy/cpp_infer/src/postprocess_op.cpp index 0a4da675..c139fa72 100644 --- a/deploy/cpp_infer/src/postprocess_op.cpp +++ b/deploy/cpp_infer/src/postprocess_op.cpp @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include #include namespace PaddleOCR { @@ -352,6 +351,25 @@ std::vector>> DBPostProcessor::FilterTagDetRes( return root_points; } +void TablePostProcessor::init(std::string label_path, + bool merge_no_span_structure) { + this->label_list_ = Utility::ReadDict(label_path); + if (merge_no_span_structure) { + this->label_list_.push_back(""); + std::vector::iterator it; + for (it = this->label_list_.begin(); it != this->label_list_.end();) { + if (*it == "") { + it = this->label_list_.erase(it); + } else { + ++it; + } + } + } + // add_special_char + this->label_list_.insert(this->label_list_.begin(), this->beg); + this->label_list_.push_back(this->end); +} + void TablePostProcessor::Run( std::vector &loc_preds, std::vector &structure_probs, std::vector &rec_scores, std::vector &loc_preds_shape, @@ -412,7 +430,7 @@ void TablePostProcessor::Run( } } score /= count; - if (isnan(score) || rec_boxes.size() == 0) { + if (std::isnan(score) || rec_boxes.size() == 0) { score = -1; } rec_scores.push_back(score); @@ -421,6 +439,17 @@ void TablePostProcessor::Run( } } +void PicodetPostProcessor::init(std::string label_path, + const double score_threshold, + const double nms_threshold, + const std::vector &fpn_stride) { + this->label_list_ = Utility::ReadDict(label_path); + this->score_threshold_ = score_threshold; + this->nms_threshold_ = nms_threshold; + this->num_class_ = label_list_.size(); + this->fpn_stride_ = fpn_stride; +} + void PicodetPostProcessor::Run(std::vector &results, std::vector> outs, std::vector ori_shape, @@ -469,12 +498,10 @@ void PicodetPostProcessor::Run(std::vector &results, } this->nms(bbox_results[i], this->nms_threshold_); for (auto box : bbox_results[i]) { - box.box_float[0] = box.box_float[0] / scale_factor_w; - box.box_float[2] = box.box_float[2] / scale_factor_w; - box.box_float[1] = box.box_float[1] / scale_factor_h; - box.box_float[3] = box.box_float[3] / scale_factor_h; - box.box = {(int)box.box_float[0], (int)box.box_float[1], - (int)box.box_float[2], (int)box.box_float[3]}; + box.box[0] = box.box[0] / scale_factor_w; + box.box[2] = box.box[2] / scale_factor_w; + box.box[1] = box.box[1] / scale_factor_h; + box.box[3] = box.box[3] / scale_factor_h; results.push_back(box); } } @@ -501,13 +528,13 @@ PicodetPostProcessor::disPred2Bbox(std::vector bbox_pred, int label, dis_pred[i] = dis; } - float xmin_float = (std::max)(ct_x - dis_pred[0], .0f); - float ymin_float = (std::max)(ct_y - dis_pred[1], .0f); - float xmax_float = (std::min)(ct_x + dis_pred[2], (float)im_shape[1]); - float ymax_float = (std::min)(ct_y + dis_pred[3], (float)im_shape[0]); + float xmin = (std::max)(ct_x - dis_pred[0], .0f); + float ymin = (std::max)(ct_y - dis_pred[1], .0f); + float xmax = (std::min)(ct_x + dis_pred[2], (float)im_shape[1]); + float ymax = (std::min)(ct_y + dis_pred[3], (float)im_shape[0]); StructurePredictResult result_item; - result_item.box_float = {xmin_float, ymin_float, xmax_float, ymax_float}; + result_item.box = {xmin, ymin, xmax, ymax}; result_item.type = this->label_list_[label]; result_item.confidence = score; @@ -530,8 +557,7 @@ void PicodetPostProcessor::nms(std::vector &input_boxes, if (picked[j] == 0) { continue; } - float iou = - Utility::iou(input_boxes[i].box_float, input_boxes[j].box_float); + float iou = Utility::iou(input_boxes[i].box, input_boxes[j].box); if (iou > nms_threshold) { picked[j] = 0; } diff --git a/deploy/cpp_infer/src/preprocess_op.cpp b/deploy/cpp_infer/src/preprocess_op.cpp index eb448a7e..19cd6c3f 100644 --- a/deploy/cpp_infer/src/preprocess_op.cpp +++ b/deploy/cpp_infer/src/preprocess_op.cpp @@ -12,21 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "opencv2/core.hpp" -#include "opencv2/imgcodecs.hpp" -#include "opencv2/imgproc.hpp" -#include "paddle_api.h" -#include "paddle_inference_api.h" -#include -#include -#include -#include -#include - -#include -#include -#include - #include namespace PaddleOCR { @@ -69,13 +54,13 @@ void Normalize::Run(cv::Mat *im, const std::vector &mean, } void ResizeImgType0::Run(const cv::Mat &img, cv::Mat &resize_img, - string limit_type, int limit_side_len, float &ratio_h, - float &ratio_w, bool use_tensorrt) { + std::string limit_type, int limit_side_len, + float &ratio_h, float &ratio_w, bool use_tensorrt) { int w = img.cols; int h = img.rows; float ratio = 1.f; if (limit_type == "min") { - int min_wh = min(h, w); + int min_wh = std::min(h, w); if (min_wh < limit_side_len) { if (h < w) { ratio = float(limit_side_len) / float(h); @@ -84,7 +69,7 @@ void ResizeImgType0::Run(const cv::Mat &img, cv::Mat &resize_img, } } } else { - int max_wh = max(h, w); + int max_wh = std::max(h, w); if (max_wh > limit_side_len) { if (h > w) { ratio = float(limit_side_len) / float(h); @@ -97,8 +82,8 @@ void ResizeImgType0::Run(const cv::Mat &img, cv::Mat &resize_img, int resize_h = int(float(h) * ratio); int resize_w = int(float(w) * ratio); - resize_h = max(int(round(float(resize_h) / 32) * 32), 32); - resize_w = max(int(round(float(resize_w) / 32) * 32), 32); + resize_h = std::max(int(round(float(resize_h) / 32) * 32), 32); + resize_w = std::max(int(round(float(resize_w) / 32) * 32), 32); cv::resize(img, resize_img, cv::Size(resize_w, resize_h)); ratio_h = float(resize_h) / float(h); diff --git a/deploy/cpp_infer/src/structure_layout.cpp b/deploy/cpp_infer/src/structure_layout.cpp index 4f587d2b..922959ae 100644 --- a/deploy/cpp_infer/src/structure_layout.cpp +++ b/deploy/cpp_infer/src/structure_layout.cpp @@ -94,7 +94,7 @@ void StructureLayoutRecognizer::Run(cv::Mat img, } void StructureLayoutRecognizer::LoadModel(const std::string &model_dir) { - AnalysisConfig config; + paddle_infer::Config config; if (Utility::PathExists(model_dir + "/inference.pdmodel") && Utility::PathExists(model_dir + "/inference.pdiparams")) { config.SetModel(model_dir + "/inference.pdmodel", @@ -105,7 +105,7 @@ void StructureLayoutRecognizer::LoadModel(const std::string &model_dir) { model_dir + "/model.pdiparams"); } else { std::cerr << "[ERROR] not find model.pdiparams or inference.pdiparams in " - << model_dir << endl; + << model_dir << std::endl; exit(1); } @@ -144,6 +144,6 @@ void StructureLayoutRecognizer::LoadModel(const std::string &model_dir) { config.EnableMemoryOptim(); config.DisableGlogInfo(); - this->predictor_ = CreatePredictor(config); + this->predictor_ = paddle_infer::CreatePredictor(config); } } // namespace PaddleOCR diff --git a/deploy/cpp_infer/src/structure_table.cpp b/deploy/cpp_infer/src/structure_table.cpp index 1aa2e624..52f5d9ee 100644 --- a/deploy/cpp_infer/src/structure_table.cpp +++ b/deploy/cpp_infer/src/structure_table.cpp @@ -34,7 +34,7 @@ void StructureTableRecognizer::Run( beg_img_no += this->table_batch_num_) { // preprocess auto preprocess_start = std::chrono::steady_clock::now(); - int end_img_no = min(img_num, beg_img_no + this->table_batch_num_); + int end_img_no = std::min(img_num, beg_img_no + this->table_batch_num_); int batch_num = end_img_no - beg_img_no; std::vector norm_img_batch; std::vector width_list; @@ -118,7 +118,7 @@ void StructureTableRecognizer::Run( } void StructureTableRecognizer::LoadModel(const std::string &model_dir) { - AnalysisConfig config; + paddle_infer::Config config; config.SetModel(model_dir + "/inference.pdmodel", model_dir + "/inference.pdiparams"); @@ -157,6 +157,6 @@ void StructureTableRecognizer::LoadModel(const std::string &model_dir) { config.EnableMemoryOptim(); config.DisableGlogInfo(); - this->predictor_ = CreatePredictor(config); + this->predictor_ = paddle_infer::CreatePredictor(config); } } // namespace PaddleOCR diff --git a/deploy/cpp_infer/src/utility.cpp b/deploy/cpp_infer/src/utility.cpp index 6b41a523..4a8b1814 100644 --- a/deploy/cpp_infer/src/utility.cpp +++ b/deploy/cpp_infer/src/utility.cpp @@ -66,7 +66,7 @@ void Utility::VisualizeBboxes(const cv::Mat &srcimg, } void Utility::VisualizeBboxes(const cv::Mat &srcimg, - StructurePredictResult &structure_result, + const StructurePredictResult &structure_result, const std::string &save_path) { cv::Mat img_vis; srcimg.copyTo(img_vis); @@ -281,7 +281,7 @@ void Utility::print_result(const std::vector &ocr_result) { } } -cv::Mat Utility::crop_image(cv::Mat &img, std::vector &box) { +cv::Mat Utility::crop_image(cv::Mat &img, const std::vector &box) { cv::Mat crop_im; int crop_x1 = std::max(0, box[0]); int crop_y1 = std::max(0, box[1]); @@ -298,6 +298,12 @@ cv::Mat Utility::crop_image(cv::Mat &img, std::vector &box) { return crop_im; } +cv::Mat Utility::crop_image(cv::Mat &img, const std::vector &box) { + std::vector box_int = {(int)box[0], (int)box[1], (int)box[2], + (int)box[3]}; + return crop_image(img, box_int); +} + void Utility::sorted_boxes(std::vector &ocr_result) { std::sort(ocr_result.begin(), ocr_result.end(), Utility::comparison_box); if (ocr_result.size() > 0) { -- GitLab