diff --git a/deploy/cpp_infer/include/ocr_det.h b/deploy/cpp_infer/include/ocr_det.h index e5a31ed8e5ab6397c4fa67388252e2baef8b9dd7..657ab25d8854ec54c27d71485fe9eeddc65013c3 100644 --- a/deploy/cpp_infer/include/ocr_det.h +++ b/deploy/cpp_infer/include/ocr_det.h @@ -45,8 +45,9 @@ public: const double &det_db_thresh, const double &det_db_box_thresh, const double &det_db_unclip_ratio, - const bool &use_polygon_score, const bool &visualize, - const bool &use_tensorrt, const std::string &precision) { + const bool &use_polygon_score, const bool &use_dilation, + const bool &visualize, const bool &use_tensorrt, + const std::string &precision) { this->use_gpu_ = use_gpu; this->gpu_id_ = gpu_id; this->gpu_mem_ = gpu_mem; @@ -59,6 +60,7 @@ public: this->det_db_box_thresh_ = det_db_box_thresh; this->det_db_unclip_ratio_ = det_db_unclip_ratio; this->use_polygon_score_ = use_polygon_score; + this->use_dilation_ = use_dilation; this->visualize_ = visualize; this->use_tensorrt_ = use_tensorrt; @@ -71,7 +73,8 @@ public: void LoadModel(const std::string &model_dir); // Run predictor - void Run(cv::Mat &img, std::vector>> &boxes, std::vector *times); + void Run(cv::Mat &img, std::vector>> &boxes, + std::vector *times); private: std::shared_ptr predictor_; @@ -88,6 +91,7 @@ private: double det_db_box_thresh_ = 0.5; double det_db_unclip_ratio_ = 2.0; bool use_polygon_score_ = false; + bool use_dilation_ = false; bool visualize_ = true; bool use_tensorrt_ = false; diff --git a/deploy/cpp_infer/src/main.cpp b/deploy/cpp_infer/src/main.cpp index ae7b83f092c2386a4934043690c1dec5613d7d6c..664b10b2f579fd8681c65dcf1ded5ebe53d0424c 100644 --- a/deploy/cpp_infer/src/main.cpp +++ b/deploy/cpp_infer/src/main.cpp @@ -54,6 +54,7 @@ DEFINE_double(det_db_thresh, 0.3, "Threshold of det_db_thresh."); DEFINE_double(det_db_box_thresh, 0.6, "Threshold of det_db_box_thresh."); DEFINE_double(det_db_unclip_ratio, 1.5, "Threshold of det_db_unclip_ratio."); DEFINE_bool(use_polygon_score, false, "Whether use polygon score."); +DEFINE_bool(use_dilation, false, "Whether use the dilation on output map."); DEFINE_bool(visualize, true, "Whether show the detection results."); // classification related DEFINE_bool(use_angle_cls, false, "Whether use use_angle_cls."); @@ -85,8 +86,8 @@ int main_det(std::vector cv_all_img_names) { FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_enable_mkldnn, FLAGS_max_side_len, FLAGS_det_db_thresh, FLAGS_det_db_box_thresh, FLAGS_det_db_unclip_ratio, - FLAGS_use_polygon_score, FLAGS_visualize, FLAGS_use_tensorrt, - FLAGS_precision); + FLAGS_use_polygon_score, FLAGS_use_dilation, FLAGS_visualize, + FLAGS_use_tensorrt, FLAGS_precision); for (int i = 0; i < cv_all_img_names.size(); ++i) { // LOG(INFO) << "The predict img: " << cv_all_img_names[i]; @@ -175,8 +176,8 @@ int main_system(std::vector cv_all_img_names) { FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_enable_mkldnn, FLAGS_max_side_len, FLAGS_det_db_thresh, FLAGS_det_db_box_thresh, FLAGS_det_db_unclip_ratio, - FLAGS_use_polygon_score, FLAGS_visualize, FLAGS_use_tensorrt, - FLAGS_precision); + FLAGS_use_polygon_score, FLAGS_use_dilation, FLAGS_visualize, + FLAGS_use_tensorrt, FLAGS_precision); Classifier *cls = nullptr; if (FLAGS_use_angle_cls) { diff --git a/deploy/cpp_infer/src/ocr_det.cpp b/deploy/cpp_infer/src/ocr_det.cpp index a69f5ca1bd3ee7665f8b2f5610c67dd6feb7eb54..ad78999449d94dcaf2e336087de5c6837f3b233c 100644 --- a/deploy/cpp_infer/src/ocr_det.cpp +++ b/deploy/cpp_infer/src/ocr_det.cpp @@ -14,7 +14,6 @@ #include - namespace PaddleOCR { void DBDetector::LoadModel(const std::string &model_dir) { @@ -30,13 +29,10 @@ void DBDetector::LoadModel(const std::string &model_dir) { if (this->precision_ == "fp16") { precision = paddle_infer::Config::Precision::kHalf; } - if (this->precision_ == "int8") { + if (this->precision_ == "int8") { precision = paddle_infer::Config::Precision::kInt8; - } - config.EnableTensorRtEngine( - 1 << 20, 10, 3, - precision, - false, false); + } + config.EnableTensorRtEngine(1 << 20, 10, 3, precision, false, false); std::map> min_input_shape = { {"x", {1, 3, 50, 50}}, {"conv2d_92.tmp_0", {1, 96, 20, 20}}, @@ -105,7 +101,7 @@ void DBDetector::Run(cv::Mat &img, cv::Mat srcimg; cv::Mat resize_img; img.copyTo(srcimg); - + auto preprocess_start = std::chrono::steady_clock::now(); this->resize_op_.Run(img, resize_img, this->max_side_len_, ratio_h, ratio_w, this->use_tensorrt_); @@ -116,16 +112,16 @@ void DBDetector::Run(cv::Mat &img, std::vector input(1 * 3 * resize_img.rows * resize_img.cols, 0.0f); this->permute_op_.Run(&resize_img, input.data()); auto preprocess_end = std::chrono::steady_clock::now(); - + // Inference. auto input_names = this->predictor_->GetInputNames(); auto input_t = this->predictor_->GetInputHandle(input_names[0]); input_t->Reshape({1, 3, resize_img.rows, resize_img.cols}); auto inference_start = std::chrono::steady_clock::now(); input_t->CopyFromCpu(input.data()); - + this->predictor_->Run(); - + std::vector out_data; auto output_names = this->predictor_->GetOutputNames(); auto output_t = this->predictor_->GetOutputHandle(output_names[0]); @@ -136,7 +132,7 @@ void DBDetector::Run(cv::Mat &img, out_data.resize(out_num); output_t->CopyToCpu(out_data.data()); auto inference_end = std::chrono::steady_clock::now(); - + auto postprocess_start = std::chrono::steady_clock::now(); int n2 = output_shape[2]; int n3 = output_shape[3]; @@ -157,24 +153,29 @@ void DBDetector::Run(cv::Mat &img, const double maxvalue = 255; cv::Mat bit_map; cv::threshold(cbuf_map, bit_map, threshold, maxvalue, cv::THRESH_BINARY); - cv::Mat dilation_map; - cv::Mat dila_ele = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(2, 2)); - cv::dilate(bit_map, dilation_map, dila_ele); + if (this->use_dilation_) { + cv::Mat dila_ele = + cv::getStructuringElement(cv::MORPH_RECT, cv::Size(2, 2)); + cv::dilate(bit_map, bit_map, dila_ele); + } + boxes = post_processor_.BoxesFromBitmap( - pred_map, dilation_map, this->det_db_box_thresh_, - this->det_db_unclip_ratio_, this->use_polygon_score_); + pred_map, bit_map, this->det_db_box_thresh_, this->det_db_unclip_ratio_, + this->use_polygon_score_); boxes = post_processor_.FilterTagDetRes(boxes, ratio_h, ratio_w, srcimg); auto postprocess_end = std::chrono::steady_clock::now(); std::cout << "Detected boxes num: " << boxes.size() << endl; - std::chrono::duration preprocess_diff = preprocess_end - preprocess_start; + std::chrono::duration preprocess_diff = + preprocess_end - preprocess_start; times->push_back(double(preprocess_diff.count() * 1000)); std::chrono::duration inference_diff = inference_end - inference_start; times->push_back(double(inference_diff.count() * 1000)); - std::chrono::duration postprocess_diff = postprocess_end - postprocess_start; + std::chrono::duration postprocess_diff = + postprocess_end - postprocess_start; times->push_back(double(postprocess_diff.count() * 1000)); - + //// visualization if (this->visualize_) { Utility::VisualizeBboxes(srcimg, boxes);