未验证 提交 8f6522fc 编写于 作者: Z zhoujun 提交者: GitHub

Merge pull request #5923 from WenmuZhou/cpp_infer

Keep the parameter names the same as Python
...@@ -45,8 +45,9 @@ public: ...@@ -45,8 +45,9 @@ public:
const double &det_db_thresh, const double &det_db_thresh,
const double &det_db_box_thresh, const double &det_db_box_thresh,
const double &det_db_unclip_ratio, const double &det_db_unclip_ratio,
const bool &use_polygon_score, const bool &use_dilation, const std::string &det_db_score_mode,
const bool &use_tensorrt, const std::string &precision) { const bool &use_dilation, const bool &use_tensorrt,
const std::string &precision) {
this->use_gpu_ = use_gpu; this->use_gpu_ = use_gpu;
this->gpu_id_ = gpu_id; this->gpu_id_ = gpu_id;
this->gpu_mem_ = gpu_mem; this->gpu_mem_ = gpu_mem;
...@@ -58,7 +59,7 @@ public: ...@@ -58,7 +59,7 @@ public:
this->det_db_thresh_ = det_db_thresh; this->det_db_thresh_ = det_db_thresh;
this->det_db_box_thresh_ = det_db_box_thresh; this->det_db_box_thresh_ = det_db_box_thresh;
this->det_db_unclip_ratio_ = det_db_unclip_ratio; this->det_db_unclip_ratio_ = det_db_unclip_ratio;
this->use_polygon_score_ = use_polygon_score; this->det_db_score_mode_ = det_db_score_mode;
this->use_dilation_ = use_dilation; this->use_dilation_ = use_dilation;
this->use_tensorrt_ = use_tensorrt; this->use_tensorrt_ = use_tensorrt;
...@@ -88,7 +89,7 @@ private: ...@@ -88,7 +89,7 @@ private:
double det_db_thresh_ = 0.3; double det_db_thresh_ = 0.3;
double det_db_box_thresh_ = 0.5; double det_db_box_thresh_ = 0.5;
double det_db_unclip_ratio_ = 2.0; double det_db_unclip_ratio_ = 2.0;
bool use_polygon_score_ = false; std::string det_db_score_mode_ = "slow";
bool use_dilation_ = false; bool use_dilation_ = false;
bool visualize_ = true; bool visualize_ = true;
......
...@@ -56,7 +56,7 @@ public: ...@@ -56,7 +56,7 @@ public:
std::vector<std::vector<std::vector<int>>> std::vector<std::vector<std::vector<int>>>
BoxesFromBitmap(const cv::Mat pred, const cv::Mat bitmap, BoxesFromBitmap(const cv::Mat pred, const cv::Mat bitmap,
const float &box_thresh, const float &det_db_unclip_ratio, const float &box_thresh, const float &det_db_unclip_ratio,
const bool &use_polygon_score); const std::string &det_db_score_mode);
std::vector<std::vector<std::vector<int>>> std::vector<std::vector<std::vector<int>>>
FilterTagDetRes(std::vector<std::vector<std::vector<int>>> boxes, FilterTagDetRes(std::vector<std::vector<std::vector<int>>> boxes,
......
...@@ -267,7 +267,7 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir ...@@ -267,7 +267,7 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir
|det_db_thresh|float|0.3|用于过滤DB预测的二值化图像,设置为0.-0.3对结果影响不明显| |det_db_thresh|float|0.3|用于过滤DB预测的二值化图像,设置为0.-0.3对结果影响不明显|
|det_db_box_thresh|float|0.5|DB后处理过滤box的阈值,如果检测存在漏框情况,可酌情减小| |det_db_box_thresh|float|0.5|DB后处理过滤box的阈值,如果检测存在漏框情况,可酌情减小|
|det_db_unclip_ratio|float|1.6|表示文本框的紧致程度,越小则文本框更靠近文本| |det_db_unclip_ratio|float|1.6|表示文本框的紧致程度,越小则文本框更靠近文本|
|use_polygon_score|bool|false|是否使用多边形框计算bbox score,false表示使用矩形框计算。矩形框计算速度更快,多边形框对弯曲文本区域计算更准确。| |det_db_score_mode|string|slow|slow:使用多边形框计算bbox score,fast:使用矩形框计算。矩形框计算速度更快,多边形框对弯曲文本区域计算更准确。|
|visualize|bool|true|是否对结果进行可视化,为true时,预测结果会保存在`output`字段指定的文件夹下和输入图像同名的图像上。| |visualize|bool|true|是否对结果进行可视化,为true时,预测结果会保存在`output`字段指定的文件夹下和输入图像同名的图像上。|
- 方向分类器相关 - 方向分类器相关
......
...@@ -260,7 +260,7 @@ More parameters are as follows, ...@@ -260,7 +260,7 @@ More parameters are as follows,
|det_db_thresh|float|0.3|Used to filter the binarized image of DB prediction, setting 0.-0.3 has no obvious effect on the result| |det_db_thresh|float|0.3|Used to filter the binarized image of DB prediction, setting 0.-0.3 has no obvious effect on the result|
|det_db_box_thresh|float|0.5|Threshold used by DB post-processing to filter boxes; if text boxes are being missed, it can be lowered as appropriate| |det_db_box_thresh|float|0.5|Threshold used by DB post-processing to filter boxes; if text boxes are being missed, it can be lowered as appropriate|
|det_db_unclip_ratio|float|1.6|Indicates the compactness of the text box, the smaller the value, the closer the text box to the text| |det_db_unclip_ratio|float|1.6|Indicates the compactness of the text box, the smaller the value, the closer the text box to the text|
|use_polygon_score|bool|false|Whether to use the polygon box to calculate the bbox score; false means the rectangular box is used. The rectangular box is faster to compute, while the polygon box is more accurate for curved text regions.| |det_db_score_mode|string|slow|slow: use the polygon box to calculate the bbox score; fast: use the rectangular box. The rectangular box is faster to compute, while the polygon box is more accurate for curved text regions.|
|visualize|bool|true|Whether to visualize the results; when it is set to true, the prediction results will be saved in the folder specified by the `output` field, on an image with the same name as the input image.| |visualize|bool|true|Whether to visualize the results; when it is set to true, the prediction results will be saved in the folder specified by the `output` field, on an image with the same name as the input image.|
- Classifier related parameters - Classifier related parameters
......
...@@ -36,25 +36,26 @@ ...@@ -36,25 +36,26 @@
#include "auto_log/autolog.h" #include "auto_log/autolog.h"
#include <gflags/gflags.h> #include <gflags/gflags.h>
// common args
DEFINE_bool(use_gpu, false, "Infering with GPU or CPU."); DEFINE_bool(use_gpu, false, "Infering with GPU or CPU.");
DEFINE_bool(use_tensorrt, false, "Whether use tensorrt.");
DEFINE_int32(gpu_id, 0, "Device id of GPU to execute."); DEFINE_int32(gpu_id, 0, "Device id of GPU to execute.");
DEFINE_int32(gpu_mem, 4000, "GPU id when infering with GPU."); DEFINE_int32(gpu_mem, 4000, "GPU id when infering with GPU.");
DEFINE_int32(cpu_threads, 10, "Num of threads with CPU."); DEFINE_int32(cpu_threads, 10, "Num of threads with CPU.");
DEFINE_bool(enable_mkldnn, false, "Whether use mkldnn with CPU."); DEFINE_bool(enable_mkldnn, false, "Whether use mkldnn with CPU.");
DEFINE_bool(use_tensorrt, false, "Whether use tensorrt.");
DEFINE_string(precision, "fp32", "Precision be one of fp32/fp16/int8"); DEFINE_string(precision, "fp32", "Precision be one of fp32/fp16/int8");
DEFINE_bool(benchmark, false, "Whether use benchmark."); DEFINE_bool(benchmark, false, "Whether use benchmark.");
DEFINE_string(output, "./output/", "Save benchmark log path."); DEFINE_string(output, "./output/", "Save benchmark log path.");
// detection related
DEFINE_string(image_dir, "", "Dir of input image."); DEFINE_string(image_dir, "", "Dir of input image.");
DEFINE_bool(visualize, true, "Whether show the detection results.");
// detection related
DEFINE_string(det_model_dir, "", "Path of det inference model."); DEFINE_string(det_model_dir, "", "Path of det inference model.");
DEFINE_int32(max_side_len, 960, "max_side_len of input image."); DEFINE_int32(max_side_len, 960, "max_side_len of input image.");
DEFINE_double(det_db_thresh, 0.3, "Threshold of det_db_thresh."); DEFINE_double(det_db_thresh, 0.3, "Threshold of det_db_thresh.");
DEFINE_double(det_db_box_thresh, 0.6, "Threshold of det_db_box_thresh."); DEFINE_double(det_db_box_thresh, 0.6, "Threshold of det_db_box_thresh.");
DEFINE_double(det_db_unclip_ratio, 1.5, "Threshold of det_db_unclip_ratio."); DEFINE_double(det_db_unclip_ratio, 1.5, "Threshold of det_db_unclip_ratio.");
DEFINE_bool(use_polygon_score, false, "Whether use polygon score.");
DEFINE_bool(use_dilation, false, "Whether use the dilation on output map."); DEFINE_bool(use_dilation, false, "Whether use the dilation on output map.");
DEFINE_bool(visualize, true, "Whether show the detection results."); DEFINE_string(det_db_score_mode, "slow", "Whether use polygon score.");
// classification related // classification related
DEFINE_bool(use_angle_cls, false, "Whether use use_angle_cls."); DEFINE_bool(use_angle_cls, false, "Whether use use_angle_cls.");
DEFINE_string(cls_model_dir, "", "Path of cls inference model."); DEFINE_string(cls_model_dir, "", "Path of cls inference model.");
...@@ -85,7 +86,7 @@ int main_det(std::vector<cv::String> cv_all_img_names) { ...@@ -85,7 +86,7 @@ int main_det(std::vector<cv::String> cv_all_img_names) {
FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_enable_mkldnn, FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_enable_mkldnn,
FLAGS_max_side_len, FLAGS_det_db_thresh, FLAGS_max_side_len, FLAGS_det_db_thresh,
FLAGS_det_db_box_thresh, FLAGS_det_db_unclip_ratio, FLAGS_det_db_box_thresh, FLAGS_det_db_unclip_ratio,
FLAGS_use_polygon_score, FLAGS_use_dilation, FLAGS_det_db_score_mode, FLAGS_use_dilation,
FLAGS_use_tensorrt, FLAGS_precision); FLAGS_use_tensorrt, FLAGS_precision);
if (!PathExists(FLAGS_output)) { if (!PathExists(FLAGS_output)) {
...@@ -117,13 +118,21 @@ int main_det(std::vector<cv::String> cv_all_img_names) { ...@@ -117,13 +118,21 @@ int main_det(std::vector<cv::String> cv_all_img_names) {
time_info[2] += det_times[2]; time_info[2] += det_times[2];
if (FLAGS_benchmark) { if (FLAGS_benchmark) {
cout << cv_all_img_names[i] << '\t'; cout << cv_all_img_names[i] << "\t[";
for (int n = 0; n < boxes.size(); n++) { for (int n = 0; n < boxes.size(); n++) {
cout << '[';
for (int m = 0; m < boxes[n].size(); m++) { for (int m = 0; m < boxes[n].size(); m++) {
cout << boxes[n][m][0] << ' ' << boxes[n][m][1] << ' '; cout << '[' << boxes[n][m][0] << ',' << boxes[n][m][1] << "]";
if (m != boxes[n].size() - 1) {
cout << ',';
}
}
cout << ']';
if (n != boxes.size() - 1) {
cout << ',';
} }
} }
cout << endl; cout << ']' << endl;
} }
} }
...@@ -140,8 +149,6 @@ int main_rec(std::vector<cv::String> cv_all_img_names) { ...@@ -140,8 +149,6 @@ int main_rec(std::vector<cv::String> cv_all_img_names) {
std::vector<double> time_info = {0, 0, 0}; std::vector<double> time_info = {0, 0, 0};
std::string rec_char_dict_path = FLAGS_rec_char_dict_path; std::string rec_char_dict_path = FLAGS_rec_char_dict_path;
if (FLAGS_benchmark)
rec_char_dict_path = FLAGS_rec_char_dict_path.substr(6);
cout << "label file: " << rec_char_dict_path << endl; cout << "label file: " << rec_char_dict_path << endl;
CRNNRecognizer rec(FLAGS_rec_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, CRNNRecognizer rec(FLAGS_rec_model_dir, FLAGS_use_gpu, FLAGS_gpu_id,
...@@ -194,7 +201,7 @@ int main_system(std::vector<cv::String> cv_all_img_names) { ...@@ -194,7 +201,7 @@ int main_system(std::vector<cv::String> cv_all_img_names) {
FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_enable_mkldnn, FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_enable_mkldnn,
FLAGS_max_side_len, FLAGS_det_db_thresh, FLAGS_max_side_len, FLAGS_det_db_thresh,
FLAGS_det_db_box_thresh, FLAGS_det_db_unclip_ratio, FLAGS_det_db_box_thresh, FLAGS_det_db_unclip_ratio,
FLAGS_use_polygon_score, FLAGS_use_dilation, FLAGS_det_db_score_mode, FLAGS_use_dilation,
FLAGS_use_tensorrt, FLAGS_precision); FLAGS_use_tensorrt, FLAGS_precision);
Classifier *cls = nullptr; Classifier *cls = nullptr;
...@@ -205,8 +212,6 @@ int main_system(std::vector<cv::String> cv_all_img_names) { ...@@ -205,8 +212,6 @@ int main_system(std::vector<cv::String> cv_all_img_names) {
} }
std::string rec_char_dict_path = FLAGS_rec_char_dict_path; std::string rec_char_dict_path = FLAGS_rec_char_dict_path;
if (FLAGS_benchmark)
rec_char_dict_path = FLAGS_rec_char_dict_path.substr(6);
cout << "label file: " << rec_char_dict_path << endl; cout << "label file: " << rec_char_dict_path << endl;
CRNNRecognizer rec(FLAGS_rec_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, CRNNRecognizer rec(FLAGS_rec_model_dir, FLAGS_use_gpu, FLAGS_gpu_id,
......
...@@ -161,7 +161,7 @@ void DBDetector::Run(cv::Mat &img, ...@@ -161,7 +161,7 @@ void DBDetector::Run(cv::Mat &img,
boxes = post_processor_.BoxesFromBitmap( boxes = post_processor_.BoxesFromBitmap(
pred_map, bit_map, this->det_db_box_thresh_, this->det_db_unclip_ratio_, pred_map, bit_map, this->det_db_box_thresh_, this->det_db_unclip_ratio_,
this->use_polygon_score_); this->det_db_score_mode_);
boxes = post_processor_.FilterTagDetRes(boxes, ratio_h, ratio_w, srcimg); boxes = post_processor_.FilterTagDetRes(boxes, ratio_h, ratio_w, srcimg);
auto postprocess_end = std::chrono::steady_clock::now(); auto postprocess_end = std::chrono::steady_clock::now();
......
...@@ -12,8 +12,8 @@ ...@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include <include/clipper.h>
#include <include/postprocess_op.h> #include <include/postprocess_op.h>
#include <include/clipper.cpp>
namespace PaddleOCR { namespace PaddleOCR {
...@@ -187,23 +187,22 @@ float PostProcessor::PolygonScoreAcc(std::vector<cv::Point> contour, ...@@ -187,23 +187,22 @@ float PostProcessor::PolygonScoreAcc(std::vector<cv::Point> contour,
cv::Mat mask; cv::Mat mask;
mask = cv::Mat::zeros(ymax - ymin + 1, xmax - xmin + 1, CV_8UC1); mask = cv::Mat::zeros(ymax - ymin + 1, xmax - xmin + 1, CV_8UC1);
cv::Point *rook_point = new cv::Point[contour.size()];
cv::Point* rook_point = new cv::Point[contour.size()];
for (int i = 0; i < contour.size(); ++i) { for (int i = 0; i < contour.size(); ++i) {
rook_point[i] = cv::Point(int(box_x[i]) - xmin, int(box_y[i]) - ymin); rook_point[i] = cv::Point(int(box_x[i]) - xmin, int(box_y[i]) - ymin);
} }
const cv::Point *ppt[1] = {rook_point}; const cv::Point *ppt[1] = {rook_point};
int npt[] = {int(contour.size())}; int npt[] = {int(contour.size())};
cv::fillPoly(mask, ppt, npt, 1, cv::Scalar(1)); cv::fillPoly(mask, ppt, npt, 1, cv::Scalar(1));
cv::Mat croppedImg; cv::Mat croppedImg;
pred(cv::Rect(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1)).copyTo(croppedImg); pred(cv::Rect(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1))
.copyTo(croppedImg);
float score = cv::mean(croppedImg, mask)[0]; float score = cv::mean(croppedImg, mask)[0];
delete []rook_point; delete[] rook_point;
return score; return score;
} }
...@@ -247,7 +246,7 @@ float PostProcessor::BoxScoreFast(std::vector<std::vector<float>> box_array, ...@@ -247,7 +246,7 @@ float PostProcessor::BoxScoreFast(std::vector<std::vector<float>> box_array,
std::vector<std::vector<std::vector<int>>> PostProcessor::BoxesFromBitmap( std::vector<std::vector<std::vector<int>>> PostProcessor::BoxesFromBitmap(
const cv::Mat pred, const cv::Mat bitmap, const float &box_thresh, const cv::Mat pred, const cv::Mat bitmap, const float &box_thresh,
const float &det_db_unclip_ratio, const bool &use_polygon_score) { const float &det_db_unclip_ratio, const std::string &det_db_score_mode) {
const int min_size = 3; const int min_size = 3;
const int max_candidates = 1000; const int max_candidates = 1000;
...@@ -281,7 +280,7 @@ std::vector<std::vector<std::vector<int>>> PostProcessor::BoxesFromBitmap( ...@@ -281,7 +280,7 @@ std::vector<std::vector<std::vector<int>>> PostProcessor::BoxesFromBitmap(
} }
float score; float score;
if (use_polygon_score) if (det_db_score_mode == "slow")
/* compute using polygon*/ /* compute using polygon*/
score = PolygonScoreAcc(contours[_i], pred); score = PolygonScoreAcc(contours[_i], pred);
else else
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册