未验证 提交 aab10b4f 编写于 作者: T Tingquan Gao 提交者: GitHub

Add polygon score param to config (#2646)

* Add polygon score param to config
上级 099957c9
...@@ -8,7 +8,6 @@ PaddleOCR同时支持动态图与静态图两种编程范式 ...@@ -8,7 +8,6 @@ PaddleOCR同时支持动态图与静态图两种编程范式
- 静态图版本:develop分支 - 静态图版本:develop分支
**近期更新** **近期更新**
- 【预告】 PaddleOCR研发团队对最新发版内容技术深入解读,4月13日晚上19:00,[直播地址](https://live.bilibili.com/21689802)
- 2021.4.8 release 2.1版本,新增AAAI 2021论文[端到端识别算法PGNet](./doc/doc_ch/pgnet.md)开源,[多语言模型](./doc/doc_ch/multi_languages.md)支持种类增加到80+。 - 2021.4.8 release 2.1版本,新增AAAI 2021论文[端到端识别算法PGNet](./doc/doc_ch/pgnet.md)开源,[多语言模型](./doc/doc_ch/multi_languages.md)支持种类增加到80+。
- 2021.2.1 [FAQ](./doc/doc_ch/FAQ.md)新增5个高频问题,总数162个,每周一都会更新,欢迎大家持续关注。 - 2021.2.1 [FAQ](./doc/doc_ch/FAQ.md)新增5个高频问题,总数162个,每周一都会更新,欢迎大家持续关注。
- 2021.1.21 更新多语言识别模型,目前支持语种超过27种,包括中文简体、中文繁体、英文、法文、德文、韩文、日文、意大利文、西班牙文、葡萄牙文、俄罗斯文、阿拉伯文等,后续计划可以参考[多语言研发计划](https://github.com/PaddlePaddle/PaddleOCR/issues/1048) - 2021.1.21 更新多语言识别模型,目前支持语种超过27种,包括中文简体、中文繁体、英文、法文、德文、韩文、日文、意大利文、西班牙文、葡萄牙文、俄罗斯文、阿拉伯文等,后续计划可以参考[多语言研发计划](https://github.com/PaddlePaddle/PaddleOCR/issues/1048)
......
...@@ -49,6 +49,8 @@ public: ...@@ -49,6 +49,8 @@ public:
this->det_db_unclip_ratio = stod(config_map_["det_db_unclip_ratio"]); this->det_db_unclip_ratio = stod(config_map_["det_db_unclip_ratio"]);
this->use_polygon_score = bool(stoi(config_map_["use_polygon_score"]));
this->det_model_dir.assign(config_map_["det_model_dir"]); this->det_model_dir.assign(config_map_["det_model_dir"]);
this->rec_model_dir.assign(config_map_["rec_model_dir"]); this->rec_model_dir.assign(config_map_["rec_model_dir"]);
...@@ -86,6 +88,8 @@ public: ...@@ -86,6 +88,8 @@ public:
double det_db_unclip_ratio = 2.0; double det_db_unclip_ratio = 2.0;
bool use_polygon_score = false;
std::string det_model_dir; std::string det_model_dir;
std::string rec_model_dir; std::string rec_model_dir;
......
...@@ -44,7 +44,8 @@ public: ...@@ -44,7 +44,8 @@ public:
const bool &use_mkldnn, const int &max_side_len, const bool &use_mkldnn, const int &max_side_len,
const double &det_db_thresh, const double &det_db_thresh,
const double &det_db_box_thresh, const double &det_db_box_thresh,
const double &det_db_unclip_ratio, const bool &visualize, const double &det_db_unclip_ratio,
const bool &use_polygon_score, const bool &visualize,
const bool &use_tensorrt, const bool &use_fp16) { const bool &use_tensorrt, const bool &use_fp16) {
this->use_gpu_ = use_gpu; this->use_gpu_ = use_gpu;
this->gpu_id_ = gpu_id; this->gpu_id_ = gpu_id;
...@@ -57,6 +58,7 @@ public: ...@@ -57,6 +58,7 @@ public:
this->det_db_thresh_ = det_db_thresh; this->det_db_thresh_ = det_db_thresh;
this->det_db_box_thresh_ = det_db_box_thresh; this->det_db_box_thresh_ = det_db_box_thresh;
this->det_db_unclip_ratio_ = det_db_unclip_ratio; this->det_db_unclip_ratio_ = det_db_unclip_ratio;
this->use_polygon_score_ = use_polygon_score;
this->visualize_ = visualize; this->visualize_ = visualize;
this->use_tensorrt_ = use_tensorrt; this->use_tensorrt_ = use_tensorrt;
...@@ -85,6 +87,7 @@ private: ...@@ -85,6 +87,7 @@ private:
double det_db_thresh_ = 0.3; double det_db_thresh_ = 0.3;
double det_db_box_thresh_ = 0.5; double det_db_box_thresh_ = 0.5;
double det_db_unclip_ratio_ = 2.0; double det_db_unclip_ratio_ = 2.0;
bool use_polygon_score_ = false;
bool visualize_ = true; bool visualize_ = true;
bool use_tensorrt_ = false; bool use_tensorrt_ = false;
......
...@@ -55,7 +55,8 @@ public: ...@@ -55,7 +55,8 @@ public:
std::vector<std::vector<std::vector<int>>> std::vector<std::vector<std::vector<int>>>
BoxesFromBitmap(const cv::Mat pred, const cv::Mat bitmap, BoxesFromBitmap(const cv::Mat pred, const cv::Mat bitmap,
const float &box_thresh, const float &det_db_unclip_ratio); const float &box_thresh, const float &det_db_unclip_ratio,
const bool &use_polygon_score);
std::vector<std::vector<std::vector<int>>> std::vector<std::vector<std::vector<int>>>
FilterTagDetRes(std::vector<std::vector<std::vector<int>>> boxes, FilterTagDetRes(std::vector<std::vector<std::vector<int>>> boxes,
......
...@@ -183,7 +183,7 @@ cmake .. \ ...@@ -183,7 +183,7 @@ cmake .. \
make -j make -j
``` ```
`OPENCV_DIR`为opencv编译安装的地址;`LIB_DIR`为下载(`paddle_inference`文件夹)或者编译生成的Paddle预测库地址(`build/paddle_inference_install_dir`文件夹);`CUDA_LIB_DIR`为cuda库文件地址,在docker中`/usr/local/cuda/lib64``CUDNN_LIB_DIR`为cudnn库文件地址,在docker中为`/usr/lib/x86_64-linux-gnu/` `OPENCV_DIR`为opencv编译安装的地址;`LIB_DIR`为下载(`paddle_inference`文件夹)或者编译生成的Paddle预测库地址(`build/paddle_inference_install_dir`文件夹);`CUDA_LIB_DIR`为cuda库文件地址,在docker中为`/usr/local/cuda/lib64``CUDNN_LIB_DIR`为cudnn库文件地址,在docker中为`/usr/lib/x86_64-linux-gnu/`
* 编译完成之后,会在`build`文件夹下生成一个名为`ocr_system`的可执行文件。 * 编译完成之后,会在`build`文件夹下生成一个名为`ocr_system`的可执行文件。
...@@ -211,6 +211,7 @@ max_side_len 960 # 输入图像长宽大于960时,等比例缩放图像,使 ...@@ -211,6 +211,7 @@ max_side_len 960 # 输入图像长宽大于960时,等比例缩放图像,使
det_db_thresh 0.3 # 用于过滤DB预测的二值化图像,设置为0.-0.3对结果影响不明显 det_db_thresh 0.3 # 用于过滤DB预测的二值化图像,设置为0.-0.3对结果影响不明显
det_db_box_thresh 0.5 # DB后处理过滤box的阈值,如果检测存在漏框情况,可酌情减小 det_db_box_thresh 0.5 # DB后处理过滤box的阈值,如果检测存在漏框情况,可酌情减小
det_db_unclip_ratio 1.6 # 表示文本框的紧致程度,越小则文本框更靠近文本 det_db_unclip_ratio 1.6 # 表示文本框的紧致程度,越小则文本框更靠近文本
use_polygon_score 1 # 是否使用多边形框计算bbox score,0表示使用矩形框计算。矩形框计算速度更快,多边形框对弯曲文本区域计算更准确。
det_model_dir ./inference/det_db # 检测模型inference model地址 det_model_dir ./inference/det_db # 检测模型inference model地址
# cls config # cls config
......
...@@ -217,6 +217,7 @@ max_side_len 960 # Limit the maximum image height and width to 960 ...@@ -217,6 +217,7 @@ max_side_len 960 # Limit the maximum image height and width to 960
det_db_thresh 0.3 # Used to filter the binarized image of DB prediction, setting 0.-0.3 has no obvious effect on the result det_db_thresh 0.3 # Used to filter the binarized image of DB prediction, setting 0.-0.3 has no obvious effect on the result
det_db_box_thresh 0.5 # DDB post-processing filter box threshold, if there is a missing box detected, it can be reduced as appropriate det_db_box_thresh 0.5 # DDB post-processing filter box threshold, if there is a missing box detected, it can be reduced as appropriate
det_db_unclip_ratio 1.6 # Indicates the compactness of the text box, the smaller the value, the closer the text box to the text det_db_unclip_ratio 1.6 # Indicates the compactness of the text box, the smaller the value, the closer the text box to the text
use_polygon_score 1 # Whether to use polygon box to calculate bbox score, 0 means to use rectangle box to calculate. Use rectangular box to calculate faster, and polygonal box more accurate for curved text area.
det_model_dir ./inference/det_db # Address of detection inference model det_model_dir ./inference/det_db # Address of detection inference model
# cls config # cls config
......
...@@ -59,7 +59,8 @@ int main(int argc, char **argv) { ...@@ -59,7 +59,8 @@ int main(int argc, char **argv) {
config.gpu_mem, config.cpu_math_library_num_threads, config.gpu_mem, config.cpu_math_library_num_threads,
config.use_mkldnn, config.max_side_len, config.det_db_thresh, config.use_mkldnn, config.max_side_len, config.det_db_thresh,
config.det_db_box_thresh, config.det_db_unclip_ratio, config.det_db_box_thresh, config.det_db_unclip_ratio,
config.visualize, config.use_tensorrt, config.use_fp16); config.use_polygon_score, config.visualize,
config.use_tensorrt, config.use_fp16);
Classifier *cls = nullptr; Classifier *cls = nullptr;
if (config.use_angle_cls == true) { if (config.use_angle_cls == true) {
......
...@@ -109,9 +109,9 @@ void DBDetector::Run(cv::Mat &img, ...@@ -109,9 +109,9 @@ void DBDetector::Run(cv::Mat &img,
cv::Mat dilation_map; cv::Mat dilation_map;
cv::Mat dila_ele = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(2, 2)); cv::Mat dila_ele = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(2, 2));
cv::dilate(bit_map, dilation_map, dila_ele); cv::dilate(bit_map, dilation_map, dila_ele);
boxes = post_processor_.BoxesFromBitmap(pred_map, dilation_map, boxes = post_processor_.BoxesFromBitmap(
this->det_db_box_thresh_, pred_map, dilation_map, this->det_db_box_thresh_,
this->det_db_unclip_ratio_); this->det_db_unclip_ratio_, this->use_polygon_score_);
boxes = post_processor_.FilterTagDetRes(boxes, ratio_h, ratio_w, srcimg); boxes = post_processor_.FilterTagDetRes(boxes, ratio_h, ratio_w, srcimg);
......
...@@ -160,26 +160,34 @@ std::vector<std::vector<float>> PostProcessor::GetMiniBoxes(cv::RotatedRect box, ...@@ -160,26 +160,34 @@ std::vector<std::vector<float>> PostProcessor::GetMiniBoxes(cv::RotatedRect box,
} }
float PostProcessor::PolygonScoreAcc(std::vector<cv::Point> contour, float PostProcessor::PolygonScoreAcc(std::vector<cv::Point> contour,
cv::Mat pred){ cv::Mat pred) {
int width = pred.cols; int width = pred.cols;
int height = pred.rows; int height = pred.rows;
std::vector<float> box_x; std::vector<float> box_x;
std::vector<float> box_y; std::vector<float> box_y;
for(int i=0; i<contour.size(); ++i){ for (int i = 0; i < contour.size(); ++i) {
box_x.push_back(contour[i].x); box_x.push_back(contour[i].x);
box_y.push_back(contour[i].y); box_y.push_back(contour[i].y);
} }
int xmin = clamp(int(std::floor(*(std::min_element(box_x.begin(), box_x.end())))), 0, width - 1); int xmin =
int xmax = clamp(int(std::ceil(*(std::max_element(box_x.begin(), box_x.end())))), 0, width - 1); clamp(int(std::floor(*(std::min_element(box_x.begin(), box_x.end())))), 0,
int ymin = clamp(int(std::floor(*(std::min_element(box_y.begin(), box_y.end())))), 0, height - 1); width - 1);
int ymax = clamp(int(std::ceil(*(std::max_element(box_y.begin(), box_y.end())))), 0, height - 1); int xmax =
clamp(int(std::ceil(*(std::max_element(box_x.begin(), box_x.end())))), 0,
width - 1);
int ymin =
clamp(int(std::floor(*(std::min_element(box_y.begin(), box_y.end())))), 0,
height - 1);
int ymax =
clamp(int(std::ceil(*(std::max_element(box_y.begin(), box_y.end())))), 0,
height - 1);
cv::Mat mask; cv::Mat mask;
mask = cv::Mat::zeros(ymax - ymin + 1, xmax - xmin + 1, CV_8UC1); mask = cv::Mat::zeros(ymax - ymin + 1, xmax - xmin + 1, CV_8UC1);
cv::Point rook_point[contour.size()]; cv::Point rook_point[contour.size()];
for(int i=0; i<contour.size(); ++i){ for (int i = 0; i < contour.size(); ++i) {
rook_point[i] = cv::Point(int(box_x[i]) - xmin, int(box_y[i]) - ymin); rook_point[i] = cv::Point(int(box_x[i]) - xmin, int(box_y[i]) - ymin);
} }
const cv::Point *ppt[1] = {rook_point}; const cv::Point *ppt[1] = {rook_point};
...@@ -187,7 +195,8 @@ float PostProcessor::PolygonScoreAcc(std::vector<cv::Point> contour, ...@@ -187,7 +195,8 @@ float PostProcessor::PolygonScoreAcc(std::vector<cv::Point> contour,
cv::fillPoly(mask, ppt, npt, 1, cv::Scalar(1)); cv::fillPoly(mask, ppt, npt, 1, cv::Scalar(1));
cv::Mat croppedImg; cv::Mat croppedImg;
pred(cv::Rect(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1)).copyTo(croppedImg); pred(cv::Rect(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1))
.copyTo(croppedImg);
float score = cv::mean(croppedImg, mask)[0]; float score = cv::mean(croppedImg, mask)[0];
return score; return score;
} }
...@@ -230,10 +239,9 @@ float PostProcessor::BoxScoreFast(std::vector<std::vector<float>> box_array, ...@@ -230,10 +239,9 @@ float PostProcessor::BoxScoreFast(std::vector<std::vector<float>> box_array,
return score; return score;
} }
std::vector<std::vector<std::vector<int>>> std::vector<std::vector<std::vector<int>>> PostProcessor::BoxesFromBitmap(
PostProcessor::BoxesFromBitmap(const cv::Mat pred, const cv::Mat bitmap, const cv::Mat pred, const cv::Mat bitmap, const float &box_thresh,
const float &box_thresh, const float &det_db_unclip_ratio, const bool &use_polygon_score) {
const float &det_db_unclip_ratio) {
const int min_size = 3; const int min_size = 3;
const int max_candidates = 1000; const int max_candidates = 1000;
...@@ -267,9 +275,12 @@ PostProcessor::BoxesFromBitmap(const cv::Mat pred, const cv::Mat bitmap, ...@@ -267,9 +275,12 @@ PostProcessor::BoxesFromBitmap(const cv::Mat pred, const cv::Mat bitmap,
} }
float score; float score;
score = BoxScoreFast(array, pred); if (use_polygon_score)
/* compute using polygon*/ /* compute using polygon*/
// score = PolygonScoreAcc(contours[_i], pred); score = PolygonScoreAcc(contours[_i], pred);
else
score = BoxScoreFast(array, pred);
if (score < box_thresh) if (score < box_thresh)
continue; continue;
......
...@@ -10,6 +10,7 @@ max_side_len 960 ...@@ -10,6 +10,7 @@ max_side_len 960
det_db_thresh 0.3 det_db_thresh 0.3
det_db_box_thresh 0.5 det_db_box_thresh 0.5
det_db_unclip_ratio 1.6 det_db_unclip_ratio 1.6
use_polygon_score 1
det_model_dir ./inference/ch_ppocr_mobile_v2.0_det_infer/ det_model_dir ./inference/ch_ppocr_mobile_v2.0_det_infer/
# cls config # cls config
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册