提交 dd9f48da 编写于 作者: 文幕地方's avatar 文幕地方

fix bug

上级 b913f664
...@@ -14,26 +14,12 @@ ...@@ -14,26 +14,12 @@
#pragma once #pragma once
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include "paddle_api.h" #include "paddle_api.h"
#include "paddle_inference_api.h" #include "paddle_inference_api.h"
#include <chrono>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <vector>
#include <cstring>
#include <fstream>
#include <numeric>
#include <include/preprocess_op.h> #include <include/preprocess_op.h>
#include <include/utility.h> #include <include/utility.h>
using namespace paddle_infer;
namespace PaddleOCR { namespace PaddleOCR {
class Classifier { class Classifier {
...@@ -66,7 +52,7 @@ public: ...@@ -66,7 +52,7 @@ public:
std::vector<float> &cls_scores, std::vector<double> &times); std::vector<float> &cls_scores, std::vector<double> &times);
private: private:
std::shared_ptr<Predictor> predictor_; std::shared_ptr<paddle_infer::Predictor> predictor_;
bool use_gpu_ = false; bool use_gpu_ = false;
int gpu_id_ = 0; int gpu_id_ = 0;
......
...@@ -14,26 +14,12 @@ ...@@ -14,26 +14,12 @@
#pragma once #pragma once
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include "paddle_api.h" #include "paddle_api.h"
#include "paddle_inference_api.h" #include "paddle_inference_api.h"
#include <chrono>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <vector>
#include <cstring>
#include <fstream>
#include <numeric>
#include <include/postprocess_op.h> #include <include/postprocess_op.h>
#include <include/preprocess_op.h> #include <include/preprocess_op.h>
using namespace paddle_infer;
namespace PaddleOCR { namespace PaddleOCR {
class DBDetector { class DBDetector {
...@@ -41,7 +27,7 @@ public: ...@@ -41,7 +27,7 @@ public:
explicit DBDetector(const std::string &model_dir, const bool &use_gpu, explicit DBDetector(const std::string &model_dir, const bool &use_gpu,
const int &gpu_id, const int &gpu_mem, const int &gpu_id, const int &gpu_mem,
const int &cpu_math_library_num_threads, const int &cpu_math_library_num_threads,
const bool &use_mkldnn, const string &limit_type, const bool &use_mkldnn, const std::string &limit_type,
const int &limit_side_len, const double &det_db_thresh, const int &limit_side_len, const double &det_db_thresh,
const double &det_db_box_thresh, const double &det_db_box_thresh,
const double &det_db_unclip_ratio, const double &det_db_unclip_ratio,
...@@ -77,7 +63,7 @@ public: ...@@ -77,7 +63,7 @@ public:
std::vector<double> &times); std::vector<double> &times);
private: private:
std::shared_ptr<Predictor> predictor_; std::shared_ptr<paddle_infer::Predictor> predictor_;
bool use_gpu_ = false; bool use_gpu_ = false;
int gpu_id_ = 0; int gpu_id_ = 0;
...@@ -85,7 +71,7 @@ private: ...@@ -85,7 +71,7 @@ private:
int cpu_math_library_num_threads_ = 4; int cpu_math_library_num_threads_ = 4;
bool use_mkldnn_ = false; bool use_mkldnn_ = false;
string limit_type_ = "max"; std::string limit_type_ = "max";
int limit_side_len_ = 960; int limit_side_len_ = 960;
double det_db_thresh_ = 0.3; double det_db_thresh_ = 0.3;
......
...@@ -14,27 +14,12 @@ ...@@ -14,27 +14,12 @@
#pragma once #pragma once
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include "paddle_api.h" #include "paddle_api.h"
#include "paddle_inference_api.h" #include "paddle_inference_api.h"
#include <chrono>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <vector>
#include <cstring>
#include <fstream>
#include <numeric>
#include <include/ocr_cls.h> #include <include/ocr_cls.h>
#include <include/preprocess_op.h>
#include <include/utility.h> #include <include/utility.h>
using namespace paddle_infer;
namespace PaddleOCR { namespace PaddleOCR {
class CRNNRecognizer { class CRNNRecognizer {
...@@ -42,7 +27,7 @@ public: ...@@ -42,7 +27,7 @@ public:
explicit CRNNRecognizer(const std::string &model_dir, const bool &use_gpu, explicit CRNNRecognizer(const std::string &model_dir, const bool &use_gpu,
const int &gpu_id, const int &gpu_mem, const int &gpu_id, const int &gpu_mem,
const int &cpu_math_library_num_threads, const int &cpu_math_library_num_threads,
const bool &use_mkldnn, const string &label_path, const bool &use_mkldnn, const std::string &label_path,
const bool &use_tensorrt, const bool &use_tensorrt,
const std::string &precision, const std::string &precision,
const int &rec_batch_num, const int &rec_img_h, const int &rec_batch_num, const int &rec_img_h,
...@@ -75,7 +60,7 @@ public: ...@@ -75,7 +60,7 @@ public:
std::vector<float> &rec_text_scores, std::vector<double> &times); std::vector<float> &rec_text_scores, std::vector<double> &times);
private: private:
std::shared_ptr<Predictor> predictor_; std::shared_ptr<paddle_infer::Predictor> predictor_;
bool use_gpu_ = false; bool use_gpu_ = false;
int gpu_id_ = 0; int gpu_id_ = 0;
......
...@@ -14,28 +14,9 @@ ...@@ -14,28 +14,9 @@
#pragma once #pragma once
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include "paddle_api.h"
#include "paddle_inference_api.h"
#include <chrono>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <vector>
#include <cstring>
#include <fstream>
#include <numeric>
#include <include/ocr_cls.h> #include <include/ocr_cls.h>
#include <include/ocr_det.h> #include <include/ocr_det.h>
#include <include/ocr_rec.h> #include <include/ocr_rec.h>
#include <include/preprocess_op.h>
#include <include/utility.h>
using namespace paddle_infer;
namespace PaddleOCR { namespace PaddleOCR {
......
...@@ -14,28 +14,9 @@ ...@@ -14,28 +14,9 @@
#pragma once #pragma once
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include "paddle_api.h"
#include "paddle_inference_api.h"
#include <chrono>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <vector>
#include <cstring>
#include <fstream>
#include <numeric>
#include <include/paddleocr.h> #include <include/paddleocr.h>
#include <include/preprocess_op.h>
#include <include/structure_layout.h> #include <include/structure_layout.h>
#include <include/structure_table.h> #include <include/structure_table.h>
#include <include/utility.h>
using namespace paddle_infer;
namespace PaddleOCR { namespace PaddleOCR {
......
...@@ -14,24 +14,9 @@ ...@@ -14,24 +14,9 @@
#pragma once #pragma once
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include <chrono>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <vector>
#include <cstring>
#include <fstream>
#include <numeric>
#include "include/clipper.h" #include "include/clipper.h"
#include "include/utility.h" #include "include/utility.h"
using namespace std;
namespace PaddleOCR { namespace PaddleOCR {
class DBPostProcessor { class DBPostProcessor {
...@@ -92,23 +77,7 @@ private: ...@@ -92,23 +77,7 @@ private:
class TablePostProcessor { class TablePostProcessor {
public: public:
void init(std::string label_path, bool merge_no_span_structure = true) { void init(std::string label_path, bool merge_no_span_structure = true);
this->label_list_ = Utility::ReadDict(label_path);
if (merge_no_span_structure) {
this->label_list_.push_back("<td></td>");
std::vector<std::string>::iterator it;
for (it = this->label_list_.begin(); it != this->label_list_.end();) {
if (*it == "<td>") {
it = this->label_list_.erase(it);
} else {
++it;
}
}
}
// add_special_char
this->label_list_.insert(this->label_list_.begin(), this->beg);
this->label_list_.push_back(this->end);
}
void Run(std::vector<float> &loc_preds, std::vector<float> &structure_probs, void Run(std::vector<float> &loc_preds, std::vector<float> &structure_probs,
std::vector<float> &rec_scores, std::vector<int> &loc_preds_shape, std::vector<float> &rec_scores, std::vector<int> &loc_preds_shape,
std::vector<int> &structure_probs_shape, std::vector<int> &structure_probs_shape,
...@@ -126,13 +95,7 @@ class PicodetPostProcessor { ...@@ -126,13 +95,7 @@ class PicodetPostProcessor {
public: public:
void init(std::string label_path, const double score_threshold = 0.4, void init(std::string label_path, const double score_threshold = 0.4,
const double nms_threshold = 0.5, const double nms_threshold = 0.5,
const std::vector<int> &fpn_stride = {8, 16, 32, 64}) { const std::vector<int> &fpn_stride = {8, 16, 32, 64});
this->label_list_ = Utility::ReadDict(label_path);
this->score_threshold_ = score_threshold;
this->nms_threshold_ = nms_threshold;
this->num_class_ = label_list_.size();
this->fpn_stride_ = fpn_stride;
}
void Run(std::vector<StructurePredictResult> &results, void Run(std::vector<StructurePredictResult> &results,
std::vector<std::vector<float>> outs, std::vector<int> ori_shape, std::vector<std::vector<float>> outs, std::vector<int> ori_shape,
std::vector<int> resize_shape, int eg_max); std::vector<int> resize_shape, int eg_max);
......
...@@ -14,21 +14,12 @@ ...@@ -14,21 +14,12 @@
#pragma once #pragma once
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include <chrono>
#include <iomanip>
#include <iostream> #include <iostream>
#include <ostream>
#include <vector> #include <vector>
#include <cstring> #include "opencv2/core.hpp"
#include <fstream> #include "opencv2/imgcodecs.hpp"
#include <numeric> #include "opencv2/imgproc.hpp"
using namespace std;
using namespace paddle;
namespace PaddleOCR { namespace PaddleOCR {
...@@ -51,9 +42,9 @@ public: ...@@ -51,9 +42,9 @@ public:
class ResizeImgType0 { class ResizeImgType0 {
public: public:
virtual void Run(const cv::Mat &img, cv::Mat &resize_img, string limit_type, virtual void Run(const cv::Mat &img, cv::Mat &resize_img,
int limit_side_len, float &ratio_h, float &ratio_w, std::string limit_type, int limit_side_len, float &ratio_h,
bool use_tensorrt); float &ratio_w, bool use_tensorrt);
}; };
class CrnnResizeImg { class CrnnResizeImg {
......
...@@ -14,26 +14,11 @@ ...@@ -14,26 +14,11 @@
#pragma once #pragma once
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include "paddle_api.h" #include "paddle_api.h"
#include "paddle_inference_api.h" #include "paddle_inference_api.h"
#include <chrono>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <vector>
#include <cstring>
#include <fstream>
#include <numeric>
#include <include/postprocess_op.h> #include <include/postprocess_op.h>
#include <include/preprocess_op.h> #include <include/preprocess_op.h>
#include <include/utility.h>
using namespace paddle_infer;
namespace PaddleOCR { namespace PaddleOCR {
...@@ -42,7 +27,7 @@ public: ...@@ -42,7 +27,7 @@ public:
explicit StructureLayoutRecognizer( explicit StructureLayoutRecognizer(
const std::string &model_dir, const bool &use_gpu, const int &gpu_id, const std::string &model_dir, const bool &use_gpu, const int &gpu_id,
const int &gpu_mem, const int &cpu_math_library_num_threads, const int &gpu_mem, const int &cpu_math_library_num_threads,
const bool &use_mkldnn, const string &label_path, const bool &use_mkldnn, const std::string &label_path,
const bool &use_tensorrt, const std::string &precision, const bool &use_tensorrt, const std::string &precision,
const double &layout_score_threshold, const double &layout_score_threshold,
const double &layout_nms_threshold) { const double &layout_nms_threshold) {
...@@ -66,7 +51,7 @@ public: ...@@ -66,7 +51,7 @@ public:
std::vector<double> &times); std::vector<double> &times);
private: private:
std::shared_ptr<Predictor> predictor_; std::shared_ptr<paddle_infer::Predictor> predictor_;
bool use_gpu_ = false; bool use_gpu_ = false;
int gpu_id_ = 0; int gpu_id_ = 0;
......
...@@ -14,26 +14,11 @@ ...@@ -14,26 +14,11 @@
#pragma once #pragma once
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include "paddle_api.h" #include "paddle_api.h"
#include "paddle_inference_api.h" #include "paddle_inference_api.h"
#include <chrono>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <vector>
#include <cstring>
#include <fstream>
#include <numeric>
#include <include/postprocess_op.h> #include <include/postprocess_op.h>
#include <include/preprocess_op.h> #include <include/preprocess_op.h>
#include <include/utility.h>
using namespace paddle_infer;
namespace PaddleOCR { namespace PaddleOCR {
...@@ -42,7 +27,7 @@ public: ...@@ -42,7 +27,7 @@ public:
explicit StructureTableRecognizer( explicit StructureTableRecognizer(
const std::string &model_dir, const bool &use_gpu, const int &gpu_id, const std::string &model_dir, const bool &use_gpu, const int &gpu_id,
const int &gpu_mem, const int &cpu_math_library_num_threads, const int &gpu_mem, const int &cpu_math_library_num_threads,
const bool &use_mkldnn, const string &label_path, const bool &use_mkldnn, const std::string &label_path,
const bool &use_tensorrt, const std::string &precision, const bool &use_tensorrt, const std::string &precision,
const int &table_batch_num, const int &table_max_len, const int &table_batch_num, const int &table_max_len,
const bool &merge_no_span_structure) { const bool &merge_no_span_structure) {
...@@ -70,7 +55,7 @@ public: ...@@ -70,7 +55,7 @@ public:
std::vector<double> &times); std::vector<double> &times);
private: private:
std::shared_ptr<Predictor> predictor_; std::shared_ptr<paddle_infer::Predictor> predictor_;
bool use_gpu_ = false; bool use_gpu_ = false;
int gpu_id_ = 0; int gpu_id_ = 0;
......
...@@ -41,8 +41,7 @@ struct OCRPredictResult { ...@@ -41,8 +41,7 @@ struct OCRPredictResult {
}; };
struct StructurePredictResult { struct StructurePredictResult {
std::vector<int> box; std::vector<float> box;
std::vector<float> box_float;
std::vector<std::vector<int>> cell_box; std::vector<std::vector<int>> cell_box;
std::string type; std::string type;
std::vector<OCRPredictResult> text_res; std::vector<OCRPredictResult> text_res;
...@@ -60,7 +59,7 @@ public: ...@@ -60,7 +59,7 @@ public:
const std::string &save_path); const std::string &save_path);
static void VisualizeBboxes(const cv::Mat &srcimg, static void VisualizeBboxes(const cv::Mat &srcimg,
StructurePredictResult &structure_result, const StructurePredictResult &structure_result,
const std::string &save_path); const std::string &save_path);
template <class ForwardIterator> template <class ForwardIterator>
...@@ -84,7 +83,8 @@ public: ...@@ -84,7 +83,8 @@ public:
static void print_result(const std::vector<OCRPredictResult> &ocr_result); static void print_result(const std::vector<OCRPredictResult> &ocr_result);
static cv::Mat crop_image(cv::Mat &img, std::vector<int> &area); static cv::Mat crop_image(cv::Mat &img, const std::vector<int> &area);
static cv::Mat crop_image(cv::Mat &img, const std::vector<float> &area);
static void sorted_boxes(std::vector<OCRPredictResult> &ocr_result); static void sorted_boxes(std::vector<OCRPredictResult> &ocr_result);
......
...@@ -75,7 +75,8 @@ void check_params() { ...@@ -75,7 +75,8 @@ void check_params() {
} }
if (FLAGS_precision != "fp32" && FLAGS_precision != "fp16" && if (FLAGS_precision != "fp32" && FLAGS_precision != "fp16" &&
FLAGS_precision != "int8") { FLAGS_precision != "int8") {
cout << "precison should be 'fp32'(default), 'fp16' or 'int8'. " << endl; std::cout << "precison should be 'fp32'(default), 'fp16' or 'int8'. "
<< std::endl;
exit(1); exit(1);
} }
} }
...@@ -93,7 +94,7 @@ void ocr(std::vector<cv::String> &cv_all_img_names) { ...@@ -93,7 +94,7 @@ void ocr(std::vector<cv::String> &cv_all_img_names) {
cv::Mat img = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR); cv::Mat img = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);
if (!img.data) { if (!img.data) {
std::cerr << "[ERROR] image read failed! image path: " std::cerr << "[ERROR] image read failed! image path: "
<< cv_all_img_names[i] << endl; << cv_all_img_names[i] << std::endl;
continue; continue;
} }
img_list.push_back(img); img_list.push_back(img);
...@@ -104,7 +105,7 @@ void ocr(std::vector<cv::String> &cv_all_img_names) { ...@@ -104,7 +105,7 @@ void ocr(std::vector<cv::String> &cv_all_img_names) {
ocr.ocr(img_list, FLAGS_det, FLAGS_rec, FLAGS_cls); ocr.ocr(img_list, FLAGS_det, FLAGS_rec, FLAGS_cls);
for (int i = 0; i < img_names.size(); ++i) { for (int i = 0; i < img_names.size(); ++i) {
cout << "predict img: " << cv_all_img_names[i] << endl; std::cout << "predict img: " << cv_all_img_names[i] << std::endl;
Utility::print_result(ocr_results[i]); Utility::print_result(ocr_results[i]);
if (FLAGS_visualize && FLAGS_det) { if (FLAGS_visualize && FLAGS_det) {
std::string file_name = Utility::basename(img_names[i]); std::string file_name = Utility::basename(img_names[i]);
...@@ -126,11 +127,11 @@ void structure(std::vector<cv::String> &cv_all_img_names) { ...@@ -126,11 +127,11 @@ void structure(std::vector<cv::String> &cv_all_img_names) {
} }
for (int i = 0; i < cv_all_img_names.size(); i++) { for (int i = 0; i < cv_all_img_names.size(); i++) {
cout << "predict img: " << cv_all_img_names[i] << endl; std::cout << "predict img: " << cv_all_img_names[i] << std::endl;
cv::Mat img = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR); cv::Mat img = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);
if (!img.data) { if (!img.data) {
std::cerr << "[ERROR] image read failed! image path: " std::cerr << "[ERROR] image read failed! image path: "
<< cv_all_img_names[i] << endl; << cv_all_img_names[i] << std::endl;
continue; continue;
} }
...@@ -156,14 +157,14 @@ void structure(std::vector<cv::String> &cv_all_img_names) { ...@@ -156,14 +157,14 @@ void structure(std::vector<cv::String> &cv_all_img_names) {
"_" + file_name); "_" + file_name);
} }
} else { } else {
cout << "count of ocr result is : " std::cout << "count of ocr result is : "
<< structure_results[j].text_res.size() << endl; << structure_results[j].text_res.size() << std::endl;
if (structure_results[j].text_res.size() > 0) { if (structure_results[j].text_res.size() > 0) {
cout << "********** print ocr result " std::cout << "********** print ocr result "
<< "**********" << endl; << "**********" << std::endl;
Utility::print_result(structure_results[j].text_res); Utility::print_result(structure_results[j].text_res);
cout << "********** end print ocr result " std::cout << "********** end print ocr result "
<< "**********" << endl; << "**********" << std::endl;
} }
} }
} }
...@@ -180,13 +181,13 @@ int main(int argc, char **argv) { ...@@ -180,13 +181,13 @@ int main(int argc, char **argv) {
if (!Utility::PathExists(FLAGS_image_dir)) { if (!Utility::PathExists(FLAGS_image_dir)) {
std::cerr << "[ERROR] image path not exist! image_dir: " << FLAGS_image_dir std::cerr << "[ERROR] image path not exist! image_dir: " << FLAGS_image_dir
<< endl; << std::endl;
exit(1); exit(1);
} }
std::vector<cv::String> cv_all_img_names; std::vector<cv::String> cv_all_img_names;
cv::glob(FLAGS_image_dir, cv_all_img_names); cv::glob(FLAGS_image_dir, cv_all_img_names);
std::cout << "total images num: " << cv_all_img_names.size() << endl; std::cout << "total images num: " << cv_all_img_names.size() << std::endl;
if (!Utility::PathExists(FLAGS_output)) { if (!Utility::PathExists(FLAGS_output)) {
Utility::CreateDir(FLAGS_output); Utility::CreateDir(FLAGS_output);
...@@ -196,6 +197,6 @@ int main(int argc, char **argv) { ...@@ -196,6 +197,6 @@ int main(int argc, char **argv) {
} else if (FLAGS_type == "structure") { } else if (FLAGS_type == "structure") {
structure(cv_all_img_names); structure(cv_all_img_names);
} else { } else {
std::cout << "only value in ['ocr','structure'] is supported" << endl; std::cout << "only value in ['ocr','structure'] is supported" << std::endl;
} }
} }
...@@ -32,7 +32,7 @@ void Classifier::Run(std::vector<cv::Mat> img_list, ...@@ -32,7 +32,7 @@ void Classifier::Run(std::vector<cv::Mat> img_list,
for (int beg_img_no = 0; beg_img_no < img_num; for (int beg_img_no = 0; beg_img_no < img_num;
beg_img_no += this->cls_batch_num_) { beg_img_no += this->cls_batch_num_) {
auto preprocess_start = std::chrono::steady_clock::now(); auto preprocess_start = std::chrono::steady_clock::now();
int end_img_no = min(img_num, beg_img_no + this->cls_batch_num_); int end_img_no = std::min(img_num, beg_img_no + this->cls_batch_num_);
int batch_num = end_img_no - beg_img_no; int batch_num = end_img_no - beg_img_no;
// preprocess // preprocess
std::vector<cv::Mat> norm_img_batch; std::vector<cv::Mat> norm_img_batch;
...@@ -97,7 +97,7 @@ void Classifier::Run(std::vector<cv::Mat> img_list, ...@@ -97,7 +97,7 @@ void Classifier::Run(std::vector<cv::Mat> img_list,
} }
void Classifier::LoadModel(const std::string &model_dir) { void Classifier::LoadModel(const std::string &model_dir) {
AnalysisConfig config; paddle_infer::Config config;
config.SetModel(model_dir + "/inference.pdmodel", config.SetModel(model_dir + "/inference.pdmodel",
model_dir + "/inference.pdiparams"); model_dir + "/inference.pdiparams");
...@@ -112,9 +112,9 @@ void Classifier::LoadModel(const std::string &model_dir) { ...@@ -112,9 +112,9 @@ void Classifier::LoadModel(const std::string &model_dir) {
precision = paddle_infer::Config::Precision::kInt8; precision = paddle_infer::Config::Precision::kInt8;
} }
config.EnableTensorRtEngine(1 << 20, 10, 3, precision, false, false); config.EnableTensorRtEngine(1 << 20, 10, 3, precision, false, false);
if (!Utility::PathExists("./trt_cls_shape.txt")){ if (!Utility::PathExists("./trt_cls_shape.txt")) {
config.CollectShapeRangeInfo("./trt_cls_shape.txt"); config.CollectShapeRangeInfo("./trt_cls_shape.txt");
} else { } else {
config.EnableTunedTensorRtDynamicShape("./trt_cls_shape.txt", true); config.EnableTunedTensorRtDynamicShape("./trt_cls_shape.txt", true);
} }
} }
...@@ -136,6 +136,6 @@ void Classifier::LoadModel(const std::string &model_dir) { ...@@ -136,6 +136,6 @@ void Classifier::LoadModel(const std::string &model_dir) {
config.EnableMemoryOptim(); config.EnableMemoryOptim();
config.DisableGlogInfo(); config.DisableGlogInfo();
this->predictor_ = CreatePredictor(config); this->predictor_ = paddle_infer::CreatePredictor(config);
} }
} // namespace PaddleOCR } // namespace PaddleOCR
...@@ -33,12 +33,11 @@ void DBDetector::LoadModel(const std::string &model_dir) { ...@@ -33,12 +33,11 @@ void DBDetector::LoadModel(const std::string &model_dir) {
precision = paddle_infer::Config::Precision::kInt8; precision = paddle_infer::Config::Precision::kInt8;
} }
config.EnableTensorRtEngine(1 << 30, 1, 20, precision, false, false); config.EnableTensorRtEngine(1 << 30, 1, 20, precision, false, false);
if (!Utility::PathExists("./trt_det_shape.txt")){ if (!Utility::PathExists("./trt_det_shape.txt")) {
config.CollectShapeRangeInfo("./trt_det_shape.txt"); config.CollectShapeRangeInfo("./trt_det_shape.txt");
} else { } else {
config.EnableTunedTensorRtDynamicShape("./trt_det_shape.txt", true); config.EnableTunedTensorRtDynamicShape("./trt_det_shape.txt", true);
} }
} }
} else { } else {
config.DisableGpu(); config.DisableGpu();
...@@ -59,7 +58,7 @@ void DBDetector::LoadModel(const std::string &model_dir) { ...@@ -59,7 +58,7 @@ void DBDetector::LoadModel(const std::string &model_dir) {
config.EnableMemoryOptim(); config.EnableMemoryOptim();
// config.DisableGlogInfo(); // config.DisableGlogInfo();
this->predictor_ = CreatePredictor(config); this->predictor_ = paddle_infer::CreatePredictor(config);
} }
void DBDetector::Run(cv::Mat &img, void DBDetector::Run(cv::Mat &img,
......
...@@ -37,7 +37,7 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list, ...@@ -37,7 +37,7 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list,
for (int beg_img_no = 0; beg_img_no < img_num; for (int beg_img_no = 0; beg_img_no < img_num;
beg_img_no += this->rec_batch_num_) { beg_img_no += this->rec_batch_num_) {
auto preprocess_start = std::chrono::steady_clock::now(); auto preprocess_start = std::chrono::steady_clock::now();
int end_img_no = min(img_num, beg_img_no + this->rec_batch_num_); int end_img_no = std::min(img_num, beg_img_no + this->rec_batch_num_);
int batch_num = end_img_no - beg_img_no; int batch_num = end_img_no - beg_img_no;
int imgH = this->rec_image_shape_[1]; int imgH = this->rec_image_shape_[1];
int imgW = this->rec_image_shape_[2]; int imgW = this->rec_image_shape_[2];
...@@ -46,7 +46,7 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list, ...@@ -46,7 +46,7 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list,
int h = img_list[indices[ino]].rows; int h = img_list[indices[ino]].rows;
int w = img_list[indices[ino]].cols; int w = img_list[indices[ino]].cols;
float wh_ratio = w * 1.0 / h; float wh_ratio = w * 1.0 / h;
max_wh_ratio = max(max_wh_ratio, wh_ratio); max_wh_ratio = std::max(max_wh_ratio, wh_ratio);
} }
int batch_width = imgW; int batch_width = imgW;
...@@ -60,7 +60,7 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list, ...@@ -60,7 +60,7 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list,
this->normalize_op_.Run(&resize_img, this->mean_, this->scale_, this->normalize_op_.Run(&resize_img, this->mean_, this->scale_,
this->is_scale_); this->is_scale_);
norm_img_batch.push_back(resize_img); norm_img_batch.push_back(resize_img);
batch_width = max(resize_img.cols, batch_width); batch_width = std::max(resize_img.cols, batch_width);
} }
std::vector<float> input(batch_num * 3 * imgH * batch_width, 0.0f); std::vector<float> input(batch_num * 3 * imgH * batch_width, 0.0f);
...@@ -115,7 +115,7 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list, ...@@ -115,7 +115,7 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list,
last_index = argmax_idx; last_index = argmax_idx;
} }
score /= count; score /= count;
if (isnan(score)) { if (std::isnan(score)) {
continue; continue;
} }
rec_texts[indices[beg_img_no + m]] = str_res; rec_texts[indices[beg_img_no + m]] = str_res;
...@@ -130,7 +130,6 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list, ...@@ -130,7 +130,6 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list,
} }
void CRNNRecognizer::LoadModel(const std::string &model_dir) { void CRNNRecognizer::LoadModel(const std::string &model_dir) {
// AnalysisConfig config;
paddle_infer::Config config; paddle_infer::Config config;
config.SetModel(model_dir + "/inference.pdmodel", config.SetModel(model_dir + "/inference.pdmodel",
model_dir + "/inference.pdiparams"); model_dir + "/inference.pdiparams");
...@@ -147,12 +146,11 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) { ...@@ -147,12 +146,11 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) {
if (this->precision_ == "int8") { if (this->precision_ == "int8") {
precision = paddle_infer::Config::Precision::kInt8; precision = paddle_infer::Config::Precision::kInt8;
} }
if (!Utility::PathExists("./trt_rec_shape.txt")){ if (!Utility::PathExists("./trt_rec_shape.txt")) {
config.CollectShapeRangeInfo("./trt_rec_shape.txt"); config.CollectShapeRangeInfo("./trt_rec_shape.txt");
} else { } else {
config.EnableTunedTensorRtDynamicShape("./trt_rec_shape.txt", true); config.EnableTunedTensorRtDynamicShape("./trt_rec_shape.txt", true);
} }
} }
} else { } else {
config.DisableGpu(); config.DisableGpu();
...@@ -177,7 +175,7 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) { ...@@ -177,7 +175,7 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) {
config.EnableMemoryOptim(); config.EnableMemoryOptim();
// config.DisableGlogInfo(); // config.DisableGlogInfo();
this->predictor_ = CreatePredictor(config); this->predictor_ = paddle_infer::CreatePredictor(config);
} }
} // namespace PaddleOCR } // namespace PaddleOCR
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
#include <include/paddleocr.h> #include <include/paddleocr.h>
#include "auto_log/autolog.h" #include "auto_log/autolog.h"
#include <numeric>
namespace PaddleOCR { namespace PaddleOCR {
PPOCR::PPOCR() { PPOCR::PPOCR() {
......
...@@ -16,8 +16,6 @@ ...@@ -16,8 +16,6 @@
#include <include/paddlestructure.h> #include <include/paddlestructure.h>
#include "auto_log/autolog.h" #include "auto_log/autolog.h"
#include <numeric>
#include <sys/stat.h>
namespace PaddleOCR { namespace PaddleOCR {
...@@ -50,7 +48,7 @@ PaddleStructure::structure(cv::Mat srcimg, bool layout, bool table, bool ocr) { ...@@ -50,7 +48,7 @@ PaddleStructure::structure(cv::Mat srcimg, bool layout, bool table, bool ocr) {
} else { } else {
StructurePredictResult res; StructurePredictResult res;
res.type = "table"; res.type = "table";
res.box = std::vector<int>(4, 0); res.box = std::vector<float>(4, 0.0);
res.box[2] = img.cols; res.box[2] = img.cols;
res.box[3] = img.rows; res.box[3] = img.rows;
structure_results.push_back(res); structure_results.push_back(res);
...@@ -108,10 +106,10 @@ void PaddleStructure::table(cv::Mat img, ...@@ -108,10 +106,10 @@ void PaddleStructure::table(cv::Mat img,
std::vector<int> ocr_box; std::vector<int> ocr_box;
for (int j = 0; j < ocr_result.size(); j++) { for (int j = 0; j < ocr_result.size(); j++) {
ocr_box = Utility::xyxyxyxy2xyxy(ocr_result[j].box); ocr_box = Utility::xyxyxyxy2xyxy(ocr_result[j].box);
ocr_box[0] = max(0, ocr_box[0] - expand_pixel); ocr_box[0] = std::max(0, ocr_box[0] - expand_pixel);
ocr_box[1] = max(0, ocr_box[1] - expand_pixel), ocr_box[1] = std::max(0, ocr_box[1] - expand_pixel),
ocr_box[2] = min(img_list[i].cols, ocr_box[2] + expand_pixel); ocr_box[2] = std::min(img_list[i].cols, ocr_box[2] + expand_pixel);
ocr_box[3] = min(img_list[i].rows, ocr_box[3] + expand_pixel); ocr_box[3] = std::min(img_list[i].rows, ocr_box[3] + expand_pixel);
cv::Mat crop_img = Utility::crop_image(img_list[i], ocr_box); cv::Mat crop_img = Utility::crop_image(img_list[i], ocr_box);
rec_img_list.push_back(crop_img); rec_img_list.push_back(crop_img);
...@@ -132,8 +130,8 @@ PaddleStructure::rebuild_table(std::vector<std::string> structure_html_tags, ...@@ -132,8 +130,8 @@ PaddleStructure::rebuild_table(std::vector<std::string> structure_html_tags,
std::vector<std::vector<int>> structure_boxes, std::vector<std::vector<int>> structure_boxes,
std::vector<OCRPredictResult> &ocr_result) { std::vector<OCRPredictResult> &ocr_result) {
// match text in same cell // match text in same cell
std::vector<std::vector<string>> matched(structure_boxes.size(), std::vector<std::vector<std::string>> matched(structure_boxes.size(),
std::vector<std::string>()); std::vector<std::string>());
std::vector<int> ocr_box; std::vector<int> ocr_box;
std::vector<int> structure_box; std::vector<int> structure_box;
...@@ -233,7 +231,7 @@ float PaddleStructure::dis(std::vector<int> &box1, std::vector<int> &box2) { ...@@ -233,7 +231,7 @@ float PaddleStructure::dis(std::vector<int> &box1, std::vector<int> &box2) {
abs(x1_2 - x1_1) + abs(y1_2 - y1_1) + abs(x2_2 - x2_1) + abs(y2_2 - y2_1); abs(x1_2 - x1_1) + abs(y1_2 - y1_1) + abs(x2_2 - x2_1) + abs(y2_2 - y2_1);
float dis_2 = abs(x1_2 - x1_1) + abs(y1_2 - y1_1); float dis_2 = abs(x1_2 - x1_1) + abs(y1_2 - y1_1);
float dis_3 = abs(x2_2 - x2_1) + abs(y2_2 - y2_1); float dis_3 = abs(x2_2 - x2_1) + abs(y2_2 - y2_1);
return dis + min(dis_2, dis_3); return dis + std::min(dis_2, dis_3);
} }
void PaddleStructure::reset_timer() { void PaddleStructure::reset_timer() {
......
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include <include/clipper.h>
#include <include/postprocess_op.h> #include <include/postprocess_op.h>
namespace PaddleOCR { namespace PaddleOCR {
...@@ -352,6 +351,25 @@ std::vector<std::vector<std::vector<int>>> DBPostProcessor::FilterTagDetRes( ...@@ -352,6 +351,25 @@ std::vector<std::vector<std::vector<int>>> DBPostProcessor::FilterTagDetRes(
return root_points; return root_points;
} }
// Builds the label list used by the table post-processor.
//
// label_path               path handed to Utility::ReadDict to load the labels.
// merge_no_span_structure  when true, "<td></td>" is appended to the list and
//                          every "<td>" entry is removed from it.
//
// In all cases the list is finally wrapped with the member values `beg`
// (inserted at the front) and `end` (appended at the back).
void TablePostProcessor::init(std::string label_path,
                              bool merge_no_span_structure) {
  auto &labels = this->label_list_;
  labels = Utility::ReadDict(label_path);

  if (merge_no_span_structure) {
    labels.push_back("<td></td>");

    // erase() returns the iterator following the removed element, so the
    // cursor is only advanced manually when nothing was removed.
    auto cursor = labels.begin();
    while (cursor != labels.end()) {
      if (*cursor != "<td>") {
        ++cursor;
        continue;
      }
      cursor = labels.erase(cursor);
    }
  }

  // add_special_char
  labels.insert(labels.begin(), this->beg);
  labels.push_back(this->end);
}
void TablePostProcessor::Run( void TablePostProcessor::Run(
std::vector<float> &loc_preds, std::vector<float> &structure_probs, std::vector<float> &loc_preds, std::vector<float> &structure_probs,
std::vector<float> &rec_scores, std::vector<int> &loc_preds_shape, std::vector<float> &rec_scores, std::vector<int> &loc_preds_shape,
...@@ -412,7 +430,7 @@ void TablePostProcessor::Run( ...@@ -412,7 +430,7 @@ void TablePostProcessor::Run(
} }
} }
score /= count; score /= count;
if (isnan(score) || rec_boxes.size() == 0) { if (std::isnan(score) || rec_boxes.size() == 0) {
score = -1; score = -1;
} }
rec_scores.push_back(score); rec_scores.push_back(score);
...@@ -421,6 +439,17 @@ void TablePostProcessor::Run( ...@@ -421,6 +439,17 @@ void TablePostProcessor::Run(
} }
} }
void PicodetPostProcessor::init(std::string label_path,
const double score_threshold,
const double nms_threshold,
const std::vector<int> &fpn_stride) {
this->label_list_ = Utility::ReadDict(label_path);
this->score_threshold_ = score_threshold;
this->nms_threshold_ = nms_threshold;
this->num_class_ = label_list_.size();
this->fpn_stride_ = fpn_stride;
}
void PicodetPostProcessor::Run(std::vector<StructurePredictResult> &results, void PicodetPostProcessor::Run(std::vector<StructurePredictResult> &results,
std::vector<std::vector<float>> outs, std::vector<std::vector<float>> outs,
std::vector<int> ori_shape, std::vector<int> ori_shape,
...@@ -469,12 +498,10 @@ void PicodetPostProcessor::Run(std::vector<StructurePredictResult> &results, ...@@ -469,12 +498,10 @@ void PicodetPostProcessor::Run(std::vector<StructurePredictResult> &results,
} }
this->nms(bbox_results[i], this->nms_threshold_); this->nms(bbox_results[i], this->nms_threshold_);
for (auto box : bbox_results[i]) { for (auto box : bbox_results[i]) {
box.box_float[0] = box.box_float[0] / scale_factor_w; box.box[0] = box.box[0] / scale_factor_w;
box.box_float[2] = box.box_float[2] / scale_factor_w; box.box[2] = box.box[2] / scale_factor_w;
box.box_float[1] = box.box_float[1] / scale_factor_h; box.box[1] = box.box[1] / scale_factor_h;
box.box_float[3] = box.box_float[3] / scale_factor_h; box.box[3] = box.box[3] / scale_factor_h;
box.box = {(int)box.box_float[0], (int)box.box_float[1],
(int)box.box_float[2], (int)box.box_float[3]};
results.push_back(box); results.push_back(box);
} }
} }
...@@ -501,13 +528,13 @@ PicodetPostProcessor::disPred2Bbox(std::vector<float> bbox_pred, int label, ...@@ -501,13 +528,13 @@ PicodetPostProcessor::disPred2Bbox(std::vector<float> bbox_pred, int label,
dis_pred[i] = dis; dis_pred[i] = dis;
} }
float xmin_float = (std::max)(ct_x - dis_pred[0], .0f); float xmin = (std::max)(ct_x - dis_pred[0], .0f);
float ymin_float = (std::max)(ct_y - dis_pred[1], .0f); float ymin = (std::max)(ct_y - dis_pred[1], .0f);
float xmax_float = (std::min)(ct_x + dis_pred[2], (float)im_shape[1]); float xmax = (std::min)(ct_x + dis_pred[2], (float)im_shape[1]);
float ymax_float = (std::min)(ct_y + dis_pred[3], (float)im_shape[0]); float ymax = (std::min)(ct_y + dis_pred[3], (float)im_shape[0]);
StructurePredictResult result_item; StructurePredictResult result_item;
result_item.box_float = {xmin_float, ymin_float, xmax_float, ymax_float}; result_item.box = {xmin, ymin, xmax, ymax};
result_item.type = this->label_list_[label]; result_item.type = this->label_list_[label];
result_item.confidence = score; result_item.confidence = score;
...@@ -530,8 +557,7 @@ void PicodetPostProcessor::nms(std::vector<StructurePredictResult> &input_boxes, ...@@ -530,8 +557,7 @@ void PicodetPostProcessor::nms(std::vector<StructurePredictResult> &input_boxes,
if (picked[j] == 0) { if (picked[j] == 0) {
continue; continue;
} }
float iou = float iou = Utility::iou(input_boxes[i].box, input_boxes[j].box);
Utility::iou(input_boxes[i].box_float, input_boxes[j].box_float);
if (iou > nms_threshold) { if (iou > nms_threshold) {
picked[j] = 0; picked[j] = 0;
} }
......
...@@ -12,21 +12,6 @@ ...@@ -12,21 +12,6 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include "paddle_api.h"
#include "paddle_inference_api.h"
#include <chrono>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <vector>
#include <cstring>
#include <fstream>
#include <numeric>
#include <include/preprocess_op.h> #include <include/preprocess_op.h>
namespace PaddleOCR { namespace PaddleOCR {
...@@ -69,13 +54,13 @@ void Normalize::Run(cv::Mat *im, const std::vector<float> &mean, ...@@ -69,13 +54,13 @@ void Normalize::Run(cv::Mat *im, const std::vector<float> &mean,
} }
void ResizeImgType0::Run(const cv::Mat &img, cv::Mat &resize_img, void ResizeImgType0::Run(const cv::Mat &img, cv::Mat &resize_img,
string limit_type, int limit_side_len, float &ratio_h, std::string limit_type, int limit_side_len,
float &ratio_w, bool use_tensorrt) { float &ratio_h, float &ratio_w, bool use_tensorrt) {
int w = img.cols; int w = img.cols;
int h = img.rows; int h = img.rows;
float ratio = 1.f; float ratio = 1.f;
if (limit_type == "min") { if (limit_type == "min") {
int min_wh = min(h, w); int min_wh = std::min(h, w);
if (min_wh < limit_side_len) { if (min_wh < limit_side_len) {
if (h < w) { if (h < w) {
ratio = float(limit_side_len) / float(h); ratio = float(limit_side_len) / float(h);
...@@ -84,7 +69,7 @@ void ResizeImgType0::Run(const cv::Mat &img, cv::Mat &resize_img, ...@@ -84,7 +69,7 @@ void ResizeImgType0::Run(const cv::Mat &img, cv::Mat &resize_img,
} }
} }
} else { } else {
int max_wh = max(h, w); int max_wh = std::max(h, w);
if (max_wh > limit_side_len) { if (max_wh > limit_side_len) {
if (h > w) { if (h > w) {
ratio = float(limit_side_len) / float(h); ratio = float(limit_side_len) / float(h);
...@@ -97,8 +82,8 @@ void ResizeImgType0::Run(const cv::Mat &img, cv::Mat &resize_img, ...@@ -97,8 +82,8 @@ void ResizeImgType0::Run(const cv::Mat &img, cv::Mat &resize_img,
int resize_h = int(float(h) * ratio); int resize_h = int(float(h) * ratio);
int resize_w = int(float(w) * ratio); int resize_w = int(float(w) * ratio);
resize_h = max(int(round(float(resize_h) / 32) * 32), 32); resize_h = std::max(int(round(float(resize_h) / 32) * 32), 32);
resize_w = max(int(round(float(resize_w) / 32) * 32), 32); resize_w = std::max(int(round(float(resize_w) / 32) * 32), 32);
cv::resize(img, resize_img, cv::Size(resize_w, resize_h)); cv::resize(img, resize_img, cv::Size(resize_w, resize_h));
ratio_h = float(resize_h) / float(h); ratio_h = float(resize_h) / float(h);
......
...@@ -94,7 +94,7 @@ void StructureLayoutRecognizer::Run(cv::Mat img, ...@@ -94,7 +94,7 @@ void StructureLayoutRecognizer::Run(cv::Mat img,
} }
void StructureLayoutRecognizer::LoadModel(const std::string &model_dir) { void StructureLayoutRecognizer::LoadModel(const std::string &model_dir) {
AnalysisConfig config; paddle_infer::Config config;
if (Utility::PathExists(model_dir + "/inference.pdmodel") && if (Utility::PathExists(model_dir + "/inference.pdmodel") &&
Utility::PathExists(model_dir + "/inference.pdiparams")) { Utility::PathExists(model_dir + "/inference.pdiparams")) {
config.SetModel(model_dir + "/inference.pdmodel", config.SetModel(model_dir + "/inference.pdmodel",
...@@ -105,7 +105,7 @@ void StructureLayoutRecognizer::LoadModel(const std::string &model_dir) { ...@@ -105,7 +105,7 @@ void StructureLayoutRecognizer::LoadModel(const std::string &model_dir) {
model_dir + "/model.pdiparams"); model_dir + "/model.pdiparams");
} else { } else {
std::cerr << "[ERROR] not find model.pdiparams or inference.pdiparams in " std::cerr << "[ERROR] not find model.pdiparams or inference.pdiparams in "
<< model_dir << endl; << model_dir << std::endl;
exit(1); exit(1);
} }
...@@ -144,6 +144,6 @@ void StructureLayoutRecognizer::LoadModel(const std::string &model_dir) { ...@@ -144,6 +144,6 @@ void StructureLayoutRecognizer::LoadModel(const std::string &model_dir) {
config.EnableMemoryOptim(); config.EnableMemoryOptim();
config.DisableGlogInfo(); config.DisableGlogInfo();
this->predictor_ = CreatePredictor(config); this->predictor_ = paddle_infer::CreatePredictor(config);
} }
} // namespace PaddleOCR } // namespace PaddleOCR
...@@ -34,7 +34,7 @@ void StructureTableRecognizer::Run( ...@@ -34,7 +34,7 @@ void StructureTableRecognizer::Run(
beg_img_no += this->table_batch_num_) { beg_img_no += this->table_batch_num_) {
// preprocess // preprocess
auto preprocess_start = std::chrono::steady_clock::now(); auto preprocess_start = std::chrono::steady_clock::now();
int end_img_no = min(img_num, beg_img_no + this->table_batch_num_); int end_img_no = std::min(img_num, beg_img_no + this->table_batch_num_);
int batch_num = end_img_no - beg_img_no; int batch_num = end_img_no - beg_img_no;
std::vector<cv::Mat> norm_img_batch; std::vector<cv::Mat> norm_img_batch;
std::vector<int> width_list; std::vector<int> width_list;
...@@ -118,7 +118,7 @@ void StructureTableRecognizer::Run( ...@@ -118,7 +118,7 @@ void StructureTableRecognizer::Run(
} }
void StructureTableRecognizer::LoadModel(const std::string &model_dir) { void StructureTableRecognizer::LoadModel(const std::string &model_dir) {
AnalysisConfig config; paddle_infer::Config config;
config.SetModel(model_dir + "/inference.pdmodel", config.SetModel(model_dir + "/inference.pdmodel",
model_dir + "/inference.pdiparams"); model_dir + "/inference.pdiparams");
...@@ -157,6 +157,6 @@ void StructureTableRecognizer::LoadModel(const std::string &model_dir) { ...@@ -157,6 +157,6 @@ void StructureTableRecognizer::LoadModel(const std::string &model_dir) {
config.EnableMemoryOptim(); config.EnableMemoryOptim();
config.DisableGlogInfo(); config.DisableGlogInfo();
this->predictor_ = CreatePredictor(config); this->predictor_ = paddle_infer::CreatePredictor(config);
} }
} // namespace PaddleOCR } // namespace PaddleOCR
...@@ -66,7 +66,7 @@ void Utility::VisualizeBboxes(const cv::Mat &srcimg, ...@@ -66,7 +66,7 @@ void Utility::VisualizeBboxes(const cv::Mat &srcimg,
} }
void Utility::VisualizeBboxes(const cv::Mat &srcimg, void Utility::VisualizeBboxes(const cv::Mat &srcimg,
StructurePredictResult &structure_result, const StructurePredictResult &structure_result,
const std::string &save_path) { const std::string &save_path) {
cv::Mat img_vis; cv::Mat img_vis;
srcimg.copyTo(img_vis); srcimg.copyTo(img_vis);
...@@ -281,7 +281,7 @@ void Utility::print_result(const std::vector<OCRPredictResult> &ocr_result) { ...@@ -281,7 +281,7 @@ void Utility::print_result(const std::vector<OCRPredictResult> &ocr_result) {
} }
} }
cv::Mat Utility::crop_image(cv::Mat &img, std::vector<int> &box) { cv::Mat Utility::crop_image(cv::Mat &img, const std::vector<int> &box) {
cv::Mat crop_im; cv::Mat crop_im;
int crop_x1 = std::max(0, box[0]); int crop_x1 = std::max(0, box[0]);
int crop_y1 = std::max(0, box[1]); int crop_y1 = std::max(0, box[1]);
...@@ -298,6 +298,12 @@ cv::Mat Utility::crop_image(cv::Mat &img, std::vector<int> &box) { ...@@ -298,6 +298,12 @@ cv::Mat Utility::crop_image(cv::Mat &img, std::vector<int> &box) {
return crop_im; return crop_im;
} }
// Float-box overload: converts the first four entries of `box` to int
// (fractional part discarded) and forwards to the integer-box crop_image
// overload. `box` is indexed at positions 0..3 without a size check.
cv::Mat Utility::crop_image(cv::Mat &img, const std::vector<float> &box) {
  std::vector<int> truncated;
  truncated.reserve(4);
  for (int idx = 0; idx < 4; ++idx) {
    truncated.push_back(static_cast<int>(box[idx]));
  }
  return crop_image(img, truncated);
}
void Utility::sorted_boxes(std::vector<OCRPredictResult> &ocr_result) { void Utility::sorted_boxes(std::vector<OCRPredictResult> &ocr_result) {
std::sort(ocr_result.begin(), ocr_result.end(), Utility::comparison_box); std::sort(ocr_result.begin(), ocr_result.end(), Utility::comparison_box);
if (ocr_result.size() > 0) { if (ocr_result.size() > 0) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册